Use RBM to perform feature extraction on an image-based dataset that you find or create. If you go this route, present the features you extract and explain why this is a useful feature extraction method in the context you’re operating in.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import the model.
from sklearn.neural_network import BernoulliRBM

from sklearn.model_selection import cross_val_score

from PIL import Image

In [3]:
# Create a list of the image names (1-50 will be used)
N = 50
images = []
images = range(1,N+1)

# images are 32 x 32 pixels = 1024 pixels / features
features = []
features = range(1,1024)
greyscale = pd.DataFrame()

for value in range(1,N+1):
    # Load the image 
    im = Image.open('D:\\Data\\train\\train\\' + str(value) + '.png').convert('L')
    pix = im.load()
    width = im.size[0]
    height = im.size[1]
    image_pix_values = []
    for x in range(width):
        for y in range(height):
            pix_values = pix[x,y]
            image_pix_values.append(pix_values)
    # Store the values by each picture (groups of 1024 values)
    greyscale = greyscale.append(pd.DataFrame(data = [image_pix_values]), ignore_index = True)

In [4]:
greyscale

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,61,18,23,26,36,52,74,88,107,126,...,179,170,132,110,90,52,22,38,59,98
1,171,155,151,144,135,142,153,171,134,73,...,189,170,155,92,91,124,90,86,125,137
2,255,255,255,255,255,255,255,255,255,255,...,81,77,84,91,80,74,82,77,81,83
3,24,27,32,46,66,66,66,68,46,42,...,73,66,68,76,56,66,79,74,75,64
4,179,179,167,110,97,91,83,80,78,72,...,111,76,59,50,22,13,30,64,74,76
5,118,94,80,79,80,88,82,91,112,87,...,129,159,164,148,135,124,114,106,98,90
6,179,184,161,126,99,94,98,82,82,69,...,153,145,163,169,145,150,157,146,147,141
7,33,30,36,49,86,162,206,220,227,223,...,201,188,170,160,149,141,149,156,144,123
8,174,176,172,171,170,170,139,104,112,107,...,93,92,87,84,83,81,78,79,80,80
9,123,144,183,197,153,136,150,154,149,168,...,22,22,23,24,31,70,96,104,115,120


In [5]:
X = greyscale

# Set up the model here
brbm = BernoulliRBM(n_components = 100)
brbm.fit(X)



BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=100, n_iter=10,
       random_state=None, verbose=0)

In [6]:
brbm.transform(X)

array([[ 1.,  0.,  1., ...,  0.,  0.,  1.],
       [ 1.,  0.,  1., ...,  0.,  0.,  1.],
       [ 1.,  0.,  1., ...,  0.,  0.,  1.],
       ..., 
       [ 1.,  0.,  1., ...,  0.,  0.,  1.],
       [ 1.,  0.,  1., ...,  0.,  0.,  1.],
       [ 1.,  0.,  1., ...,  0.,  0.,  1.]])

In [7]:
brbm.components_

array([[  6.23671210e+02,   6.18573616e+02,   6.06168851e+02, ...,
          5.84067633e+02,   5.99534368e+02,   6.00889358e+02],
       [ -1.99816520e-02,  -1.87102903e-02,  -9.47090662e-03, ...,
         -2.17704408e-02,  -3.66845463e-02,  -6.06375376e-03],
       [  6.20553643e+02,   6.15222325e+02,   6.02698376e+02, ...,
          5.81488143e+02,   5.96843818e+02,   5.98137465e+02],
       ..., 
       [ -2.49382317e-02,  -4.05728693e-02,  -1.06769249e-02, ...,
         -1.51734991e-02,  -4.08438486e-02,  -3.24175058e-03],
       [ -3.14044702e-02,  -3.42676807e-02,  -1.29811736e-02, ...,
         -2.44964929e-02,  -4.18365236e-02,  -1.46504673e-02],
       [  6.21733201e+02,   6.16679912e+02,   6.04448193e+02, ...,
          5.81220071e+02,   5.96841066e+02,   5.98427247e+02]])

In [9]:
print(len(brbm.components_))

100


In [10]:
print(len(brbm.components_[0]))

1024


In [12]:
# Print the sum of the weights as the sum of the components of the first row of data
sum(brbm.components_[0])

606573.61210658413

In [13]:
# Run the model again, but with n_components = 2
X = greyscale

# Set up the model here
brbm = BernoulliRBM(n_components = 2)
brbm.fit(X)

BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
       random_state=None, verbose=0)

In [14]:
print(len(brbm.components_))

2


In [15]:
brbm.components_

array([[ 616.5203446 ,  611.61598377,  599.48574499, ...,  578.63773564,
         594.19371771,  595.82701739],
       [ 613.51878048,  608.91479246,  596.83117841, ...,  578.55500476,
         593.300893  ,  594.21054115]])

In [22]:
# Run the model again, but with n_components = 10
X = greyscale

# Set up the model here
brbm = BernoulliRBM(n_components = 10)
brbm.fit(X)

BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=10, n_iter=10,
       random_state=None, verbose=0)

In [23]:
brbm.components_

array([[ 610.8110174 ,  606.26232802,  593.90169323, ...,  576.2717419 ,
         590.97226756,  591.93135191],
       [ 619.49120943,  614.21809048,  601.36929957, ...,  580.68057634,
         596.15064995,  597.5895623 ],
       [ 621.99613573,  616.73302095,  603.87767327, ...,  581.43801303,
         596.95917172,  598.40582436],
       ..., 
       [ 623.65421554,  618.57646238,  606.14560966, ...,  584.07902007,
         599.57888129,  600.87712473],
       [ 621.11152859,  616.02885417,  603.58688378, ...,  583.33281888,
         598.79303019,  600.06186167],
       [ 612.33982003,  607.76804516,  595.26246202, ...,  575.41914653,
         590.3405592 ,  591.53884076]])

# The model reduced the dimensionality from 1024 features to 10 features, each of which is a matrix of weights representing the extracted components in regards to the original features. 