### Image Analysis 

In [None]:
#Import the Pillow library if needed

#import sys
#!conda install -c anaconda pillow --yes

In [1]:
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.manifold import Isomap
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Reading the file that contains the file/image paths

imagePath = pd.read_csv('Images.csv', header = None)
total_images = len(imagePath)

In [3]:
# Scaling,resizing and flattening the images

image_arrays = list()
names = list()
scale_percent = 0.5
for i in range(total_images):
    for path in imagePath.iloc[i]:
        name = path[7:-13]
        names.append(name)
        colourImg = Image.open(path)

        width, height = colourImg.size
        
        width = int(width * scale_percent / 100)
        height = int(height * scale_percent / 100)
        colourImg = colourImg.resize((width, height))

        colourPixels = colourImg.convert("RGB")
        colourArray = np.array(colourPixels.getdata()).reshape(colourImg.size + (3,))
        indicesArray = np.moveaxis(np.indices(colourImg.size), 0, 2)
        allArray = np.dstack((indicesArray, colourArray)).reshape((-1, 5))

        image = pd.DataFrame(allArray, columns=["row", "col", "red","green","blue"])
        image = image.drop(['row','col'], axis = 1)
        image = image.values.flatten()
        image_arrays.append(image)
        

In [4]:
pixels = height * width
column_names = list()
for i in range(pixels):
    red, green, blue = '{}_red'.format(i), '{}_green'.format(i), '{}_blue'.format(i)
    column_names.append(red)
    column_names.append(green)
    column_names.append(blue)

In [5]:
images = pd.DataFrame()
for image in image_arrays:
    image = pd.Series(image, index = column_names)
    images = images.append(image, ignore_index=True)

In [6]:
images['Name'] = names
images.head()

Unnamed: 0,0_blue,0_green,0_red,100_blue,100_green,100_red,101_blue,101_green,101_red,102_blue,...,98_blue,98_green,98_red,99_blue,99_green,99_red,9_blue,9_green,9_red,Name
0,87.0,76.0,68.0,28.0,29.0,33.0,42.0,42.0,42.0,44.0,...,114.0,98.0,151.0,102.0,85.0,136.0,67.0,66.0,68.0,Anjali
1,101.0,89.0,83.0,30.0,28.0,27.0,38.0,40.0,40.0,43.0,...,112.0,96.0,149.0,104.0,86.0,139.0,70.0,68.0,68.0,Anjali
2,97.0,84.0,76.0,34.0,31.0,27.0,41.0,41.0,41.0,40.0,...,112.0,97.0,148.0,101.0,86.0,137.0,70.0,68.0,68.0,Anjali
3,94.0,84.0,77.0,36.0,36.0,36.0,37.0,40.0,38.0,41.0,...,106.0,88.0,141.0,101.0,84.0,135.0,70.0,70.0,70.0,Anjali
4,94.0,85.0,76.0,36.0,29.0,26.0,38.0,37.0,39.0,39.0,...,101.0,83.0,136.0,101.0,87.0,135.0,72.0,69.0,71.0,Anjali


In [7]:
# Converting categorical value(our names) into numeric values using Map function

name_map={'Anjali':0,'Sandhya':1, 'David':2, 'Jibran':3}
images['Name']=images['Name'].map(name_map)
    
images.head()

Unnamed: 0,0_blue,0_green,0_red,100_blue,100_green,100_red,101_blue,101_green,101_red,102_blue,...,98_blue,98_green,98_red,99_blue,99_green,99_red,9_blue,9_green,9_red,Name
0,87.0,76.0,68.0,28.0,29.0,33.0,42.0,42.0,42.0,44.0,...,114.0,98.0,151.0,102.0,85.0,136.0,67.0,66.0,68.0,0
1,101.0,89.0,83.0,30.0,28.0,27.0,38.0,40.0,40.0,43.0,...,112.0,96.0,149.0,104.0,86.0,139.0,70.0,68.0,68.0,0
2,97.0,84.0,76.0,34.0,31.0,27.0,41.0,41.0,41.0,40.0,...,112.0,97.0,148.0,101.0,86.0,137.0,70.0,68.0,68.0,0
3,94.0,84.0,77.0,36.0,36.0,36.0,37.0,40.0,38.0,41.0,...,106.0,88.0,141.0,101.0,84.0,135.0,70.0,70.0,70.0,0
4,94.0,85.0,76.0,36.0,29.0,26.0,38.0,37.0,39.0,39.0,...,101.0,83.0,136.0,101.0,87.0,135.0,72.0,69.0,71.0,0


In [8]:
# Reducing the dimensionality using ISOMAP

iso = Isomap(n_components = 150)
X = images.loc[:, images.columns != 'Name']
Y = images['Name']
X_transformed = iso.fit_transform(X)

X_train, X_test, y_train,y_test = train_test_split(X_transformed, Y, test_size=0.3)

### Neural Network Classifier

In [9]:
# Using Neural Networks Classifier to predict

model =  MLPClassifier()
param_grid = {'hidden_layer_sizes': [[600,300],[400,600],[500,700]]}
cv = StratifiedKFold(n_splits=7, random_state=0, shuffle=True)
grid = GridSearchCV(model, param_grid, cv = cv, scoring='accuracy',
                    return_train_score=True)
grid.fit(X_transformed, Y)

print("Best Parameter: {}".format(grid.best_params_))
print("Best Cross Validation Score: {}".format(grid.best_score_))

Best Parameter: {'hidden_layer_sizes': [400, 600]}
Best Cross Validation Score: 0.9788461538461538


In [10]:
#Modeling the train dataset

model = MLPClassifier(solver='lbfgs',random_state=0, hidden_layer_sizes=[500,700])
model.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=[500, 700], learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=0, shuffle=True, solver='lbfgs', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [11]:
#Predicting the test and train datasets

y_train_hat  = model.predict(X_train)
y_test_hat  = model.predict(X_test)

#Finding the accuracy of the test and train datasets

in_sample_acc = accuracy_score(y_train,y_train_hat, normalize = True) * 100
out_of_sample_acc = accuracy_score(y_test,y_test_hat, normalize = True) * 100
print("In-sample Accuracy: ", in_sample_acc)
print("Out-of-sample Accuracy: ", out_of_sample_acc)

In-sample Accuracy:  24.45054945054945
Out-of-sample Accuracy:  21.794871794871796


### Random Forest Classifier

In [58]:
# Using Random Forest Classifier for prediction
from sklearn.ensemble import RandomForestClassifier

#from sklearn.ensemble import RandomForestRegressor

forest = RandomForestClassifier()



In [59]:
forest = RandomForestClassifier(n_estimators=400)


forest.fit(X_train,y_train)


y_train_hat  = forest.predict(X_train)
y_test_hat = forest.predict(X_test)



In [60]:
#Finding the accuracy of the test and train datasets

in_sample_acc = accuracy_score(y_train,y_train_hat, normalize = True) * 100
out_of_sample_acc = accuracy_score(y_test,y_test_hat, normalize = True) * 100
print("In-sample Accuracy: ", in_sample_acc)
print("Out-of-sample Accuracy: ", out_of_sample_acc)

In-sample Accuracy:  100.0
Out-of-sample Accuracy:  97.75641025641025


In [69]:
from sklearn.preprocessing import label_binarize

# Binarize the output
y = label_binarize(Y, classes=[0, 1, 2,3])
n_classes = y.shape[1]
print(n_classes)

4


In [73]:
from sklearn.multiclass import OneVsRestClassifier

classifier = OneVsRestClassifier(RandomForestClassifier(n_estimators=400))
y_score = classifier.fit(X_train, y_train).predict(X_test)
