# Assignment 1
MLP Model Creation

All the imports required

In [1]:
# importing sys
import sys

# add the path of the Assignment_1 folder to the sys.path
sys.path.append('Assignment_1')

import numpy as np
from Utils import GetData
from Tests import PickleTest, FeatureVectorGeneratorTest
from PreProcessor import FeatureVectorGenerator
from PreProcessor.unpickle import unpickle
from Model.mlp_model import FCLayer, ActivationLayer, Network
from Model.activation_error_functions import relu, relu_prime, softmax, softmax_prime, cross_entropy, cross_entropy_prime

## Preparing the Data


In [2]:
# importing the raw data by unpickling the data
raw_data_1 = GetData.get_train_data(1)
raw_data_2 = GetData.get_train_data(2)
raw_data_3 = GetData.get_train_data(3)
raw_data_4 = GetData.get_train_data(4)
raw_data_5 = GetData.get_train_data(5)

raw_labels = GetData.get_labels()

raw_test_data = GetData.get_test_data()

# Testing the input data
PickleTest.test_data(raw_data_1)
PickleTest.test_data(raw_data_2)
PickleTest.test_data(raw_data_3)
PickleTest.test_data(raw_data_4)
PickleTest.test_data(raw_data_5)
PickleTest.test_data(raw_test_data)

PickleTest.test_labels(raw_labels)

x_train_1 = np.zeros((raw_data_1[b'data'].shape[0], 512)).astype(np.float32)
x_train_2 = np.zeros((raw_data_2[b'data'].shape[0], 512)).astype(np.float32)
x_train_3 = np.zeros((raw_data_3[b'data'].shape[0], 512)).astype(np.float32)
x_train_4 = np.zeros((raw_data_4[b'data'].shape[0], 512)).astype(np.float32)
x_train_5 = np.zeros((raw_data_5[b'data'].shape[0], 512)).astype(np.float32)

x_test = np.zeros((raw_data_1[b'data'].shape[0], 512)).astype(np.float32)

# declaring the batch size
batch_size = 1024

for i in range(0, raw_data_1[b'data'].shape[0], batch_size):
    x_train_1[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_data_1[b'data'][i:i+batch_size])
    x_train_2[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_data_2[b'data'][i:i+batch_size])
    x_train_3[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_data_3[b'data'][i:i+batch_size])
    x_train_4[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_data_4[b'data'][i:i+batch_size])
    x_train_5[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_data_5[b'data'][i:i+batch_size])

    x_test[i:i+batch_size] = FeatureVectorGenerator.generate_feature_vector(raw_test_data[b'data'][i:i+batch_size])

# one hot encoding the data to get y matrix
y_train_1 = FeatureVectorGenerator.one_hot_encoding(raw_data_1[b'labels'])
y_train_2 = FeatureVectorGenerator.one_hot_encoding(raw_data_2[b'labels'])
y_train_3 = FeatureVectorGenerator.one_hot_encoding(raw_data_3[b'labels'])
y_train_4 = FeatureVectorGenerator.one_hot_encoding(raw_data_4[b'labels'])
y_train_5 = FeatureVectorGenerator.one_hot_encoding(raw_data_5[b'labels'])

y_test = FeatureVectorGenerator.one_hot_encoding(raw_test_data[b'labels'])

y_train_raw1 = raw_data_1[b'labels']
y_train_raw2 = raw_data_2[b'labels']
y_train_raw3 = raw_data_3[b'labels']
y_train_raw4 = raw_data_4[b'labels']
y_train_raw5 = raw_data_5[b'labels']

y_test_raw = raw_test_data[b'labels']

# Testing the feature vector and one hot encoded labels
FeatureVectorGeneratorTest.test_feature_vector_data(raw_data_1[b'data'], x_train_1)
FeatureVectorGeneratorTest.test_feature_vector_data(raw_data_2[b'data'], x_train_2)
FeatureVectorGeneratorTest.test_feature_vector_data(raw_data_3[b'data'], x_train_3)
FeatureVectorGeneratorTest.test_feature_vector_data(raw_data_4[b'data'], x_train_4)
FeatureVectorGeneratorTest.test_feature_vector_data(raw_data_5[b'data'], x_train_5)

FeatureVectorGeneratorTest.test_feature_vector_data(raw_test_data[b'data'], x_test)

FeatureVectorGeneratorTest.test_one_hot_encoding(raw_data_1[b'labels'], y_train_1)
FeatureVectorGeneratorTest.test_one_hot_encoding(raw_data_2[b'labels'], y_train_2)
FeatureVectorGeneratorTest.test_one_hot_encoding(raw_data_3[b'labels'], y_train_3)
FeatureVectorGeneratorTest.test_one_hot_encoding(raw_data_4[b'labels'], y_train_4)
FeatureVectorGeneratorTest.test_one_hot_encoding(raw_data_5[b'labels'], y_train_5)

FeatureVectorGeneratorTest.test_one_hot_encoding(raw_test_data[b'labels'], y_test)



AssertionError: Feature Vector Data is greater than 1

## Training the Model

In [None]:
# network
net = Network()
net.add(FCLayer(512, 64))
net.add(ActivationLayer(relu, relu_prime))
net.add(FCLayer(64, 64))
net.add(ActivationLayer(relu, relu_prime))
net.add(FCLayer(64, 10))
net.add(ActivationLayer(softmax, softmax_prime))

# train
net.use(cross_entropy, cross_entropy_prime)
net.fit(x_train, y_train, epochs=10, learning_rate=0.1)

## Using the Model to Predict the test data

In [None]:
y_hat = net.predict(x_test)

from sklearn import metrics


print("Train set Accuracy: ", metrics.accuracy_score(y_train, net.predict(x_train)))
print("Test set Accuracy: ", metrics.accuracy_score(y_test, y_hat))

# Training The Machine Learning models Using the scikit learn package

1. SVM

In [8]:
from sklearn import svm

epochs = 10

svm_model = svm.SVC()
for i in range(epochs):
    svm_model.fit(x_train_1, y_train_raw1)
    svm_model.fit(x_train_2, y_train_raw2)
    svm_model.fit(x_train_3, y_train_raw3)
    svm_model.fit(x_train_4, y_train_raw4)
    svm_model.fit(x_train_5, y_train_raw5)

svm_predict = svm_model.predict(x_test)
print("SVM Model Trained")

from sklearn.metrics import f1_score, jaccard_score, accuracy_score

print("Train set Accuracy: ", accuracy_score(y_train_raw1, svm_model.predict(x_train_1)))
print("Test set Accuracy: ", accuracy_score(y_test_raw, svm_predict))
print("f1 score:",f1_score(y_test_raw, svm_predict, average='weighted'))
print("jaccard score:",jaccard_score(y_test_raw, svm_predict, average = "weighted"))

SVM Model Trained
Train set Accuracy:  0.3893
Test set Accuracy:  0.3895
f1 score: 0.38233072206707175
jaccard score: 0.23981269418893864


2. KNN Classifier

In [7]:
from sklearn.neighbors import KNeighborsClassifier
epochs = 10
KNN_model = KNeighborsClassifier()
for i in range(epochs):
    KNN_model.fit(x_train_1, y_train_1)
    KNN_model.fit(x_train_2, y_train_2)
    KNN_model.fit(x_train_3, y_train_3)
    KNN_model.fit(x_train_4, y_train_4)
    KNN_model.fit(x_train_5, y_train_5)

KNN_predict = KNN_model.predict(x_test)
print("KNN Model Trained")

from sklearn.metrics import f1_score, jaccard_score, accuracy_score

print("Train set Accuracy: ", accuracy_score(y_train_1, KNN_model.predict(x_train_1)))
print("Test set Accuracy: ", accuracy_score(y_test, KNN_predict))
print("f1 score:",f1_score(y_test, KNN_predict, average='weighted'))
print("jaccard score:",jaccard_score(y_test, KNN_predict, average = "weighted"))

KNN Model Trained
Train set Accuracy:  0.1469
Test set Accuracy:  0.1533
f1 score: 0.22191751443828625
jaccard score: 0.12722637305224943
