## 1. Build a BPN (back-propagation network) classifier for any Kaggle dataset.

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Load the numeric text file into an array and report its (rows, columns) shape.
db = np.loadtxt("duke-breast-cancer.txt")
print("Database raw shape (%s,%s)" % db.shape)

Database raw shape (86,7130)




Next we shuffle the samples and split them into a training set (90%) and a test set (10%): the network learns from the training set, and the held-out test set is used to measure how well it generalizes.

In [None]:
# Shuffle the rows in place, then separate column 0 (used as the label)
# from the remaining columns (used as the features).
np.random.shuffle(db)
y, x = db[:, 0], np.delete(db, 0, axis=1)

# Hold out 10% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
print(x_train.shape, x_test.shape)

(77, 7129) (9, 7129)


Now we create the hidden-layer vector, the input-to-hidden weight matrix, the output-layer vector and the hidden-to-output weight matrix. We choose a hidden layer of 72 perceptrons. The output layer needs as many perceptrons as there are classes (2 here).

In [None]:
# Network dimensions: one input per feature column, 72 hidden perceptrons,
# 2 output perceptrons.
n_inputs, n_hidden, n_classes = len(x[0]), 72, 2

# Activation buffers start at zero; both weight matrices are drawn from
# np.random.random (input-to-hidden first, then hidden-to-output).
hidden_layer = np.zeros(n_hidden)
weights = np.random.random((n_inputs, n_hidden))
output_layer = np.zeros(n_classes)
hidden_weights = np.random.random((n_hidden, n_classes))

To continue we need to implement:

    1. Sum function
    2. Activation function
    3. SoftMax function
    4. Recalculate Weights function
    5. Back-propagation function


**Sum function**

Weighted sum of the inputs for the i-th perceptron of a layer.

In [None]:
def sum_function(weights, index_locked_col, x):
    """Return the weighted input sum of a single perceptron.

    Computes ``sum(x[i] * weights[i][index_locked_col])`` over every index
    ``i`` of ``x`` as one vectorized dot product instead of a Python loop.

    Args:
        weights: 2-D weight matrix (NumPy array or nested sequence); row ``i``
            holds the weights leaving input ``i``.
        index_locked_col: Column of ``weights`` to use, i.e. the index of the
            perceptron whose input sum is wanted.
        x: 1-D sequence of input values.

    Returns:
        The dot product of ``x`` with the first ``len(x)`` entries of the
        selected weight column, or ``0`` when ``x`` is empty.
    """
    n_inputs = len(x)
    if n_inputs == 0:
        # Matches the accumulator's starting value when there is nothing to add.
        return 0
    # Only the first len(x) rows take part, exactly as in an index loop over x.
    column = np.asarray(weights)[:n_inputs, index_locked_col]
    return np.dot(x, column)


**Activation function**

Activation of the i-th perceptron of a layer: the scaled hyperbolic tangent $1.7159\,\tanh\left(\tfrac{2}{3}s\right)$ of its weighted input sum $s$.

In [None]:
def activate_layer(layer, weights, x):
    """Fill ``layer`` in place with the activation of each of its perceptrons.

    For perceptron ``k`` the weighted input sum is obtained from
    ``sum_function(weights, k, x)`` and passed through the scaled hyperbolic
    tangent ``1.7159 * tanh(2 * s / 3)``.  Nothing is returned; the result is
    written into ``layer``.
    """
    for neuron in range(len(layer)):
        net_input = sum_function(weights, neuron, x)
        layer[neuron] = 1.7159 * np.tanh(2.0 * net_input / 3.0)

**SoftMax function**

The softmax function, or normalized exponential function, is a generalization of the logistic function that "squashes" a K-dimensional vector $\mathbf{z}$ of arbitrary real values to a K-dimensional vector $\sigma(\mathbf{z})$ of real values in the range (0, 1) that add up to 1.

In [None]:
def soft_max(layer):
    """Return the softmax of a 1-D vector of scores.

    Each output is ``exp(layer[i] - max(layer))`` divided by the sum of those
    exponentials.  Subtracting the maximum keeps ``exp`` from overflowing for
    large scores and does not change the result.  The maximum and the
    denominator are computed once rather than once per element.

    Args:
        layer: 1-D sequence or array of real-valued scores.

    Returns:
        A new float array of the same length whose entries lie in (0, 1] and
        sum to 1; an empty array when ``layer`` is empty.
    """
    scores = np.asarray(layer, dtype=float)
    if scores.size == 0:
        # np.max is undefined on an empty array; an empty result is the
        # natural answer for no inputs.
        return np.zeros(0)
    exponentials = np.exp(scores - np.max(scores))
    return exponentials / np.sum(exponentials)

**Recalculate Weights function**

We adjust the network's input-to-hidden weight matrix and its hidden-to-output weight matrix. This function is used inside the back-propagation function.

In [None]:
def recalculate_weights(learning_rate, weights, gradient, activation):
    """Apply one update step to ``weights`` in place.

    Every entry ``weights[i][j]`` is increased by
    ``learning_rate * gradient[j] * activation[i]``, where ``i`` indexes the
    rows of ``weights`` and ``j`` the columns.  Nothing is returned.
    """
    for i, row in enumerate(weights):
        for j in range(len(row)):
            row[j] += learning_rate * gradient[j] * activation[i]

**Back-propagation function**

We compute the output-layer gradient and the hidden-layer gradient, then use them to recalculate the network weights.

In [None]:
def back_propagation(hidden_layer, output_layer, one_hot_encoding, learning_rate, x):
    """Run one back-propagation step and update both weight matrices in place.

    The module-level ``hidden_weights`` (hidden-to-output) and ``weights``
    (input-to-hidden) matrices are read and then updated through
    ``recalculate_weights``.  Nothing is returned.

    Args:
        hidden_layer: Activations of the hidden layer for the current sample.
        output_layer: Network outputs for the current sample.
        one_hot_encoding: Target vector, same length as ``output_layer``.
        learning_rate: Step size handed to ``recalculate_weights``.
        x: Input vector of the current sample.
    """
    # Constants of the hidden activation f(s) = A * tanh(B * s); they must stay
    # in sync with the ones used by activate_layer.
    amplitude = 1.7159
    slope = 2.0 / 3.0

    # Buffer sizes follow the layers instead of being hard-coded.
    n_outputs = len(output_layer)
    n_hidden = len(hidden_layer)

    # Output deltas: o * (1 - o) * (target - o).
    output_gradient = np.zeros(n_outputs)
    for k in range(n_outputs):
        output_derivative = (1.0 - output_layer[k]) * output_layer[k]
        output_gradient[k] = output_derivative * (one_hot_encoding[k] - output_layer[k])

    # Hidden deltas.  They are computed before hidden_weights is modified so
    # that the error is propagated through the weights that produced it.
    hidden_gradient = np.zeros(n_hidden)
    for i in range(n_hidden):
        # Derivative of A * tanh(B * s) written in terms of the activation h:
        # A * B * (1 - tanh^2) = (B / A) * (A - h) * (A + h).
        # The plain-tanh form (1 - h) * (1 + h) turns negative once |h| > 1,
        # which this activation reaches.
        hidden_derivative = (
            (slope / amplitude)
            * (amplitude - hidden_layer[i])
            * (amplitude + hidden_layer[i])
        )
        back_propagated_error = 0
        for k in range(n_outputs):
            back_propagated_error += output_gradient[k] * hidden_weights[i][k]
        hidden_gradient[i] = back_propagated_error * hidden_derivative

    recalculate_weights(learning_rate, hidden_weights, output_gradient, hidden_layer)
    recalculate_weights(learning_rate, weights, hidden_gradient, x)

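Now we one-hot encode the class labels.

One-hot encoding converts categorical variables into a form that can be provided to machine-learning and deep-learning algorithms (a vector with a 1 at the position of the class and 0 elsewhere), which in turn improves the predictions and the classification accuracy of a model.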

In [None]:
# One-hot targets: row k is the target vector for class k.
one_hot_encoding = np.eye(2)

# The update rule in recalculate_weights is w += rate * (target - output) * ...,
# so the rate must be POSITIVE to move the outputs towards the target; a
# negative rate pushes them away.  The magnitude is a tunable hyper-parameter.
learning_rate = 1.0

training_correct_answers = 0
for sample, label in zip(x_train, y_train):
    # Forward pass: hidden layer, output layer, then softmax over the outputs.
    activate_layer(hidden_layer, weights, sample)
    activate_layer(output_layer, hidden_weights, hidden_layer)
    output_layer = soft_max(output_layer)
    # Score the prediction made before this sample's weight update.
    if label == np.argmax(output_layer):
        training_correct_answers += 1
    # NOTE(review): int(label) is used directly as the class index, which
    # presumably means labels are encoded as 0/1 - confirm against the data file.
    back_propagation(hidden_layer, output_layer, one_hot_encoding[int(label)],
                     learning_rate, sample)
print("Correct answers while learning: %s / %s (Accuracy = %s) " % (training_correct_answers, len(x_train),
                                                                   training_correct_answers/len(x_train)))

Correct answers while learning: 53 / 77 (Accuracy = 0.6883116883116883) 




The test accuracy depends on the randomly generated weight matrices and on the learning rate. Using different learning rates and initial weights will produce a different accuracy.


In [None]:
# Evaluate the trained network on the held-out samples (forward pass only,
# no weight updates).
testing_correct_answers = 0
for i, sample in enumerate(x_test):
    activate_layer(hidden_layer, weights, sample)
    activate_layer(output_layer, hidden_weights, hidden_layer)
    output_layer = soft_max(output_layer)
    predicted_class = np.argmax(output_layer)
    if y_test[i] == predicted_class:
        testing_correct_answers += 1
print("Correct answers while testing: %s / %s (Accuracy = %s)" % (testing_correct_answers, len(x_test),
                                                                 testing_correct_answers/len(x_test)))

Correct answers while testing: 5 / 9 (Accuracy = 0.5555555555555556)


## 2. Build an RBF classifier for the same dataset and compare the results.

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
import math
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import validation_curve
from sklearn.model_selection import learning_curve

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Read the submission CSV and report its (rows, columns) shape.
# NOTE: the recorded output of this cell shows a FileNotFoundError, i.e. the
# file was not present in the working directory when the notebook was run.
db = pd.read_csv("gender_submission.csv")
print("Database raw shape (%s,%s)" % db.shape)

FileNotFoundError: ignored

In [None]:
# Load the two CSV files into DataFrames and print a column/dtype summary of
# the training frame.
train, test = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))
train.info()

In [None]:
def _extract_title(name):
    """Return the text between the first comma and the following period.

    Example: ``'Braund, Mr. Owen Harris'`` -> ``'Mr'``.
    """
    return name.split(',')[1].split('.')[0].strip()


# normalize the titles: raw honorific -> coarse title group
normalized_titles = {
    "Capt":       "Officer",
    "Col":        "Officer",
    "Major":      "Officer",
    "Jonkheer":   "Royalty",
    "Don":        "Royalty",
    "Sir" :       "Royalty",
    "Dr":         "Officer",
    "Rev":        "Officer",
    "the Countess":"Royalty",
    "Dona":       "Royalty",
    "Mme":        "Mrs",
    "Mlle":       "Miss",
    "Ms":         "Mrs",
    "Mr" :        "Mr",
    "Mrs" :       "Mrs",
    "Miss" :      "Miss",
    "Master" :    "Master",
    "Lady" :      "Royalty"
}

# Title group -> integer code used as the model feature.
title_codes = {'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Officer': 5, 'Royalty': 6}

# Each frame derives its Title from its OWN Name column.  (Building the test
# titles from the training frame would pair passengers by row index and
# re-map already normalized values.)  Assigning the column back avoids
# in-place replace on an attribute-accessed Series, which is not reliable
# under pandas Copy-on-Write.  Honorifics missing from normalized_titles
# become NaN.
for frame in (train, test):
    raw_titles = frame['Name'].apply(_extract_title)
    frame['Title'] = raw_titles.map(normalized_titles).map(title_codes)


In [None]:
# Drop the name feature
train = train.drop('Name', 1)
test = test.drop('Name', 1)
# Encoding the features
train.Embarked.replace(['S', 'C', 'Q'], [1, 2, 3], inplace=True)
train.Sex.replace(['male', 'female'], [1, 2], inplace=True)
test.Embarked.replace(['S', 'C', 'Q'], [1, 2, 3], inplace=True)
test.Sex.replace(['male', 'female'], [1, 2], inplace=True)

# Temporary drop of some columns
train = train.drop('Ticket', 1)
train = train.drop('Cabin', 1)
test = test.drop('Ticket', 1)
test = test.drop('Cabin', 1)

#Fill NaN values of Age with the mean Age
train['Age'] = train['Age'].fillna(train['Age'].sum()/len(train))
test['Age'] = test['Age'].fillna(test['Age'].sum()/len(test))
#Drop other raw where there is a NaN value
train= train.dropna(how='any',axis=0)  
test = test.fillna(0)

train.info()

In [None]:
# Bare expression: in a notebook cell this displays the training DataFrame.
train

In [None]:
Data_train = train.values
Data_test = test.values

# m = number of input samples
m_train = len(Data_train)
m_test = len(Data_test)
# Training labels: column 1 of the training table.
Ytrain = Data_train[:, 1]
# Training features: every column after the label column.
Xtrain = Data_train[:, 2:]
# Test features: every column after column 0 (the test table is sliced one
# column earlier than the training table).
Xtest = Data_test[:, 1:]

# Hyper-parameter grid explored with 5-fold cross-validation.
parameters = {'C': [1, 10, 50, 100,200,300, 1000],'gamma':[0.0001,0.001,0.01,0.1,1.]}
# run SVM with rbf kernel
rbf_SVM = SVC(kernel='rbf')
clf = GridSearchCV(rbf_SVM,parameters,cv=5)
clf.fit(Xtrain,Ytrain)

print ('\n RESULTS FOR rbf KERNEL \n')

best_param = clf.best_params_
value_best_param_rbf_gammma = best_param['gamma']
value_best_param_rbf_c = best_param['C']
estim_best = clf.best_estimator_
print("Best Estimator: ", estim_best)
print("Best parameters set found:",best_param)

# Refit an SVM with the best parameters on the whole training set.
best_SVM = SVC(C = value_best_param_rbf_c, gamma = value_best_param_rbf_gammma, kernel='rbf')

best_SVM.fit(Xtrain,Ytrain)

training_error = 1. - best_SVM.score(Xtrain,Ytrain)
print("Training error: ", training_error)

# Predictions for the test table; no test labels are used in this cell, so
# no test accuracy is computed from them here.
Ytest_predicted = best_SVM.predict(Xtest)

# This figure is measured on the data the model was fitted on, so it is
# labelled as training accuracy.  The mean cross-validated score of the best
# parameter set is the estimate to use when comparing with other classifiers.
print("Training accuracy =", 1 - training_error)
print("Cross-validated accuracy =", clf.best_score_)

In [None]:
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=-1, train_sizes= np.linspace(.1, 1.0, 10)):
    """Plot training and cross-validation scores against training-set size.

    Opens a new figure, obtains the scores from ``learning_curve`` and draws,
    for each of the two score sets, a shaded band of one standard deviation
    around the mean plus the mean curve itself (red for training, green for
    cross-validation).

    Args:
        estimator: Estimator handed to ``learning_curve``.
        title: Figure title.
        X: Feature matrix handed to ``learning_curve``.
        y: Target vector handed to ``learning_curve``.
        ylim: Optional ``(low, high)`` pair for the y axis.
        cv: Cross-validation setting forwarded to ``learning_curve``.
        n_jobs: Parallelism setting forwarded to ``learning_curve``.
        train_sizes: Training-set sizes forwarded to ``learning_curve``.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can show or save
        the figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # (colour, legend label, mean per size, standard deviation per size)
    curves = [
        ("r", "Training score",
         np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1)),
        ("g", "Cross-validation score",
         np.mean(cv_scores, axis=1), np.std(cv_scores, axis=1)),
    ]

    plt.grid()

    # Shaded +/- one standard deviation bands first, then the mean curves.
    for colour, _, mean, std in curves:
        plt.fill_between(sizes, mean - std, mean + std, alpha=0.1,
                         color=colour)
    for colour, label, mean, _ in curves:
        plt.plot(sizes, mean, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt

# Draw the learning curves of the best estimator found by the grid search,
# using 5-fold cross-validation, and display the figure.
plot_learning_curve(estimator=estim_best, title="SVC learning curves",
                    X=Xtrain, y=Ytrain, cv=5)
plt.show()