In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
np.random.seed(0)
epsilon = 1e-15

In [3]:
import torchvision.datasets as ds
import torchvision.transforms as transforms


train_validation_dataset = ds.EMNIST(root='./data', split='letters',
                              train=True,
                              transform=transforms.ToTensor(),
                              download=False)


In [20]:
# check the unique classes of the dataset
print(train_validation_dataset.classes)


['N/A', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
independent_test_dataset = ds.EMNIST(root='./data',
                       split='letters',
                             train=False,
                             transform=transforms.ToTensor())

In [5]:
train_validation_dataset

Dataset EMNIST
    Number of datapoints: 124800
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
independent_test_dataset

Dataset EMNIST
    Number of datapoints: 20800
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# import numpy as np

def one_hot_encode(labels, num_classes):
    num_samples = len(labels)
    one_hot_labels = np.zeros((num_samples, num_classes))
    
    for i, label in enumerate(labels):
        one_hot_labels[i, label - 1] = 1  # Adjust indexing to start from 0
    
    return one_hot_labels

# # Example usage:
# labels = np.array([0, 1, 2, 1, 0])
# num_classes = 3
# one_hot_labels = one_hot_encode(labels, num_classes)

# print(one_hot_labels)


In [8]:
def loss_func(Y_pred, Y, loss_type='binary'):
    m = Y.shape[1]
    
    # clipping to avoid zero log
    epsilon = 1e-8
    np.clip(Y_pred, epsilon, 1 - epsilon, out=Y_pred)
    
    if loss_type == 'binary':
        loss = -(1/m) * np.sum(Y * np.log(Y_pred) + (1 - Y) * np.log(1-Y_pred))
    elif loss_type == 'multiclass':
        loss = -(1/m) * np.sum(Y * np.log(Y_pred.T))
    else:
        raise ValueError("Invalid loss_type. Expected 'binary' or 'multiclass'")
    
    return loss  

In [9]:
def relu(x):
    return np.maximum(0, x)

def relu_dZ(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    
    return dZ

In [10]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_dZ(dA, Z):
    g_z = sigmoid(Z)
    dZ = dA * g_z * (1 - g_z)
    
    return dZ

In [11]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0) 

def softmax_dZ(dA, Z):
    g_z = softmax(Z)
    dZ = dA * g_z * (1 - g_z)
    
    return dZ

In [12]:
def single_layer_forward_prop(W, b, A_prev, activation=relu):
    Z = np.dot(W, A_prev) + b
    A = activation(Z)
    cache = (W, b, A_prev, Z, A)
    
    return A, cache

In [13]:
def deep_forward_prop(X, parameters, loss_type='binary'):
    A_prev = X
    L = len(parameters) // 2
    
    if loss_type == 'binary':
        activation = sigmoid
    elif loss_type == 'multiclass':
        activation = softmax
    
    all_caches = []
    for i in range(1, L):
        W_i = parameters['W' + str(i)]
        b_i = parameters['b' + str(i)]
        A_prev, cache = single_layer_forward_prop(W_i, b_i, A_prev)
        
        all_caches.append(cache)
        
    W_Last = parameters['W' + str(L)]
    b_Last = parameters['b' + str(L)]
    A_Last, cache = single_layer_forward_prop(W_Last, b_Last, A_prev, activation=activation)
    all_caches.append(cache)
    
    return A_Last, all_caches

In [14]:
def single_layer_backward_prop(dA, cache, activation_dZ=relu_dZ):
    W, b, A_prev, Z, A = cache
    m = A_prev.shape[1]
    
    dZ = activation_dZ(dA, Z)
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    
    return dA_prev, dW, db

In [15]:
def compute_last_layer_dAL(Y, AL, loss_type='binary'):
    if loss_type == 'binary':
        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    elif loss_type == 'multiclass':
        dAL = AL - Y
    else:
        raise ValueError("Invalid loss_type")
    
    return dAL

In [16]:
def deep_backward_prop(A_last, Y, all_caches, loss_type='binary'):
    L = len(all_caches)
    
    # needs to be changed for multiclass classification
    dAL_last_layer = compute_last_layer_dAL(Y, A_last, loss_type=loss_type)
    cache_last_layer = all_caches[L-1]
    last_activation_dZ = sigmoid_dZ
    
    if loss_type == 'multiclass':
        last_activation_dZ = softmax_dZ
    
    gradients = {}
    dA_prev, dW, db = single_layer_backward_prop(dAL_last_layer, cache_last_layer, activation_dZ=last_activation_dZ)
    gradients['dA' + str(L-1)] = dA_prev
    gradients['dW' + str(L)] = dW
    gradients['db' + str(L)] = db
    
    for i in reversed(range(L-1)):
        dA_prev, dW, db = single_layer_backward_prop(gradients['dA' + str(i+1)], all_caches[i])
        gradients['dA' + str(i)] = dA_prev
        gradients['dW' + str(i+1)] = dW
        gradients['db' + str(i+1)] = db
        
    return gradients

In [17]:
def gradient_descent(parameters, gradients, learning_rate):
    L = len(parameters) // 2
    
    for i in range(L):
        # retrieve gradients
        dW_i = gradients['dW' + str(i+1)]
        db_i = gradients['db' + str(i+1)]
        
        # update parameters
        parameters['W' + str(i+1)] -= learning_rate * dW_i
        parameters['b' + str(i+1)] -= learning_rate * db_i
        
    return parameters

In [18]:
def random_initialize_parameters(layer_nodes_list):
    parameters = {}
    L = len(layer_nodes_list)
    
    for layer_i in range(1, L):
        parameters['W' + str(layer_i)] = np.random.randn(layer_nodes_list[layer_i], layer_nodes_list[layer_i-1]) 
        parameters['b' + str(layer_i)] = np.zeros((layer_nodes_list[layer_i], 1))
        
    return parameters

In [19]:
def L_layer_FNN_model(layer_nodes_list, X, Y, learning_rate=0.01, epochs=3000, loss_type='binary', print_cost=False):
    parameters = random_initialize_parameters(layer_nodes_list)
    costs = []
    L = len(layer_nodes_list)
    
    for i in range(epochs):
        A_last, all_caches = deep_forward_prop(X, parameters)
        cost = loss_func(A_last, Y, loss_type=loss_type)
        # calculate accuracy
        
        gradients = deep_backward_prop(A_last, Y, all_caches, loss_type=loss_type)
        
        parameters = gradient_descent(parameters, gradients, learning_rate)
        
        if print_cost and i % 100 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            #print("final output: {}".format(A_last))
    
            costs.append(cost)
            
    return parameters, costs

In [62]:
def accuracy(A_last, Y):
   m = Y.shape[1]
   return np.sum(A_last == Y) / m



# #checking if the model is working with simple data and multiclass classfication
# X = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
# Y = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])


# parameters, costs = L_layer_FNN_model([1, 10, 10], X, Y, learning_rate=0.01, epochs=1000, print_cost=True)

# #test on example test data
# X_test = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
# Y_test = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

# A_last, _ = deep_forward_prop(X_test, parameters)
# print(A_last)

# import matplotlib.pyplot as plt
# plt.plot(costs)
# plt.show()

#checking if the model is working with simple data and binary classfication
X = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
Y = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]])

parameters, costs = L_layer_FNN_model([1, 10, 10], X, Y, learning_rate=0.1, epochs=10000, print_cost=True, loss_type='binary')


#test on example test data
X_test = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
Y_test = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]])

A_last, _ = deep_forward_prop(X_test, parameters)
print(A_last)


#confusion matrix
from sklearn.metrics import confusion_matrix
y_pred = np.round(A_last)
y_pred = y_pred.astype(int)
y_test = Y_test.astype(int)
confusion_matrix(y_test[0], y_pred[0])




Cost after iteration 0: 9.400085515775217
Cost after iteration 100: 6.098798661713403
Cost after iteration 200: 5.648562623837844
Cost after iteration 300: 5.271846442406679
Cost after iteration 400: 4.954005455085699
Cost after iteration 500: 4.683263107603634
Cost after iteration 600: 4.450363284222019
Cost after iteration 700: 4.248083113282717
Cost after iteration 800: 4.070789772508327
Cost after iteration 900: 3.914074493403994
Cost after iteration 1000: 3.774465023178693
Cost after iteration 1100: 3.649205525236702
Cost after iteration 1200: 3.5360901826750304
Cost after iteration 1300: 3.4333379383546347
Cost after iteration 1400: 3.339498131718262
Cost after iteration 1500: 3.2533791373573795
Cost after iteration 1600: 3.1739940877213986
Cost after iteration 1700: 3.100519305913565
Cost after iteration 1800: 3.0322622337774385
Cost after iteration 1900: 2.9686364940746
Cost after iteration 2000: 2.909142348227082
Cost after iteration 2100: 2.8533512638811214
Cost after iterati

array([[5, 0],
       [0, 5]], dtype=int64)

In [None]:

# Assuming you have the functions defined in the previous code

# Function to convert PyTorch dataset to NumPy arrays
def convert_to_numpy(dataset):
    features = []
    labels = []

    for data in dataset:
        image, label = data
        features.append(image.numpy().flatten())
        labels.append(label)

    return np.array(features).T, np.array(labels).reshape(1, -1)

# Load EMNIST datasets
train_features, train_labels = convert_to_numpy(train_validation_dataset)
test_features, test_labels = convert_to_numpy(independent_test_dataset)

# Normalize features
train_features = train_features / 255.0
test_features = test_features / 255.0

# One-hot encode labels for multiclass classification
num_classes = len(np.unique(train_labels))
train_labels_one_hot = np.eye(num_classes)[train_labels.flatten() - 1].T  # Corrected indexing

# Set hyperparameters
layer_nodes_list = [train_features.shape[0], 128, 64, num_classes]
learning_rate = 0.01
epochs = 1000

# Train the model
parameters, costs = L_layer_FNN_model(layer_nodes_list, train_features, train_labels_one_hot, learning_rate, epochs, loss_type='multiclass', print_cost=True)

# Test the model
def predict(parameters, X):
    A_last, _ = deep_forward_prop(X, parameters, loss_type='multiclass')
    predictions = np.argmax(A_last, axis=0)
    return predictions.reshape(1, -1)

# Evaluate on the test set
test_predictions = predict(parameters, test_features)
accuracy = np.mean(test_predictions == test_labels)
print("Test Accuracy: {:.2%}".format(accuracy))


In [21]:
# Load the datasets
train_validation_dataset = ds.EMNIST(root='./data', split='letters',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=False)

independent_test_dataset = ds.EMNIST(root='./data',
                                     split='letters',
                                     train=False,
                                     transform=transforms.ToTensor())

# Convert the datasets to numpy arrays
X_train = train_validation_dataset.data.numpy()
Y_train = train_validation_dataset.targets.numpy()
X_test = independent_test_dataset.data.numpy()
Y_test = independent_test_dataset.targets.numpy()

num_classes = 26

# One-hot encode labels for training and testing
Y_train_one_hot = one_hot_encode(Y_train, num_classes)
Y_test_one_hot = one_hot_encode(Y_test, num_classes)

# Reshape the input data
X_train = X_train.reshape(X_train.shape[0], -1).T
X_test = X_test.reshape(X_test.shape[0], -1).T

# Normalize the input data
X_train = X_train / 255.0
X_test = X_test / 255.0



In [23]:
#print train and test
print(Y_train_one_hot.label)    
# print(Y_train.shape)
# print(X_test.shape)
# print(Y_test.shape)

AttributeError: 'numpy.ndarray' object has no attribute 'label'

In [80]:
# Define the layer nodes list
layer_nodes_list = [X_train.shape[0], 128, 64, 26]  # Example architecture

# Train the model
parameters, costs = L_layer_FNN_model(layer_nodes_list, X_train, Y_train_one_hot, learning_rate=0.01, epochs=3000, loss_type='multiclass', print_cost=True)

# Test the model
A_last, _ = deep_forward_prop(X_test, parameters)
# Perform further evaluation or analysis on the test results

ValueError: operands could not be broadcast together with shapes (26,124800) (124800,26) 