In [230]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

In [17]:
# import the csv as a pandas dataframe 
df = pd.read_csv("devanagari_df.csv", index_col=0)

In [134]:
"""
    Defines the Feed Forward Neural Network Module.
    This module takes in the number of hidden layers, the nodes per layer, the training data, and the activation function,
        and outputs the model to be trained using its forward method.
    Args:
        num_hidden_layers (Int): the number of hidden layers of the neural network (doesn't include the input/output layers)
        nodes_per_layer (List[Int]): the number of nodes per layer. The last one should be 1, and the length should be (2 + num_hidden_layers)
        X_trn (Tensor([Double, Double])): a tensor representing the training data for the model
        activation_function (Function): Must be one of torch.nn.Sigmoid(), torch.nn.ReLU(), torch.tanh(), torch.nn.Identity()
    Returns:
        The Feed Forward Neural Network model to be trained
"""
class Feed_Forward(torch.nn.Module):
    def __init__(self, num_hidden_layers, nodes_per_layer, activation_function):
        super(Feed_Forward, self).__init__()
        # assert Module inputs adhere to constraints
        assert(num_hidden_layers == len(nodes_per_layer) - 2)

        self.num_hidden_layers = num_hidden_layers
        self.nodes_per_layer = nodes_per_layer
        self.activation_function = activation_function
        self.layers = []
        # define the first layer as going from the initial data count to the first hidden layer's node count
        self.fc1 = torch.nn.Linear(nodes_per_layer[0], nodes_per_layer[1])
        # Append the first layer and activation function to this Module
        self.layers.append(self.fc1)
        self.layers.append(activation_function)

        # Append layers to the model for each hidden layer
        for i in range(num_hidden_layers - 1):
            self.layers.append(torch.nn.Linear(self.nodes_per_layer[i + 1], self.nodes_per_layer[i + 2]))
            self.layers.append(activation_function)

        # Append the output layer to the model
        self.output_layer = torch.nn.Linear(self.nodes_per_layer[-2], self.nodes_per_layer[-1])
        self.output_activation_sigmoid = torch.nn.Sigmoid()
        self.layers.append(self.output_layer)
        self.layers.append(self.output_activation_sigmoid)

    # runs a step through the model, generating the output from this epoch of training
    def forward(self, x):
        model = torch.nn.Sequential(*self.layers)
        return model.forward(x)

In [178]:
def train_model(model, epochs, X_trn, Y_trn):
    """
        Trains the given model, performing the given number of epochs over the given Training and Testing data.
        Args:
            model (torch.nn.Module): The Feed Forward Neural Network to be trained
            epochs (int): The total number of epochs to train over
            X_trn (Tensor([Double, Double])): the training features
            Y_trn (Tensor(Int)): the training target
    """
    # define a loss criteria for the model (use Cross-Entropy Loss, as this is a classification problem)
    criterion = torch.nn.CrossEntropyLoss()
    # define an optimizer for the model (use Stochastic Gradient Descent)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    # set the model to train
    model.train()
    # set default value for final layer output
    final_layer_output = 0
    # for every epoch, train the model
    for epoch in range(epochs):
        optimizer.zero_grad()
        # Move forward through the model
        y_pred = model(X_trn)
        #y_pred = y_pred.type(torch.LongTensor)

        # if this is the last epoch, we save the predictions
        if(epoch == epochs - 1):
            final_layer_output = y_pred.round()
        # Compute the loss
        loss = criterion(y_pred, Y_train)
        # Back-propagate to adjust weights
        loss.backward()
        # Step through model
        optimizer.step()
    return model, final_layer_output


In [117]:
def create_and_train_ff_nn(num_hidden_layers, nodes_per_layer, X_trn, Y_trn, activation_function, epochs):
    """
        Creates a Feed Forward Neural Network, and then trains it over the specified number of epochs
        Args:
            num_hidden_layers (Int): the total number of hidden layers for the neural network, either 1 or 2
            nodes_per_layer (List[Int]): the number of nodes for each layer of the neural network
            dataset (List[(Double, Double), Int]): the training dataset for the model
            activation_function (Function): the activation function used
            epochs (Int): the number of epochs to train over
        Returns:
            the trained model

    """
    # define the feed forward model
    model = Feed_Forward(num_hidden_layers, nodes_per_layer, activation_function)
    # train the model and get the final output
    model, final_layer_output = train_model(model, epochs, X_trn, Y_trn)
    return model

In [231]:
def test_model(model, X_trn, Y_trn, X_tst, Y_tst):
    # testing, so don't want to use a gradient
    with torch.no_grad():
        # testing the model
        model.eval()
        # make the predictions
        y_pred = model(X_trn)
        y_pred = torch.argmax(y_pred, dim=-1).cpu().detach().numpy()
        # find the accuracy for training and testing
        training_accuracy = accuracy_score(Y_trn, y_pred)
        y_pred = model(X_tst)
        y_pred = torch.argmax(y_pred, dim=-1).cpu().detach().numpy()
        testing_accuracy = accuracy_score(Y_tst, y_pred)
        return training_accuracy, testing_accuracy

In [121]:
features = df.iloc[:, :-1]
target = df.iloc[:, -1]
target = [str(x) for x in target]
label_encoder = LabelEncoder()
target = label_encoder.fit_transform(target)

In [164]:
X_train, X_test, Y_train, Y_test = train_test_split(features, target)
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
# turn the training and testing data into tensors
X_train = torch.FloatTensor(X_train)
Y_train = torch.LongTensor(Y_train)
X_test = torch.FloatTensor(X_test)
Y_test = torch.LongTensor(Y_test)f

In [None]:
cross_val_score(neural_network, features, target, cv=3)

In [184]:
nodes_per_layer_list = [[X_train.shape[1], 100, 46], [X_train.shape[1], 100, 100, 46], [X_train.shape[1], 500, 100, 46], [X_train.shape[1], 100, 100, 100, 46], [X_train.shape[1], 100, 100, 100, 100, 46]]
hidden_layers_list  = [len(x) - 2 for x in nodes_per_layer_list]

[1, 2, 2, 3, 4]

In [233]:
epochs = 100
a = torch.nn.Sigmoid()
n_splits = 5
#activation_functions_list = [torch.nn.ReLU(), torch.nn.Sigmoid()]
kf = StratifiedKFold(n_splits=n_splits, random_state=4400, shuffle=True)
for i, nodes_per_layer in enumerate(nodes_per_layer_list):
    num_hidden_layers = hidden_layers_list[i]
    training_count = 0.0
    testing_count = 0.0
    print("Running model with ", num_hidden_layers, "hidden layers: ", nodes_per_layer)
    for train_index, test_index in kf.split(features, target):
        X_train, X_test = features[train_index[0]:train_index[-1]], features[test_index[0]:test_index[-1]]
        Y_train, Y_test = target[train_index[0]:train_index[-1]], target[test_index[0]:test_index[-1]]
        X_train = X_train.to_numpy()
        X_test = X_test.to_numpy()
        # turn the training and testing data into tensors
        X_train = torch.FloatTensor(X_train)
        Y_train = torch.LongTensor(Y_train)
        X_test = torch.FloatTensor(X_test)
        Y_test = torch.LongTensor(Y_test)

        #for a in activation_functions_list:
        model = create_and_train_ff_nn(num_hidden_layers, nodes_per_layer, X_train, Y_train, a, epochs)
        curr_training_count, curr_testing_count = test_model(model, X_train, Y_train, X_test, Y_test)
        training_count = training_count + curr_training_count
        testing_count = testing_count + curr_testing_count
    training_accuracy = training_count / n_splits
    testing_accuracy = testing_count / n_splits
    print("\tAverage training accuracy: ", training_accuracy)
    print("\tAverage testing accuracy: ", testing_accuracy, "\n")

Running model with  1 hidden layers:  [1024, 100, 46]
	Average training accuracy:  0.6660398237541078
	Average testing accuracy:  0.6660340108037875 

Running model with  2 hidden layers:  [1024, 100, 100, 46]
	Average training accuracy:  0.5559075185532732
	Average testing accuracy:  0.5559032428013941 

Running model with  2 hidden layers:  [1024, 500, 100, 46]


KeyboardInterrupt: 

In [None]:
ReLU:
Running model with  1 hidden layers:  [1024, 100, 46]
	Training accuracy:  0.5156811594202898
	Testing accuracy:  0.5025652173913043
Running model with  2 hidden layers:  [1024, 100, 100, 46]
	Training accuracy:  0.10592753623188406
	Testing accuracy:  0.104
Running model with  2 hidden layers:  [1024, 500, 100, 46]
	Training accuracy:  0.07572463768115942
	Testing accuracy:  0.07152173913043479
Running model with  3 hidden layers:  [1024, 100, 100, 100, 46]
	Training accuracy:  0.28982608695652173
	Testing accuracy:  0.2658695652173913
Running model with  4 hidden layers:  [1024, 100, 100, 100, 100, 46]
	Training accuracy:  0.36144927536231886
	Testing accuracy:  0.34991304347826085

In [None]:
Sigmoid:
Running model with  1 hidden layers:  [1024, 100, 46]
	Training accuracy:  0.7673188405797101
	Testing accuracy:  0.7322608695652174
Running model with  2 hidden layers:  [1024, 100, 100, 46]
	Training accuracy:  0.6775072463768116
	Testing accuracy:  0.6507391304347826
Running model with  2 hidden layers:  [1024, 500, 100, 46]
	Training accuracy:  0.802695652173913
	Testing accuracy:  0.759695652173913
Running model with  3 hidden layers:  [1024, 100, 100, 100, 46]
	Training accuracy:  0.5211159420289855
	Testing accuracy:  0.5023478260869565
Running model with  4 hidden layers:  [1024, 100, 100, 100, 100, 46]
	Training accuracy:  0.25714492753623186
	Testing accuracy:  0.24178260869565218