In [566]:
seed = 65
learning_rate = 0.01

In [567]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.ndimage
import random

def displayImage():
    with open("traindata.txt","r") as f:
        data = f.readlines()
        index = random.randint(0,len(data))
        index = 3

        data = [float(i) for i in data[index].split(',')] # Your list of 1040 grayscale values followed by the rotation angle (as strings)

        # Ensure the length of the data list is correct
        if len(data) != 1041:
            raise ValueError("The data list must contain exactly 1041 values, with the last being the rotation angle.")
        
        # Separate the pixel values and the rotation angle
        rotation_angle = float(data[-1])
        pixels = [int(p) for p in data[:-1]]
        
        # Reshape the pixels array to match the desired dimensions, e.g., 40x26
        width, height = 40, 26
        image_array = np.array(pixels).reshape((height, width))
        
        # Apply the rotation to the image
        rotated_image = scipy.ndimage.rotate(image_array, rotation_angle, reshape=True)
        
        # Display the image using Matplotlib
        plt.imshow(rotated_image, cmap='gray', vmin=0, vmax=255)
        plt.colorbar()  # Optional: adds a colorbar to indicate the scale
        plt.title(f"Grayscale Image {index}(Rotated {rotation_angle} degrees)")
        plt.show()

In [568]:
def hot_one_encoding(number):
    nums = [0]*4
    nums[number]=1
    return nums

def min_max_scale(trainData,trainLabels):
    pixel_arrays = []
    pixels_labels = []
    for i in range(len(trainData)):
        pixels = list(map(float,trainData[i]))
        rotation_angle = pixels.pop()
        hotOneEncoding = hot_one_encoding(int(rotation_angle))

        min_val = min(pixels)
        max_val = max(pixels)
        
        for j in range(len(pixels)):
            pixels[j] = (pixels[j]-min_val)/(max_val-min_val)

        for j in hotOneEncoding:
            pixels.append(j)

        pixel_arrays.append(pixels)
        pixels_labels.append(int(trainLabels[i]))

    inputData = np.array(pixel_arrays)
    inputLabels = np.array(pixels_labels)

    return inputData,inputLabels

def noMinMax(trainData,trainLabels):
    pixel_arrays = []
    pixels_labels = []

    for i in range(len(trainData)):
        pixels = list(map(float,trainData[i]))
        rotation_angle = pixels.pop()
        hotOneEncoding = hot_one_encoding(int(rotation_angle))
        for j in hotOneEncoding:
            pixels.append(j)
        pixel_arrays.append(pixels)
        pixels_labels.append(int(trainLabels[i]))
    inputData = np.array(pixel_arrays)
    inputLabels = np.array(pixels_labels)

    return inputData,inputLabels

In [569]:
with open("traindata.txt","r") as f:
    trainData = [i.split(",") for i in f.readlines()]

with open("trainlabels.txt","r") as f:
    trainLabels = f.readlines()

inputData,inputLabels = min_max_scale(trainData,trainLabels)    


In [570]:
def removeOutlier(inputData1,inputLabels1):
    removeRow = False
    if (removeRow):
        rows = [ 68,  202,  234,  442,  479,  553,  564,  589, 1120, 1242, 1357, 1584,
        2017, 2588, 2724, 3117, 3396, 3414, 3603, 3747, 3845, 3916, 4351, 4479,
        4496, 4604, 4692, 4711, 4806, 4830]
        inputData1 = np.delete(inputData1,rows,axis=0)
        inputLabels1 = np.delete(inputLabels1,rows,axis=0)
    else:
        cols=[ 89, 172, 189, 210, 236, 287, 367, 383, 416, 448, 455, 460, 462, 467,
        531, 546, 565, 574, 653, 666, 678, 702, 723, 726, 764, 787, 791, 805,
        810, 818, 856, 861, 873, 892, 894, 924, 929, 964]
        
        inputData1 = np.delete(inputData, cols, axis=1)

    return inputData1,inputLabels1



In [571]:
from sklearn.model_selection import train_test_split

x_train, x_temp,y_train,y_temp = train_test_split(inputData,inputLabels,test_size=0.2,random_state=seed)
x_test, x_val,y_test,y_val, = train_test_split(x_temp,y_temp, test_size=0.5, random_state=seed)

In [572]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [573]:
def getTrainingCount():
    count = {i:0 for i in range(21)}

    for i in trainLabels:
        count[int(i)]+=1

    return count

data = getTrainingCount()

In [574]:
torch.manual_seed(seed)

<torch._C.Generator at 0x7f74a2ba3b70>

In [575]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [576]:
x_train = torch.FloatTensor(x_train)
x_test = torch.FloatTensor(x_test)
x_val = torch.FloatTensor(x_val)

y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
y_val = torch.LongTensor(y_val)

In [577]:
class Model(nn.Module):
    #h1=512,h2=128,out=21 => 60% with dropout of 50% ;in was 1044
    def __init__(self,in_features=1044,h1=512,h2=128,out=21):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Linear(in_features,h1),
            nn.Sigmoid(),
            nn.Dropout(0.5)
        )

        self.layer2 = nn.Sequential(
            nn.Linear(h1,h2),
            nn.Sigmoid(),
            nn.Dropout(0.5)
        )

        self.outlayer = nn.Sequential(
            nn.Linear(h2,out)
        )

    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.outlayer(x)
        return x

In [578]:
config = {
    "epochs":1000,
    "checkInterval":5,
    "printInterval":30,
    "patience":30
}

outputConfig = {
    "maxEpochs":1000,
    "losses": []
}

In [579]:
def train(newModel,canPrint=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(newModel.parameters(),lr=learning_rate)

    best_val_loss = float('inf')
    no_improvement_count = 0
    temp_losses = []
    temp_epoch = 1000

    best_model_state = newModel.state_dict()

    for epoch in range(config["epochs"]):
        newModel.train()
        y_pred = newModel(x_train)
        loss = criterion(y_pred, y_train)
        temp_losses.append(loss.item())  # Append the loss value as a Python scalar
        if (epoch % config["printInterval"] == 0 and canPrint == True):
            print(f"Epoch: {epoch} and loss: {loss.item()}\t No improvement:{no_improvement_count}")

        # Early stopping
        if (epoch % config["checkInterval"] == 0):
            newModel.eval()
            with torch.no_grad():
                y_val_pred = newModel(x_val)
                val_loss = criterion(y_val_pred, y_val)
                if val_loss.item() < best_val_loss:
                    best_val_loss = val_loss.item()
                    no_improvement_count = 0  # Reset the no improvement count
                    best_model_state = newModel.state_dict()
                else:
                    no_improvement_count += 1
                    
                    if no_improvement_count >= config["patience"]:
                        temp_epoch = epoch
                        newModel.load_state_dict(best_model_state)
                        break
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        
    if (canPrint==True):
        print(f"Best epoch: {temp_epoch}")
    
    outputConfig["losses"] = temp_losses
    outputConfig["maxEpochs"] = temp_epoch
    return newModel

In [580]:
def trainModel(newModel):
    newModel= train(newModel,True)
    return newModel


In [581]:
# graph out loss for model
def graph():
    plt.plot(range(outputConfig["maxEpochs"]),outputConfig["losses"])
    plt.ylabel("loss/error")
    plt.xlabel("epoch")

In [582]:
import json

incorrect = {i: 0 for i in range(21)}  # Initialize the nested dictionary

def correct(model):
    incorrect = {i: 0 for i in range(21)} 
    model.eval()
    correct = 0

    with torch.no_grad():
        y_val = model.forward(x_test)
        for i in range(len(y_test)):
            predicted_label = y_val[i].argmax().item()
            true_label = y_test[i].item()

            if predicted_label == true_label:
                correct += 1
            else:
                incorrect[true_label] += 1

    accuracy = correct / len(y_test)
    return accuracy

def saveIncorrectToJSON(): 
    with open('incorrect_predictions.json', 'w') as json_file:
        json.dump(incorrect, json_file, indent=4)

In [583]:
newModel = Model()
optimizer = torch.optim.Adam(newModel.parameters(),lr=learning_rate)
newModel = trainModel(newModel)
accuracy = correct(newModel)
accuracy

Epoch: 0 and loss: 3.1384994983673096	 No improvement:0
Epoch: 30 and loss: 3.0414226055145264	 No improvement:0
Epoch: 60 and loss: 2.9032771587371826	 No improvement:0
Epoch: 90 and loss: 2.803936243057251	 No improvement:0
Epoch: 120 and loss: 2.668872117996216	 No improvement:0
Epoch: 150 and loss: 2.5383694171905518	 No improvement:0
Epoch: 180 and loss: 2.428770065307617	 No improvement:0
Epoch: 210 and loss: 2.325904607772827	 No improvement:0
Epoch: 240 and loss: 2.2201316356658936	 No improvement:0
Epoch: 270 and loss: 2.0953588485717773	 No improvement:0
Epoch: 300 and loss: 1.9936119318008423	 No improvement:0
Epoch: 330 and loss: 1.884547472000122	 No improvement:0
Epoch: 360 and loss: 1.8115012645721436	 No improvement:2
Epoch: 390 and loss: 1.7454336881637573	 No improvement:0
Epoch: 420 and loss: 1.6940008401870728	 No improvement:1
Epoch: 450 and loss: 1.602487564086914	 No improvement:0
Epoch: 480 and loss: 1.529606819152832	 No improvement:3
Epoch: 510 and loss: 1.490

0.5847619047619048

In [584]:
def testMultiple():
    in_features_const = 1044
    #h1:h2
    layers = [
        (520, 256),
        (512, 256), (512, 128), (512, 64), (512, 32),
        (256, 128), (256, 64), (256, 32),
        (1024, 512), (1024, 256), (1024, 128),
    ]

    for i in layers:
        h1 = i[0]
        h2 = i[1]
        print(f"h1:{h1} and h2:{h2}")
        newModel = Model(in_features_const,h1,h2)
        newModel = trainModel(newModel)
        correct(newModel)

#testMultiple()

In [585]:
# save model
def save(model):
    torch.save(model.state_dict(),"classify.pt")