#Group 17, Primary Model and Architecture
Group members: Tiger Luo, Kyle Wang

APS360

August 15, 2022

In [None]:
#Libraries used
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision
import torchvision.datasets
import torchvision.transforms as transforms
from google.colab import drive
from PIL import Image, ImageOps
drive.mount('/content/drive')

#Autoencoder

In [None]:
#This is what our autoencoder looks like
class HMEAutoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    The encoder applies two stride-2 convolutions (3 -> 24 -> 48 channels).
    The decoder mirrors it with two stride-2 transposed convolutions
    (48 -> 24 -> 3 channels) and ends with a sigmoid.
    """

    def __init__(self):
        super().__init__()
        #Options shared by every (transposed) convolution in the network
        conv_args = dict(kernel_size=3, stride=2, padding=1)

        downsample_layers = [
            nn.Conv2d(3, 24, **conv_args),
            nn.ReLU(),
            nn.Conv2d(24, 48, **conv_args),
        ]
        upsample_layers = [
            nn.ConvTranspose2d(48, 24, output_padding=1, **conv_args),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 3, output_padding=1, **conv_args),
            nn.Sigmoid(),
        ]
        self.encoder = nn.Sequential(*downsample_layers)
        self.decoder = nn.Sequential(*upsample_layers)

    def forward(self, x):
        """Encode x and return its reconstruction."""
        return self.decoder(self.encoder(x))

In [None]:
#Shared PIL-to-tensor conversion; callers divide the result by 255 to get float images
transform = transforms.Compose([transforms.PILToTensor()])

local_dir = "" #Change to the directory of folder the project folder is in

#Folder holding the images used to train the autoencoder
file_dir = local_dir + "/Group 17 - Final Deliverable/Autoencoder data" 

#No transform is passed here, so the images are converted later via transform
data=torchvision.datasets.ImageFolder(file_dir)

In [None]:
#Collects the images (labels are dropped) that the autoencoder is trained on
max_autoencoder_images=4000 #Can change 4000 to anything needed
#Clamp to the dataset size so a smaller dataset does not raise an IndexError
train_a_data=[data[i][0] for i in range(min(max_autoencoder_images, len(data)))]

In [None]:
#This is our primary training block, all the functions used in training the autoencoder are here

def rand_invert(image): #Adds a 50% chance to invert image colours
    """Return image with inverted colours half of the time, otherwise unchanged."""
    draw=torch.rand(1).item() #One uniform sample from [0, 1)
    if draw>=0.5:
        return image
    return transforms.functional.invert(image)

def image_processing(image):
    """Scale an image to floats, crop it to multiples of 4, and augment it.

    Returns the cropped image and its augmented copy, in that order.
    """
    tensor=transform(image)/255 #Converts from int to float type pictures
    full_height, full_width=tensor.shape[1], tensor.shape[2]
    #Drop the remainder so height and width are rounded down to multiples of 4
    cropped=tensor[:, :full_height-full_height%4, :full_width-full_width%4]
    return cropped, augment(cropped) #augment has a 50% chance to invert

def augment(image): #Put image augmentations here, perhaps could've done other augmentations, but there's only two for now
    """Return an augmented version of a float image tensor.

    The values are clamped to [0, 1] and the colours are then inverted with
    50% probability by rand_invert.
    """
    #image=image-0.4*torch.randn(*image.shape) #Adding noise, doesn't do much so we removed it for now
    #torch.clamp keeps the data a tensor; np.clip would round-trip it through NumPy
    image=torch.clamp(image,0.,1.)
    return rand_invert(image)

def train(model, HME_Data, num_epochs=100, learning_rate=0.01):
    """Train an autoencoder to reconstruct images from their augmented copies.

    Every item of HME_Data is passed through image_processing and used for one
    SGD step on its own (no batching).

    Args:
        model: autoencoder that is optimised in place.
        HME_Data: iterable of images accepted by image_processing; it is
            iterated once per epoch.
        num_epochs: number of passes over HME_Data.
        learning_rate: SGD learning rate (momentum is fixed at 0.9).

    Returns:
        A list with one (epoch, loss, image, recon) tuple per epoch. epoch is
        0-based and the other three values come from the last item seen in
        that epoch. loss and recon are detached tensors.

    Raises:
        ValueError: if HME_Data yields no items.
    """
    torch.manual_seed(20)
    criterion = nn.MSELoss()

    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    outputs = []
    for epoch in range(num_epochs):
        loss = None
        for data in HME_Data:
            optimizer.zero_grad()
            image, augmented_image=image_processing(data)
            recon = model(augmented_image) #Model sees the augmented image...
            loss = criterion(recon, image) #...but is scored against the clean one
            loss.backward()
            optimizer.step()

        if loss is None:
            raise ValueError("HME_Data is empty, there is nothing to train on")

        #Note that this is the loss of the last image only, not an epoch average
        print('Epoch:{}, Loss:{:.4f}'.format(epoch+1, float(loss)))
        #Detach before storing so no autograd graph is kept alive per epoch
        outputs.append((epoch, loss.detach(), image, recon.detach()))
    return outputs

In [None]:
#Builds a fresh autoencoder and trains it; outputs_auto gets one (epoch, loss, image, recon) tuple per epoch
autoencoder=HMEAutoencoder()
outputs_auto=train(autoencoder, train_a_data, 100, 0.01) #Can do more than 100 epochs, but we only did 100 for time's sake

#Autoencoder Visualizations

In [None]:
#This helps visualize how well it reconstructs over time

outputs=outputs_auto

fig=plt.figure(figsize=(50, 150))
rows=len(outputs) #One subplot row per recorded epoch, so no manual update is needed
cols=1

for epoch, loss, image, recon in outputs:
    recon_out=recon.detach().numpy()
    recon_out=np.transpose(recon_out,[1,2,0]) #Moves the first axis last, which is the layout imshow expects
    fig.add_subplot(rows,cols,epoch+1) #Subplot positions are 1-based
    plt.imshow(recon_out)
    plt.axis('off')
    plt.title("Epoch: {}".format(epoch+1)) #1-based, matching the numbers printed during training

In [None]:
#This is meant to plot the loss curve

#Epoch numbers go on the x axis, the recorded losses (as NumPy values) on the y axis
x_axis=[record[0] for record in outputs]
y_axis=[record[1].detach().numpy() for record in outputs]

plt.title("Loss curve")
plt.plot(x_axis,y_axis, label="Loss")

In [None]:
#Allows you to compare side by side the image and its reconstruction

image_index=400 #Index into train_a_data of the image to inspect
model=autoencoder #If using a different name, put it here

fig=plt.figure(figsize=(10,10))
rows=2
cols=1

#Element [1] is the augmented image, so the picture shown (and fed to the model) may be colour-inverted
image=image_processing(train_a_data[image_index])[1]
image_show=image.detach().numpy()
image_show=np.transpose(image_show,[1,2,0]) #Moves the first axis last for imshow
fig.add_subplot(rows,cols,1)
plt.imshow(image_show)
plt.axis('off')
plt.title("Image")


#Second row: what the model reconstructs from that same image
recon=model(image)
recon_out=recon.detach().numpy()
recon_out=np.transpose(recon_out,[1,2,0])
fig.add_subplot(rows,cols,2)
plt.imshow(recon_out)
plt.axis('off')
plt.title("Recon")

#Classifier

In [None]:
class HMEClassifier(nn.Module):
    """Fully connected classifier: 65*22*48 inputs -> 2000 -> 100 -> 9 outputs."""

    #Number of values in one flattened input
    _FLAT_FEATURES = 65*22*48

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(self._FLAT_FEATURES, 2000)
        self.layer2 = nn.Linear(2000, 100)
        self.layer3 = nn.Linear(100, 9)

    def forward(self, img):
        """Flatten img into rows of 65*22*48 values and return the raw scores of the last layer."""
        hidden = img.view(-1, self._FLAT_FEATURES)
        for hidden_layer in (self.layer1, self.layer2):
            hidden = F.relu(hidden_layer(hidden))
        return self.layer3(hidden) #No activation on the output layer

def full_model(model, encoder, img): #The full model is the encoder, a pooling layer, and the actual classifier
    """Run img through encoder, a 2x2 max pooling layer, and then model.

    The pooled embedding is detached before it reaches model, so no gradient
    flows back into encoder.
    """
    downsample=nn.MaxPool2d(kernel_size=2, stride=2)
    embedding=downsample(encoder(img))
    return model(embedding.detach())

In [None]:
def process_image(img):
    """Convert img with the shared transform and rescale the result by 1/255."""
    return transform(img)/255

def get_accuracy(data, model, encoder):
    """Return the fraction of samples in data that the full model classifies correctly.

    Args:
        data: dataset of (image tensor, label) pairs that a DataLoader can batch.
        model: classifier applied to the pooled encodings.
        encoder: encoder applied to the images.

    Returns:
        correct / total as a float, or 0.0 when data is empty.
    """
    correct = 0
    total = 0
    with torch.no_grad(): #Evaluation only, so no autograd graph needs to be built
        for imgs, labels in torch.utils.data.DataLoader(data, batch_size=100):
            output = full_model(model, encoder, imgs) #One row of class scores per image in the batch
            pred = output.max(1, keepdim=True)[1] #Index of the highest score in each row
            correct += pred.eq(labels.view_as(pred)).sum().item()
            total += imgs.shape[0]
    if total == 0:
        return 0.0 #Avoids a ZeroDivisionError on an empty dataset
    return correct / total


def train_classifier(model, encoder, train_data, val_data, epochs=80, learning_rate=0.0001): #Keep lr at 0.0001 or lower
    """Train the classifier on the pooled outputs of encoder.

    Only model's parameters are given to the optimizer, and full_model
    detaches the encodings, so encoder itself is not updated.

    Args:
        model: classifier that is optimised in place.
        encoder: encoder used to embed the images.
        train_data: dataset of (image tensor, label) pairs, read in batches of 40.
        val_data: dataset of (image tensor, label) pairs used for validation accuracy.
        epochs: number of passes over train_data.
        learning_rate: SGD learning rate (momentum is fixed at 0.9).

    Returns:
        A list with one (epoch, average loss, train_acc, val_acc) tuple per
        epoch. epoch is 0-based and the average loss is a detached tensor.

    Raises:
        ValueError: if train_data yields no batches.
    """
    criterion=nn.CrossEntropyLoss()
    optimizer=torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    outputs=[]

    train_batch=torch.utils.data.DataLoader(train_data, 40)

    for epoch in range(epochs):
        batch_no=0
        total_loss=0
        for imgs, label in train_batch:
            optimizer.zero_grad()

            out=full_model(model, encoder, imgs)
            #CrossEntropyLoss takes class indices directly, so no one hot encoding is needed
            loss=criterion(out, label)
            #print(loss) #For debugging, previously had issue w/ exploding gradients, so default lr was lowered to 0.0001
            loss.backward()
            optimizer.step()

            #Detached so the graph of every batch isn't kept alive for the whole epoch
            total_loss=total_loss+loss.detach() #For averaging loss
            batch_no+=1

        if batch_no==0:
            raise ValueError("train_data is empty, there is nothing to train on")

        print('Epoch:{}, Loss:{:.4f}'.format(epoch+1, float(total_loss/batch_no))) #Prints average loss

        train_acc=get_accuracy(train_data, model, encoder)
        val_acc=get_accuracy(val_data, model, encoder)
        print('Train acc: {:.4f}, val acc: {:.4f}'.format(train_acc, val_acc))
        outputs.append((epoch, total_loss/batch_no, train_acc, val_acc),)

    return outputs

In [None]:
#Folder with the labelled images; ImageFolder derives each label from the sub-folder an image is in
classifier_dir=local_dir + "/Group 17 - Final Deliverable/Classified HMEs" 
classifier_data=torchvision.datasets.ImageFolder(classifier_dir)

print(classifier_data) #Total amount of classifier data is 920 entries

In [None]:
def manual_pull(iterable, indices):
    """Return a list of (image scaled by 1/255, label) pairs for the given indices.

    Each picked entry of iterable is expected to be an (image, label) pair
    whose image the shared transform accepts.
    """
    def pull(index):
        #Index twice, exactly like a plain loop over iterable[i][0] and iterable[i][1]
        return (transform(iterable[index][0])/255, iterable[index][1])

    return [pull(index) for index in indices]

np.random.seed(60) #Fixed seed so the shuffled split is the same on every run
data_len=len(classifier_data)
indices=list(range(data_len))
np.random.shuffle(indices)
#The split points are hard-coded for the 920-entry dataset; entries past index 920 would be left unused
train_indices=indices[:840] #Manually split the data here, you can change it if you'd like
val_indices=indices[840:920] 
#No testing data, because demo data is used instead

train_c_set=manual_pull(classifier_data,train_indices) #This is to format the data correctly
val_c_set=manual_pull(classifier_data,val_indices)

In [None]:
#encoder is the same module object as autoencoder.encoder, not a copy
encoder=autoencoder.encoder #Takes the encoder from the autoencoder
classifier=HMEClassifier()

In [None]:
#Note that this rebinds outputs, which earlier held the autoencoder training records
outputs=train_classifier(classifier,encoder,train_c_set, val_c_set, 80, 0.0001) #Trains on 80 epochs

In [None]:
#For early stopping with patience, monitor the accuracies to ensure it doesn't overfit or go off weirdly
n=1 #Can go epoch by epoch, or larger steps if wanted. We recommend sticking with just 1 though
#We used a period of 5 or 6 epochs for early stopping
epochs=len(outputs) #Epochs trained so far, read from the records so it never has to be updated by hand

out_temp=train_classifier(classifier,encoder,train_c_set, val_c_set, n, 0.0001)
#train_classifier numbers its epochs from 0 on every call, so shift them to continue after the existing records
for offset, out in enumerate(out_temp):
    outputs.append((epochs+offset,out[1], out[2],out[3]))

#Classifier Visualizations

In [None]:
#Visualizes loss curve
#Split the per-epoch records into one list per plotted quantity
x_axis=[record[0] for record in outputs]
y_axis=[record[1].detach().numpy() for record in outputs]
train_accs=[record[2] for record in outputs]
val_accs=[record[3] for record in outputs]
print(outputs)
plt.title("Loss curve")
plt.plot(x_axis,y_axis, label="Loss")

In [None]:
#Visualizes train and val accuracies
#Uses the x_axis, train_accs and val_accs lists built by the loss curve cell
plt.title("Training Curve")
plt.plot(x_axis, train_accs, label="Train")
plt.plot(x_axis, val_accs, label="Val")
plt.xlabel("Epochs")
plt.ylabel("Training Accuracy") #Both the training and the validation accuracy are drawn on this axis
plt.legend(loc='best')
plt.show()

In [None]:
#For manually checking the output of each image in the classifier dataset
#Note that 0-99 should be class 0, 100-199 should be class 1, etc

pool=nn.MaxPool2d(2,2)
#enumerate numbers the images from 0, instead of continuing from whatever n was left at by another cell
for image_no, (img, label) in enumerate(classifier_data):

    img=process_image(img)
    encod=encoder(img)
    encod=pool(encod)
    encod=encod.detach()
    out=classifier(encod)
    print("Image no: {}, output: {:.0f}".format(image_no, float(out.argmax())))

In [None]:
#For checking the accuracy class by class

#Display names for the 9 classifier outputs, looked up by label index
#NOTE(review): assumes this order matches the label indices ImageFolder assigned - confirm against classifier_data.classes
class_names=("ODE", "Complex", "Differentiation", "Inequalities", "Integration", "Limits", "Logarithms", "Parametrics", "Trigonometry")

def get_accuracy_mod(data, model, encoder):
    """Return the fraction of (image, label) pairs in data classified correctly.

    Unlike get_accuracy, data is iterated directly, so the images go through
    the model one at a time instead of in batches.

    Returns:
        correct / total as a float, or 0.0 when data is empty.
    """
    correct = 0
    total = 0
    with torch.no_grad(): #Evaluation only, so no autograd graph needs to be built
        for imgs, labels in data:
            output = full_model(model, encoder, imgs) #A single row of class scores for this image
            #select index with maximum prediction score
            pred = output.max(1, keepdim=True)[1]
            if (pred==labels):
                correct+=1
            total+=1
    if total == 0:
        return 0.0 #A class without any samples would otherwise raise ZeroDivisionError
    return correct / total

def get_indices(data, classification):
    """Return the indices of the samples in data whose label equals classification.

    When data exposes a targets list and applies no target_transform (as a
    torchvision ImageFolder does by default), the labels are read from that
    list so that no image has to be loaded. Otherwise every sample is indexed
    and its second element is used as the label.
    """
    labels=getattr(data, "targets", None)
    if labels is None or getattr(data, "target_transform", None) is not None:
        #Fallback: index every sample just to read its label
        labels=(data[i][1] for i in range(len(data)))
    return [i for i, label in enumerate(labels) if label==classification]

#Prints the accuracy on each class separately
for class_index, class_name in enumerate(class_names):
    class_members=get_indices(classifier_data, class_index)
    class_dataset=manual_pull(classifier_data, class_members)
    accuracy=get_accuracy_mod(class_dataset, classifier, encoder)
    print("Class: {}. Accuracy: {:.4f}".format(class_name, accuracy))

In [None]:
#For checking the probability distribution for an individual image

n=200 #Put the index of the image you want to check here
pool=nn.MaxPool2d(2,2)
test_image=process_image(classifier_data[n][0])
encod=encoder(test_image)
encod=pool(encod)
encod=encod.detach()

#Draw on a new figure rather than on a fig/rows/cols left over from an earlier cell
fig=plt.figure(figsize=(10,10))
image_show=test_image.detach().numpy()
image_show=np.transpose(image_show,[1,2,0]) #Moves the first axis last for imshow
fig.add_subplot(1,1,1)
plt.imshow(image_show)
plt.axis('off')
plt.title("Image")

output=classifier(encod)
#Softmax is applied exactly once; a second pass would flatten the distribution
prob=F.softmax(output, dim=-1)
for i in range(9):
    print("{} probability: {:.4f}".format(class_names[i], prob[0][i]*100)) #Printed as a percentage

In [None]:
#This is just a repeat of the autoencoder reconstruction visualization, but for classifier data instead

n=200 #Put the index of the image you want to check here
fig=plt.figure(figsize=(10,10))
rows=2
cols=1

model=autoencoder

#Element [1] is the augmented image, so the picture shown (and fed to the model) may be colour-inverted
image=image_processing(classifier_data[n][0])[1]
image_show=image.detach().numpy()
image_show=np.transpose(image_show,[1,2,0]) #Moves the first axis last for imshow
fig.add_subplot(rows,cols,1)
plt.imshow(image_show)
plt.axis('off')
plt.title("Image")

#Second row: what the autoencoder reconstructs from that same image
recon=model(image)
recon_out=recon.detach().numpy()
recon_out=np.transpose(recon_out,[1,2,0])
fig.add_subplot(rows,cols,2)
plt.imshow(recon_out)
plt.axis('off')
plt.title("Recon")

#Demonstration

In [None]:
demo_dir=local_dir + "/Group 17 - Final Deliverable/Demonstration images"
demo_im=torchvision.datasets.ImageFolder(demo_dir)

#Only the processed images are kept, the labels are dropped
demo_data=[]
for img, label in demo_im:
    img=process_image(img)
    demo_data.append(img)
print(demo_im)

In [None]:
#Repeat of checking probability distributions, but for the demonstration data

n=61 #Index into demo_data of the image to check
pool=nn.MaxPool2d(2,2)
test_image=demo_data[n]
encod=encoder(test_image)
encod=pool(encod)
encod=encod.detach()

#Draw on a new figure rather than on a fig/rows/cols left over from an earlier cell
fig=plt.figure(figsize=(10,10))
image_show=test_image.detach().numpy()
image_show=np.transpose(image_show,[1,2,0]) #Moves the first axis last for imshow
fig.add_subplot(1,1,1)
plt.imshow(image_show)
plt.axis('off')
plt.title("Image")

output=classifier(encod)
#Softmax is applied exactly once; a second pass would flatten the distribution
prob=F.softmax(output, dim=-1)
for i in range(9):
    print("{} probability: {:.4f}".format(class_names[i], prob[0][i]*100)) #Printed as a percentage

In [None]:
#Repeat of visualizing autoencoder reconstruction, but for the demonstration data

n=51 #Index into demo_data of the image to check
fig=plt.figure(figsize=(10,10))
rows=2
cols=1

#Set here, like in the other reconstruction cells, so this cell doesn't rely on an earlier one having defined model
model=autoencoder

image=demo_data[n]
image_show=image.detach().numpy()
image_show=np.transpose(image_show,[1,2,0]) #Moves the first axis last for imshow
fig.add_subplot(rows,cols,1)
plt.imshow(image_show)
plt.axis('off')
plt.title("Image")

#Second row: what the autoencoder reconstructs from that same image
recon=model(image)
recon_out=recon.detach().numpy()
recon_out=np.transpose(recon_out,[1,2,0])
fig.add_subplot(rows,cols,2)
plt.imshow(recon_out)
plt.axis('off')
plt.title("Recon")