In [None]:
!git clone https://github.com/pranavp369/AIA-Project.git

fatal: destination path 'AIA-Project' already exists and is not an empty directory.
Cloning into 'Warwick_Dataset'...
fatal: could not read Username for 'https://github.com': No such device or address


In [None]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
import torchvision.models as models
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader , random_split
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.distributed import Dataset
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import copy
import time
import re
import pandas as pd
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC 
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [None]:
#Supporting functions

def flatten(l):
    return [item for sublist in l for item in sublist]

def test_loader(dataset):
  data_size = len(dataset)
  data_loader = DataLoader(dataset, batch_size = data_size, shuffle = False)
  return next(iter(data_loader)) 


In [None]:
#Customize the Dataset
class Spiral_DataSet(Dataset):
  
  def __init__(self,image_dir):
    self.image_dir = Path(image_dir)
    self.label = [i for i in os.listdir(image_dir)]
    self.image_path_list = list(self.image_dir.glob("*/*.tiff"))
    
    
  
  def __len__(self):
    
    return len(self.image_path_list)

  def __getitem__(self, index):

    #print(index)
    image = Image.open(self.image_path_list[index]).convert('RGB')
    label = self.image_path_list[index].parent.stem
    image_transforms = transforms.Compose([
                                     transforms.CenterCrop(size=(490,600)),\
                                     transforms.Resize(size=(256,256)),\
                                     transforms.ToTensor(),\
                                     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
    
    image = image_transforms(image)

    if label == "Control":
      label_class = 0
    else:
      label_class = 1


    return (image,label_class)

In [None]:
#Customize the Dataset
class Spiral_DataSet_PNG(Dataset):
  
  def __init__(self,image_dir):
    self.image_dir = Path(image_dir)
    self.label = [i for i in os.listdir(image_dir)]
    self.image_path_list = list(self.image_dir.glob("*/*.png"))
    
    
  
  def __len__(self):
    
    return len(self.image_path_list)

  def __getitem__(self, index):

    #print(index)
    image = Image.open(self.image_path_list[index]).convert('RGB')
    label = self.image_path_list[index].parent.stem
    image_transforms = transforms.Compose([
                                     transforms.CenterCrop(size=(490,600)),\
                                     transforms.Resize(size=(256,256)),\
                                     transforms.ToTensor(),\
                                     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
    
    image = image_transforms(image)

    if label == "Control":
      label_class = 0
    else:
      label_class = 1


    return (image,label_class)

In [None]:
def AIA_model(model,dataset, criterion, optimizer, scheduler, no_of_epochs):

  model.to(device)

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0
  count = 0
  output = []

  for i in range(no_of_epochs):
      per = 0.2
      a = len(dataset)*(1-per)
      b = len(dataset)*per
      trainingdataset, testdataset = torch.utils.data.random_split(dataset, [int(a),int(b)])
      dataset_size = [len(trainingdataset),len(testdataset)]

      train_data = DataLoader(trainingdataset, batch_size=8, shuffle=True)
      test_data = test_loader(testdataset)
      x_test, y_test = test_data[0].to(device), test_data[1].to(device)
    
      print(f"{i+1} Epoch out of {no_of_epochs} Epochs")
      
      running_loss = 0.0
      running_corrects = 0

      for j in train_data:
          image_tensor,label_tensor = j[0].to(device), j[1].to(device)

          optimizer.zero_grad()
          out = model(image_tensor)
          _,pred = torch.max(out,1)
          loss = criterion(out,label_tensor)
          loss.backward()
          optimizer.step()
          running_loss += loss.item()*image_tensor.size(0)
          running_corrects += torch.sum(pred==label_tensor.data) 


      epoch_loss = running_loss/dataset_size[0]
      epoch_accuracy = running_corrects.double()/dataset_size[0]
      print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_accuracy:.4f}')

      if epoch_accuracy >= best_acc:
            best_acc = epoch_accuracy
            best_model_wts = copy.deepcopy(model.state_dict())


      #Test Data
      with torch.no_grad():

        test_out = model(x_test)
        
        _,test_pred = torch.max(test_out,1)
        test_loss = criterion(test_out, y_test)/dataset_size[1]
        test_acc = torch.sum(test_pred==y_test.data)/dataset_size[1]
        print(f'Test Loss:{test_loss.item():.4f} Acc: {test_acc:.4f}')

  print(f"Best Accuracy: {best_acc}")   
  model.load_state_dict(best_model_wts)
    
  return model    


In [None]:
dataset = Spiral_DataSet('/content/AIA-Project/data/')

In [None]:
net = models.resnet50(weights = 'ResNet50_Weights.DEFAULT')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, patience=5)

num_features = net.fc.in_features

net.fc = nn.Linear(num_features,2)
#print(net)
#feature_extractor = torch.nn.Sequential(*list(net.children())[:-1])
#print(feature_extractor)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
num_epochs = 50
model= AIA_model(net, dataset, criterion, optimizer, scheduler, num_epochs)

1 Epoch out of 50 Epochs
Train Loss: 0.5999 Acc: 0.7188
Test Loss:0.0131 Acc: 0.7083
2 Epoch out of 50 Epochs
Train Loss: 0.5557 Acc: 0.7500
Test Loss:0.0109 Acc: 0.7500
3 Epoch out of 50 Epochs
Train Loss: 0.4539 Acc: 0.7812
Test Loss:0.0094 Acc: 0.7708
4 Epoch out of 50 Epochs
Train Loss: 0.4011 Acc: 0.8073
Test Loss:0.0053 Acc: 0.8542
5 Epoch out of 50 Epochs
Train Loss: 0.2521 Acc: 0.8802
Test Loss:0.0042 Acc: 0.9167
6 Epoch out of 50 Epochs
Train Loss: 0.3052 Acc: 0.8594
Test Loss:0.0047 Acc: 0.9167
7 Epoch out of 50 Epochs
Train Loss: 0.2488 Acc: 0.9010
Test Loss:0.0103 Acc: 0.7708
8 Epoch out of 50 Epochs
Train Loss: 0.1877 Acc: 0.9167
Test Loss:0.0046 Acc: 0.9167
9 Epoch out of 50 Epochs
Train Loss: 0.1351 Acc: 0.9531
Test Loss:0.0030 Acc: 0.9583
10 Epoch out of 50 Epochs
Train Loss: 0.1843 Acc: 0.9323
Test Loss:0.0023 Acc: 0.9583
11 Epoch out of 50 Epochs
Train Loss: 0.2351 Acc: 0.8802
Test Loss:0.0051 Acc: 0.8542
12 Epoch out of 50 Epochs
Train Loss: 0.2085 Acc: 0.9271
Test L

In [None]:
def feature_extraction(model, dataset):
    dataset_len = len(dataset)

    count = 0
    batchsize = 10

    data = DataLoader(dataset, batch_size=batchsize,shuffle=True)
    

    data_features = np.zeros((len(dataset),2048))
    data_label = np.zeros((len(dataset),1))

    feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

    for i in data:
        x_data, y_data = i[0].to(device), i[1].to(device)
    
        #extracting Features
        test_output = feature_extractor(x_data)

        feature_output_int = test_output.cpu().detach().numpy()
        feature_label = y_data.cpu().detach().numpy()

        for index in range(batchsize):
            feat = flatten(feature_output_int[index])
            data_features[count+index,:] = flatten(feat)
            data_label[count+index] = int(feature_label[index])
        count = count+batchsize
    
    train_x = pd.DataFrame(data_features)
    train_y = pd.DataFrame(data_label)

    print("X- DATA:",x_data.shape)
    print("Feature - DATA:",data_features.shape)

    return train_x, train_y

In [None]:
x_data, y_data = feature_extraction(model, dataset)

X- DATA: torch.Size([10, 3, 256, 256])
Feature - DATA: (240, 2048)


In [None]:
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.20, random_state=42)

In [None]:
hiddentest = Spiral_DataSet_PNG('/content/AIA-Project/Hidden Test/')

#x_data,y_data = feature_extraction(model,hiddentest)

hiddenset = test_loader(hiddentest)
x_data, y_data = hiddenset[0].to(device),hiddenset[1].to(device)
hidden_pred_int = model(x_data)

_,hidden_pred = torch.max(hidden_pred_int,1)
test_acc = torch.sum(hidden_pred==y_data.data)/len(hiddentest)
print(f'Hidden Test Acc: {test_acc:.4f}')


Hidden Test Acc: 0.4667


In [None]:
hiddentest = Spiral_DataSet_PNG('/content/AIA-Project/Hidden Test/')

x_data,y_data = feature_extraction(model,hiddentest)

X- DATA: torch.Size([10, 3, 256, 256])
Feature - DATA: (30, 2048)


In [None]:
def metrics(prediction, labels):
    print("##########   SVM   ##########")
    print("#####   Precision   #####")
    print(precision_score(prediction, labels))
    print("#####   Accuracy   #####")
    print(accuracy_score(prediction, labels))
    print("#####   Recall   #####")
    print(recall_score(prediction, labels))



In [None]:
model_RFC = RFC()
model_RFC.fit(X_train,y_train)

train_preds_RFC = model_RFC.predict(X_train)
test_preds_RFC = model_RFC.predict(X_test)
hidden_preds_RFC = model_RFC.predict(x_data)



print("###############   Training Metrics   ###############")
metrics(train_preds_RFC,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_RFC,y_test)

print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_RFC,y_data)

  model_RFC.fit(X_train,y_train)


###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.9090909090909091
#####   Accuracy   #####
0.9375
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.8
#####   Accuracy   #####
0.5
#####   Recall   #####
0.5


In [None]:
model_SVC = SVC()
model_SVC.fit(X_train,y_train)

train_preds_SVC = model_SVC.predict(X_train)
test_preds_SVC = model_SVC.predict(X_test)
hidden_preds_SVC = model_SVC.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_SVC,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_SVC,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_SVC,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.8
#####   Accuracy   #####
0.5
#####   Recall   #####
0.5
#################################
[1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1.
 1. 1. 0. 1. 1. 1.]
#################################
      0
0   1.0
1   0.0
2   0.0
3   1.0
4   0.0
5   1.0
6   0.0
7   0.0
8   0.0
9   0.0
10  0.0
11  1.0
12  0.0
13  1.0
14  1.0
15  1.0
16  1.0
17  1.0
18  0.0
19  0.0
20  0.0
21  1.0
22  1.0
23  1.0
24  0.0
25  1.0
26  0.0
27  1.0
28  0.0
29  1.0


  y = column_or_1d(y, warn=True)


In [None]:
model_LDA = LDA()
model_LDA.fit(X_train,y_train)

train_preds_LDA = model_LDA.predict(X_train)
test_preds_LDA = model_LDA.predict(X_test)
hidden_preds_LDA = model_LDA.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_LDA,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_LDA,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_LDA,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
0.5
#####   Recall   #####
0.5


  y = column_or_1d(y, warn=True)


In [None]:
model_QDA = QDA()
model_QDA.fit(X_train,y_train)

train_preds_QDA = model_QDA.predict(X_train)
test_preds_QDA = model_QDA.predict(X_test)
hidden_preds_QDA = model_QDA.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_QDA,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_QDA,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_QDA,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
0.6875
#####   Recall   #####
0.6875
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
0.6
#####   Recall   #####
0.5555555555555556


  y = column_or_1d(y, warn=True)


In [None]:
model_KNN = KNN()
model_KNN.fit(X_train,y_train)

train_preds_KNN = model_KNN.predict(X_train)
test_preds_KNN = model_KNN.predict(X_test)
hidden_preds_KNN = model_KNN.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_KNN,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_KNN,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_KNN,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.7333333333333333
#####   Accuracy   #####
0.43333333333333335
#####   Recall   #####
0.4583333333333333


  return self._fit(X, y)


In [None]:
model_DTC = DTC()
model_DTC.fit(X_train,y_train)

train_preds_DTC = model_DTC.predict(X_train)
test_preds_DTC = model_DTC.predict(X_test)
hidden_preds_DTC = model_DTC.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_DTC,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_DTC,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_DTC,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.8
#####   Accuracy   #####
0.6
#####   Recall   #####
0.5714285714285714


In [None]:
model_ABC = ABC()
model_ABC.fit(X_train,y_train)

train_preds_ABC = model_ABC.predict(X_train)
test_preds_ABC = model_ABC.predict(X_test)
hidden_preds_ABC = model_ABC.predict(x_data)

print("###############   Training Metrics   ###############")
metrics(train_preds_ABC,y_train)
print("###############   Testing Metrics   ###############")
metrics(test_preds_ABC,y_test)
print("###############   Hidden Dataset Metrics   ###############")
metrics(hidden_preds_ABC,y_data)

###############   Training Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Testing Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
1.0
#####   Accuracy   #####
1.0
#####   Recall   #####
1.0
###############   Hidden Dataset Metrics   ###############
##########   SVM   ##########
#####   Precision   #####
0.9333333333333333
#####   Accuracy   #####
0.5666666666666667
#####   Recall   #####
0.5384615384615384


  y = column_or_1d(y, warn=True)
