In [None]:
# Import statements
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split, TensorDataset
import torchvision.transforms as transforms
import torchvision.models
import matplotlib.pyplot as plt
import time
import pandas as pd
from IPython.display import display

# The cells below only run saved models forward (no training step is visible),
# so autograd is switched off globally for every later forward pass.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f972a0ec650>

In [None]:
# Mount Google Drive at /content/drive; the image archives unzipped below and
# the saved model weights loaded later are read from that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Retrieve test images
!mkdir Mush_test
!mkdir Mush_val
!mkdir Mushaugm1
!mkdir Mushaugm2
!unzip /content/drive/MyDrive/APS360_Project/Mush_test.zip -d /content/Mush_test
!unzip /content/drive/MyDrive/APS360_Project/Mush_val.zip -d /content/Mush_val
!unzip /content/drive/MyDrive/APS360_Project/Mushaugm1.zip -d /content/Mushaugm1
!unzip /content/drive/MyDrive/APS360_Project/Mushaugm2.zip -d /content/Mushaugm2

In [None]:
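# Below is a series of models to be tested. ONLY RUN THE CELL OF THE MODEL YOU WANT TO TEST---------------------------------------
# If you run several of them, nothing should break: each cell rebinds fungi_net, so the results come from the last cell run.
# Make sure the second argument of get_set_matrix in the testing cells is the matching feature extractor
# (ResNetCL for ResNet, vgg_full_model for VGG, no second argument for the ANN/CNN baselines).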

In [None]:
# ONLY RUN IF TESTING RESNET----------------------------------------------------

# Define the FC layers
class ClassyMush(nn.Module):
    """Fully connected head for 2048-value feature vectors: 2048 -> 512 -> 9 scores."""

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys for the saved checkpoint.
        self.fc1 = nn.Linear(2048, 512)
        self.fc2 = nn.Linear(512, 9)

    def forward(self, x):
        """Flatten `x` to rows of 2048 values and return one row of 9 scores each."""
        flat = x.view(-1, 2048 * 1 * 1)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        # Dimension 1 has size 9, so this squeeze leaves the shape untouched.
        return scores.squeeze(1)

# Classifier head; its trained weights are restored further down.
fungi_net = ClassyMush()

# ResNet-152 backbone: every child module except the final fc layer.
# NOTE(review): the pretrained weights fetched here are replaced by the
# checkpoint loaded below, since the whole state dict is loaded into ResNetCL.
ResNet = torchvision.models.resnet152(pretrained=True, progress=False)
modules = list(ResNet.children())
del modules[-1]  # drop the last fc layer
ResNetCL = nn.Sequential(*modules)

# Restore the saved weights (head first, then backbone), mapped onto the CPU.
save_path = "/content/drive/MyDrive/APS360_Project/resnet152_states/Jun_Ho/unfreeze/"
model_path = save_path + 'ResNet_bs64_lr0.001_ne100'
for net, suffix in ((fungi_net, ".pt"), (ResNetCL, "_ResNet.pt")):
    net.load_state_dict(torch.load(model_path + suffix, map_location=torch.device('cpu')))

# Switch both networks to evaluation mode.
fungi_net.eval()
ResNetCL.eval()

In [None]:
# ONLY RUN IF TESTING VGG--------------------------------------------------------
# Keep only the `features` part of a pretrained VGG-19.
# NOTE(review): presumably meant to be passed to get_set_matrix as pret_model
# (the head below flattens to 512 * 7 * 7 values) -- confirm.
vgg_full_model = torchvision.models.vgg19(pretrained=True, progress=False).features

# Fully connected model
class ClassyMush(nn.Module):
    """Fully connected head for 512 x 7 x 7 feature maps: 25088 -> 4096 -> 9 scores."""

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys for the saved checkpoint.
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 9)

    def forward(self, x):
        """Flatten `x` to rows of 512 * 7 * 7 values and return 9 scores per row."""
        flat = x.view(-1, 512 * 7 * 7)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        # Dimension 1 has size 9, so this squeeze leaves the shape untouched.
        return scores.squeeze(1)

# Checkpoint holding the trained parameters of the VGG head.
save_path = "/content/drive/MyDrive/APS360_Project/VGGstates/bs250_lr0.001_e10.pt"

# Build the head, then restore its weights mapped onto the CPU.
fungi_net = ClassyMush()
fungi_net.load_state_dict(
    torch.load(save_path, map_location=torch.device('cpu'))
)

<All keys matched successfully>

In [None]:
# ONLY RUN IF TESTING ANN BASELINE-----------------------------------------------
# Fully connected model
class BasedANN(nn.Module):
    """Baseline fully connected net on raw 3 x 240 x 240 inputs: 172800 -> 1024 -> 9."""

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys for the saved checkpoint.
        self.fc1 = nn.Linear(3 * 240 * 240, 1024)
        self.fc2 = nn.Linear(1024, 9)

    def forward(self, x):
        """Flatten `x` to rows of 3 * 240 * 240 values and return 9 scores per row."""
        flat = x.view(-1, 3 * 240 * 240)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        # Dimension 1 has size 9, so this squeeze leaves the shape untouched.
        return scores.squeeze(1)

# Checkpoint holding the trained parameters of the ANN baseline.
save_path = '/content/drive/MyDrive/APS360_Project/ANNBaseline/bs512_lr0.001_epoch10'

# Build the baseline, then restore its weights mapped onto the CPU.
fungi_net = BasedANN()
fungi_net.load_state_dict(
    torch.load(save_path, map_location=torch.device('cpu'))
)

<All keys matched successfully>

In [None]:
# ONLY RUN IF TESTING CNN BASELINE----------------------------------------------
class MushCNN(nn.Module):
    """Baseline CNN: two conv + max-pool stages followed by two linear layers (9 scores)."""

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys for the saved checkpoint.
        self.conv1 = nn.Conv2d(3, 5, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(5, 10, 5)
        self.fc1 = nn.Linear(10 * 57 * 57, 1000)
        self.fc2 = nn.Linear(1000, 9)

    def forward(self, x):
        """Run both conv/pool stages, flatten to 10 * 57 * 57 values, return 9 scores per row."""
        # Each stage is convolution -> ReLU -> shared 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        flat = x.view(-1, 10 * 57 * 57)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Checkpoint holding the trained parameters of the CNN baseline.
save_path = '/content/drive/MyDrive/APS360_Project/CNNBaseline/MushCNN_512_0.005_23'

# Build the baseline, then restore its weights mapped onto the CPU.
fungi_net = MushCNN()
fungi_net.load_state_dict(
    torch.load(save_path, map_location=torch.device('cpu'))
)

<All keys matched successfully>

In [None]:
# Image folders populated by the unzip cell.
data_folders = ['Mush_test', 'Mush_val', 'Mushaugm1', 'Mushaugm2']

# Convert each image to a tensor, then normalise per channel.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per folder, keyed by folder name.
dataset_dicts = {
    folder_name: torchvision.datasets.ImageFolder(folder_name, transform=img_transform)
    for folder_name in data_folders
}

# https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py - classes returned in order
# The class list is taken from the test folder only.
classnames = dataset_dicts['Mush_test'].classes
num_classes = len(classnames)


def placeholder_model(arg):
    """Identity function: hand back the argument unchanged."""
    return arg

In [None]:
# go through each dataset
def get_set_matrix(folder_name, pret_model=None, batch_size=1):
  """Build the confusion matrix of the global `fungi_net` on one image folder.

  Args:
    folder_name: key into the global `dataset_dicts` selecting the dataset.
    pret_model: optional feature extractor (transfer learning). When given,
      each batch goes through it first and `fungi_net` classifies its output;
      otherwise `fungi_net` receives the images directly.
    batch_size: number of images per forward pass. The default of 1 keeps the
      original one-image-at-a-time behaviour; larger values only run faster.

  Returns:
    A `num_classes` x `num_classes` list of lists of ints indexed as
    `confusion_mat[actual][pred]`.

  Notes:
    The models are used in whatever train/eval mode the caller left them in.
    When CUDA is available both models are moved to the GPU for the pass and
    moved back to the CPU afterwards, even if the pass raises.
  """
  dataset = dataset_dicts[folder_name]
  # Sample order cannot change a confusion matrix, so do not shuffle.
  image_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)

  # confusion_mat[actual][pred]
  confusion_mat = [[0] * num_classes for _ in range(num_classes)]

  use_cuda = torch.cuda.is_available()

  # move model to and from cuda only once
  if use_cuda:
    torch.cuda.empty_cache()
    fungi_net.cuda()
    if pret_model is not None:
      pret_model.cuda()

  try:
    # Inference only: never build an autograd graph, whatever the global setting.
    with torch.no_grad():
      for img, label in image_loader:
        if use_cuda:
          img = img.cuda()

        features = img if pret_model is None else pret_model(img)
        output = fungi_net(features)

        # Index of the highest score in each row is the predicted class.
        preds = output.max(1)[1]
        for true_class, pred in zip(label.tolist(), preds.tolist()):
          confusion_mat[true_class][pred] += 1
  finally:
    if use_cuda:
      fungi_net.cpu()
      if pret_model is not None:
        pret_model.cpu()
      torch.cuda.empty_cache()

  return confusion_mat

In [None]:
# Sanity check for the confusion matrix: the per-class image counts printed here
# should equal the row sums of the Mush_test confusion matrix.
dataset = dataset_dicts['Mush_test']
# ImageFolder keeps one class index per image in `targets`, so the labels can be
# counted directly instead of decoding every image through a DataLoader.
cnts = [0 for _ in classnames]
for class_idx in dataset.targets:
  cnts[class_idx] += 1
for mush_cls in classnames:
  print(mush_cls, cnts[dataset.class_to_idx[mush_cls]])

Agaricus 36
Amanita 66
Boletus 101
Cortinarius 90
Entoloma 39
Hygrocybe 35
Lactarius 169
Russula 110
Suillus 23


In [None]:
# Per-class precision, recall and F1 computed from the test confusion matrix.
# NOTE: `test_fold_mat` is created by the "Test Set" cell, which sits further
# down in this notebook -- run that cell before this one.

def _safe_ratio(numerator, denominator):
  """Return numerator / denominator, or 0.0 when the denominator is zero."""
  return numerator / denominator if denominator else 0.0


for rowAccuracy in range(num_classes):
  # Rows are actual classes, columns are predicted classes.
  TruePositive = test_fold_mat[rowAccuracy][rowAccuracy]
  # Rest of the row: images of this class predicted as something else.
  FalseNegative = sum(test_fold_mat[rowAccuracy]) - TruePositive
  # Rest of the column: images of other classes predicted as this class.
  FalsePositive = sum(row[rowAccuracy] for row in test_fold_mat) - TruePositive

  # A class that is never predicted (or has no images) scores 0 instead of
  # raising ZeroDivisionError.
  Precision = _safe_ratio(TruePositive, TruePositive + FalsePositive)
  Recall = _safe_ratio(TruePositive, TruePositive + FalseNegative)
  F1 = _safe_ratio(2 * (Recall * Precision), Precision + Recall)

  print(classnames[rowAccuracy])
  print("Precision %.2f" %Precision)
  print("Recall %.2f" %Recall)
  print("F1 %.2f" %F1)
  print("\n")

Agaricus
Precision 0.44
Recall 0.11
F1 0.18


Amanita
Precision 0.45
Recall 0.15
F1 0.23


Boletus
Precision 0.44
Recall 0.39
F1 0.41


Cortinarius
Precision 0.22
Recall 0.12
F1 0.16


Entoloma
Precision 0.17
Recall 0.05
F1 0.08


Hygrocybe
Precision 0.38
Recall 0.09
F1 0.14


Lactarius
Precision 0.30
Recall 0.60
F1 0.40


Russula
Precision 0.32
Recall 0.40
F1 0.35


Suillus
Precision 0.12
Recall 0.04
F1 0.06




In [None]:
# Test Set
# get_set_matrix takes an optional second argument: the main (pretrained)
# model used in transfer learning. It is omitted in this call.
test_fold_mat = get_set_matrix('Mush_test')
test_df = pd.DataFrame(test_fold_mat, index=classnames, columns=classnames)
display(test_df)

# Each row is an actual class and each column a guessed class, so the diagonal
# holds the correct guesses (e.g. row Agaricus / column Boletus counts Agaricus
# shrooms wrongly guessed as Boletus).
number_correct = 0
for rowi in range(num_classes):
  row_counts = test_fold_mat[rowi]
  hits = row_counts[rowi]
  total = sum(row_counts)
  print(total)
  print(classnames[rowi], ':', '%.2f'%(hits / total * 100), '%')
  number_correct = number_correct + hits
  print(number_correct)

test_set_size = len(dataset_dicts['Mush_test'])
print("Total accuracy: {:.2f}%".format(number_correct / test_set_size * 100))

Unnamed: 0,Agaricus,Amanita,Boletus,Cortinarius,Entoloma,Hygrocybe,Lactarius,Russula,Suillus
Agaricus,4,1,1,1,1,0,13,15,0
Amanita,0,10,8,6,1,0,31,10,0
Boletus,0,1,39,8,0,1,40,12,0
Cortinarius,1,1,12,11,3,3,44,15,0
Entoloma,1,2,3,4,2,0,22,4,1
Hygrocybe,0,0,4,1,2,3,20,4,1
Lactarius,1,3,16,12,1,0,101,30,5
Russula,2,3,2,5,1,1,52,44,0
Suillus,0,1,3,2,1,0,10,5,1


36
Agaricus : 11.11 %
4
66
Amanita : 15.15 %
14
101
Boletus : 38.61 %
53
90
Cortinarius : 12.22 %
64
39
Entoloma : 5.13 %
66
35
Hygrocybe : 8.57 %
69
169
Lactarius : 59.76 %
170
110
Russula : 40.00 %
214
23
Suillus : 4.35 %
215
Total accuracy: 32.14%


In [None]:
# Validation Set -- PROBABLY NOT WHAT WE ARE LOOKING FOR
# The second argument of get_set_matrix is the main (pretrained) model used in
# transfer learning; here it is ResNetCL.
val_fold_mat = get_set_matrix('Mush_val', ResNetCL)
val_df = pd.DataFrame(val_fold_mat, index=classnames, columns=classnames)
display(val_df)

# Per-class accuracy is the diagonal entry over its row total.
number_correct = 0
for rowi in range(num_classes):
  row_counts = val_fold_mat[rowi]
  hits = row_counts[rowi]
  total = sum(row_counts)
  print(classnames[rowi], ':', '%.2f'%(hits / total * 100), '%')
  number_correct = number_correct + hits

val_set_size = len(dataset_dicts['Mush_val'])
print("Total accuracy: {:.2f}%".format(number_correct / val_set_size * 100))