In [1]:
# import torch and numpy and pretrained model
from torchvision import models
import torch
import numpy as np

# load pretrained model
pretrained_model = models.resnet50(weights = 'DEFAULT', progress = True)

In [2]:
# changes fully connected/classifier layer to new layer for us to train, 15 is the number of solutions or types of leaves
pretrained_model.fc = torch.nn.Linear(2048, 15)

In [3]:
# # Currently not in use, for if we wanna custom build a softmax but idk how to train it
# import torch.nn as nn
# # build custom softmax module
# class Softmax(nn.Module):
#     def __init__(self, n_inputs, n_outputs):
#         super().__init__()
#         self.linear = nn.Linear(n_inputs, n_outputs)
 
#     def forward(self, x):
#         pred = self.linear(x)
#         return pred

In [4]:
# # Currently not in use
# # adds softmax to model
# class MyModel(nn.Module):
#     def __init__(self, pretrained_model):
#         super(MyModel, self).__init__()
#         self.pretrained_model = pretrained_model
#         self.last_layer = Softmax(1000, n) # add how many nodes as input and output

#     def forward(self, x):
#         return self.last_layer(self.pretrained_model(x))

# model = MyModel(pretrained_model)

In [5]:
#freeze model except fc layer because we don't wanna retrain the pretrained model
for param in pretrained_model.parameters():
    param.requires_grad = False

for param in pretrained_model.fc.parameters():
    param.requires_grad = True

In [20]:
criterion = torch.nn.CrossEntropyLoss() #could write this out ourselves
# need to find an optimizer or make one for a custom softmax function
optimizer = torch.optim.SGD(pretrained_model.fc.parameters(), lr=0.001, momentum=0.9)

In [7]:
#import data
from PIL import Image
import os
from sklearn.preprocessing import OneHotEncoder
data_dir = "/Users/stevenli/SigAida/data/images/" #change to data directory

# gets the label based on the number
def getLabel(s):
    labels = {1 : 'Ulmus carpinifolia', 
                2 : 'Acer', 
                3 : 'Salix aurita', 
                4 : 'Quercus', 
                5 : 'Alnus incan', 
                6 : 'Betula pubescens', 
                7 : 'Salix alba \'Sericea\'', 
                8 : 'Populus tremula', 
                9 : 'Ulmus glabra', 
                10 : 'Sorbus aucuparia', 
                11 : 'Salix sinerea', 
                12 : 'Populus', 
                13 : 'Tilia', 
                14 : 'Sorbus intermedia', 
                15 : 'Fagus silvatica'}
    return labels.get(int(s))

im = [] # images
la = [] # labels not in use, because you can't feed strings to gpu, need to feed tensors
ohe = [] #s toring int values for one hot encodings
joined = []

for f in os.listdir(data_dir):
    im.append(data_dir + f)
    val = int(f[:-9].replace('l','')) # removes last 9 letters replaces l with blank then gets label
    la.append(getLabel(val)) 
    ohe.append(val)

#one hot encoding (changing 1-15 to tensors for gpu)
encoded = OneHotEncoder(categories = [[x for x in range(1, 16)]], sparse = False).fit_transform(np.array(ohe).reshape((len(ohe),1)))
# print(encoded)

#joining image and label for organization and if we wanna shuffle
for image, label in zip(im, encoded):
    joined.append([image, label])

# print(la)
# print(im)
# print(joined[1])



In [8]:
# some preprocessing, randomize data, split data, load into dataloaders
import random

# splits (train, val, test) test currently not in use
split_ratio=(0.8, 0.1, 0.1)

def create_splits(data, split_ratio):
    random.shuffle(data)
    train = [data[i] for i in range(0, round(.8 * len(data)))]
    val = [data[i] for i in range(round(.8 * len(data)), round(.9 * len(data)))]
    test = [data[i] for i in range(round(.9 * len(data)), len(data))]
    return train, val, test

train, val, test = create_splits(joined, split_ratio)

print(len(joined))
print(len(train))
print(len(val))
print(len(test))

1125
900
112
113


In [9]:
#making dataset
from torch.utils.data import Dataset

# makes a custom dataset based on pytorch dataset class
class PlantDataset(Dataset):
    def __init__(self, data_dir, arr, transform = None):
        # initialize some valuess
        self.data_dir = data_dir
        self.data = [x[0] for x in arr]
        self.arr = arr

        #transform to normalize/resize all images
        self.transform = transform
    
    def __len__(self):
        return len(self.arr)
    
    def __getitem__(self, idx):
        img = self.transform(Image.open(self.arr[idx][0]))
        s = self.arr[idx][1]
        #returns a tuple of the transformed image and the label (one-hot encoding)
        return (img, s)

In [10]:
import torchvision.transforms as transforms

#defining our transforms
transform = transforms.Compose([
    transforms.Resize(224), # change to what data should be
    transforms.CenterCrop(224), # trying what happens if no center crop cuz it cuts off larger images
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#making dataset and dataloader (gives data to model, using batch_size 1 cuz google says that's good for sgd (stochastic gradient descent))
train_dataset = PlantDataset(data_dir, train, transform = transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True)

In [11]:
#chcek devices change to whatever you're using

#mps = (m1/m2 mac) gpu

print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())

True
True


In [19]:
from tqdm import tqdm # progress

#set device
device = torch.device('mps')

# Train the model
pretrained_model.train()
num_epochs = 20

for epoch in range(num_epochs):
    for data, label in tqdm(train_loader):
        # Move tensors to the configured device
        pretrained_model.to(device)
        images = data.to(device)
        labels = label.float().to(device) #.float() for some float64, float32 conversion thing, don't completely understand

        # Forward pass
        outputs = pretrained_model(images) #currently image tensor don't match fc layer if you use non pretrained model
        loss = criterion(outputs, labels) #calculate loss

        # Backward and optimize don't really understand this stuff
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print some statistics
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

100%|██████████| 900/900 [00:59<00:00, 15.04it/s]


Epoch [1/20], Loss: 0.8369


100%|██████████| 900/900 [00:58<00:00, 15.34it/s]


Epoch [2/20], Loss: 0.7691


100%|██████████| 900/900 [04:55<00:00,  3.05it/s]  


Epoch [3/20], Loss: 0.6604


100%|██████████| 900/900 [01:00<00:00, 15.00it/s]


Epoch [4/20], Loss: 0.1663


100%|██████████| 900/900 [09:10<00:00,  1.63it/s]  


Epoch [5/20], Loss: 0.7411


100%|██████████| 900/900 [00:59<00:00, 15.01it/s]


Epoch [6/20], Loss: 0.3939


100%|██████████| 900/900 [04:06<00:00,  3.66it/s]  


Epoch [7/20], Loss: 0.6762


100%|██████████| 900/900 [00:58<00:00, 15.44it/s]


Epoch [8/20], Loss: 0.7321


100%|██████████| 900/900 [02:20<00:00,  6.42it/s]


Epoch [9/20], Loss: 0.3207


100%|██████████| 900/900 [00:58<00:00, 15.26it/s]


Epoch [10/20], Loss: 0.6387


100%|██████████| 900/900 [00:59<00:00, 15.03it/s]


Epoch [11/20], Loss: 0.9708


100%|██████████| 900/900 [01:01<00:00, 14.55it/s]


Epoch [12/20], Loss: 0.3960


100%|██████████| 900/900 [00:58<00:00, 15.28it/s]


Epoch [13/20], Loss: 0.4152


100%|██████████| 900/900 [01:01<00:00, 14.70it/s]


Epoch [14/20], Loss: 0.6811


100%|██████████| 900/900 [00:59<00:00, 15.11it/s]


Epoch [15/20], Loss: 0.2546


100%|██████████| 900/900 [01:00<00:00, 14.88it/s]


Epoch [16/20], Loss: 0.5993


100%|██████████| 900/900 [08:47<00:00,  1.71it/s]   


Epoch [17/20], Loss: 0.3188


100%|██████████| 900/900 [00:59<00:00, 15.17it/s]


Epoch [18/20], Loss: 0.6566


100%|██████████| 900/900 [00:59<00:00, 15.14it/s]


Epoch [19/20], Loss: 0.5138


100%|██████████| 900/900 [01:46<00:00,  8.46it/s]

Epoch [20/20], Loss: 0.2486





In [18]:
#testing accuracy on test dataset
total_correct = 0
total_instances = 0

pretrained_model.eval()
# iterating through batches without updating gradients
with torch.no_grad():
    for images, labels in tqdm(train_loader):
      images = images.to(device)
      # labels = labels.float().to(device) # don't need this cuz we not training no more

      # making classifications and deriving indices of maximum value via argmax (which gives the max value i the tensor)
      solution_tensor = pretrained_model(images)
      classifications = torch.argmax(solution_tensor, dim = 1).item()

      #undoing one-hot encoding to get label value as a number
      label = np.where(labels.numpy() == 1)[1]

      correct_predictions = int(classifications==label)

      #  incrementing counters
      total_correct+=correct_predictions
      total_instances+=len(images)

#print accuracy
print(round(total_correct/total_instances, 3))

100%|██████████| 900/900 [00:50<00:00, 17.73it/s]

0.498





In [13]:
#loading validation dataset
val_dataset = PlantDataset(data_dir, val, transform = transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=True)

In [14]:
total_correct = 0
total_instances = 0

pretrained_model.eval()
# iterating through batches without updating gradients
with torch.no_grad():
    for images, labels in tqdm(val_loader):
      images = images.to(device)
      # labels = labels.float().to(device) # don't need this cuz we not training no more

      # making classifications and deriving indices of maximum value via argmax (which gives the max value i the tensor)
      solution_tensor = pretrained_model(images)
      classifications = torch.argmax(solution_tensor, dim = 1).item()

      #undoing one-hot encoding to get label value as a number
      label = np.where(labels.numpy() == 1)[1]

      correct_predictions = int(classifications==label)

      #  incrementing counters
      total_correct+=correct_predictions
      total_instances+=len(images)

#print accuracy
print(round(total_correct/total_instances, 3))

  0%|          | 0/112 [00:00<?, ?it/s]

100%|██████████| 112/112 [00:07<00:00, 15.38it/s]

0.384





In [15]:
#loading test dataset
test_dataset = PlantDataset(data_dir, test, transform = transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=True)

In [16]:
total_correct = 0
total_instances = 0

pictures = []
model_guesses = []
correct_answers = []

# iterating through batches without updating gradients
with torch.no_grad():
    for images, labels in tqdm(test_loader):
      pictures.append(images)
      images = images.to(device)

      # making classifications and deriving indices of maximum value via argmax (which gives the max value i the tensor)
      solution_tensor = pretrained_model(images)
      classifications = torch.argmax(solution_tensor, dim = 1).item()
      model_guesses.append(getLabel(classifications + 1)) # + 1 to convert from index to dict key

      #undoing one-hot encoding to get label value as a number
      label = np.where(labels.numpy() == 1)[1]
      correct_answers.append(getLabel(label + 1)) # + 1 to convert from index to dict key

      correct_predictions = int(classifications==label)

      #  incrementing counters
      total_correct+=correct_predictions
      total_instances+=len(images)

#print accuracy
print(round(total_correct/total_instances, 3))

100%|██████████| 113/113 [00:06<00:00, 18.65it/s]

0.46





In [17]:
test_indices = [random.randint(0, len(pictures)) for x in range(3)] # 3 is arbitray choose as many as you wanna show
transform = transforms.ToPILImage()
for test in test_indices:
    img = transform(torch.reshape(pictures[test], (3, 224, 224)))
    img.show()
    print('Our classifiction is: ', model_guesses[test], ' and the correct classification is ', correct_answers[test])

Our classifiction is:  Ulmus glabra  and the correct classification is  Betula pubescens
Our classifiction is:  Salix alba 'Sericea'  and the correct classification is  Ulmus glabra
Our classifiction is:  Populus tremula  and the correct classification is  Populus tremula
