In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import os
import re
from PIL import Image
from sklearn.manifold import TSNE
import numpy as np
from pytorch_msssim import SSIM


In [16]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [17]:
train_path = "/Users/nguyenphan/Developer/Leaf-Anomaly-Detection/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train"
#train_path = "/Users/nguyenphan/Developer/Leaf-Anomaly-Detection/grape-only-dataset/train"

In [4]:
def get_label(name):
  label = 0
  if bool(re.match('Grape___E.+', name)):
    label = 1
  elif bool(re.match('Grape___L.+', name)):
    label = 2
  elif bool(re.match('Grape___h.+', name)):
    label = 3
  return label

In [18]:
convert_tensor = transforms.ToTensor()
train_data = []
for root, dirs, files in os.walk(train_path):
  class_name = root.split('/')[-1]
  if bool(re.match('Grape.+', class_name)):
    for file in files:
      path = os.path.join(root, file)
      img = Image.open(path)
      tensor_img = convert_tensor(img)
      tensor_img = tensor_img.to(device)
      label = get_label(class_name)
      train_data.append([tensor_img, label])

print(len(train_data))

train_data[0][0].is_cuda

7222


False

In [19]:
# class Autoencoder(nn.Module):
#   def __init__(self):
#     super().__init__()
#     self.encoder = nn.Sequential(
#         nn.Conv2d(3, 16, kernel_size=7, stride=2, padding=1),
#         nn.ReLU(),
#         nn.Conv2d(16, 32, kernel_size=7, stride=2, padding=1),
#         nn.ReLU(),
#         nn.Conv2d(32, 64, kernel_size=7, stride=2, padding=1),
#         nn.ReLU(),
#         nn.Conv2d(64, 128, kernel_size=5)
#     )

#     self.decoder = nn.Sequential(
#         nn.ConvTranspose2d(128, 64, kernel_size=5),
#         nn.ReLU(),
#         nn.ConvTranspose2d(64, 32, kernel_size=7, stride=2, padding=1),
#         nn.ReLU(),
#         nn.ConvTranspose2d(32, 16, kernel_size=7, stride=2, padding=1, output_padding=1),
#         nn.ReLU(),
#         nn.ConvTranspose2d(16, 3, kernel_size=7, stride=2, padding=1, output_padding=1),
#         nn.Sigmoid() # As we saw that the input tensors are between 0 and 1 so we should use an activation function to map our values to that range.
#     )
    
#   def forward(self, x):
#     encoded = self.encoder(x)
#     decoded = self.decoder(encoded)
#     return decoded
  
# BatchNorm2d improving training stability and convergence.
class Autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=7, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=7, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=7, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=5),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=5),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=7, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, kernel_size=7, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 3, kernel_size=7, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()  # Output in [0, 1] to match input range
        )
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [20]:
#Loss function
class SSIMLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.ssim_module = SSIM(data_range=1.0, size_average=True, channel=3)
    
    def forward(self, y_pred, y_true):
        return 1 - self.ssim_module(y_pred, y_true)

In [21]:
model = Autoencoder().to(device)
criterion = SSIMLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
checkpoint_path = "/Users/nguyenphan/Developer/Leaf-Anomaly-Detection/checkpoint.pth"
train_loader = torch.utils.data.DataLoader(
    dataset= train_data,
    batch_size=16
)

In [22]:
import os
checkpoint_path = "/Users/nguyenphan/Developer/Leaf-Anomaly-Detection/checkpoint.pth"
if os.path.exists(checkpoint_path):
    os.remove(checkpoint_path)
    print("Corrupted checkpoint removed.")

In [23]:
def train_model(model, criterion, optimizer, train_loader, num_epochs, device,
                checkpoint_path = '/Users/nguyenphan/Developer/Leaf-Anomaly-Detection/checkpoint.pth'):
    start_epoch = 0
    prev_loss = 0.0
    
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        prev_loss = checkpoint['loss']
        print(f"Resuming training from epoch {start_epoch}, previous loss: {prev_loss:.4f}")
    else:
        print("Starting training from scratch")
        
    try:
        for epoch in range(start_epoch, num_epochs):
            for (img, _) in train_loader:
                output = model(img)
                loss = criterion(output, img)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
            
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss.item()
            }, checkpoint_path)
    
    except KeyboardInterrupt:
        print("Training interrupted, saving checkpoint...")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss.item()
        }, checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")
        return
    
    torch.save(model.state_dict(), 'autoencoder_final.pth')
    print("Training completed, final model saved as 'autoencoder_final.pth'")

In [26]:
num_epochs = 1000
train_model(model, criterion, optimizer, train_loader, num_epochs, device)

Resuming training from epoch 59, previous loss: 0.1791
Epoch [60/1000], Loss: 0.2096
Epoch [61/1000], Loss: 0.2088
Epoch [62/1000], Loss: 0.2084
Epoch [63/1000], Loss: 0.2071
Epoch [64/1000], Loss: 0.2066
Epoch [65/1000], Loss: 0.2064
Epoch [66/1000], Loss: 0.2063
Epoch [67/1000], Loss: 0.2056
Epoch [68/1000], Loss: 0.2051
Epoch [69/1000], Loss: 0.2046
Epoch [70/1000], Loss: 0.2042
Epoch [71/1000], Loss: 0.2041
Epoch [72/1000], Loss: 0.2037
Epoch [73/1000], Loss: 0.2034
Epoch [74/1000], Loss: 0.2031
Epoch [75/1000], Loss: 0.2028
Epoch [76/1000], Loss: 0.2022
Epoch [77/1000], Loss: 0.2017
Epoch [78/1000], Loss: 0.2018
Epoch [79/1000], Loss: 0.2013
Epoch [80/1000], Loss: 0.2011
Epoch [81/1000], Loss: 0.2014
Epoch [82/1000], Loss: 0.2008
Epoch [83/1000], Loss: 0.2008
Epoch [84/1000], Loss: 0.2003
Epoch [85/1000], Loss: 0.2002
Epoch [86/1000], Loss: 0.2001
Epoch [87/1000], Loss: 0.2001
Epoch [88/1000], Loss: 0.1995
Epoch [89/1000], Loss: 0.1995
Epoch [90/1000], Loss: 0.1989
Epoch [91/1000]