# Code for running SiameseNet and TripletNet on BRACOL Dataset

***The code used in this notebook is mostly adapted from [adambielski/siamese-triplet](https://github.com/adambielski/siamese-triplet/). If you are interested in it, check it out; it is extremely well documented.***

***Install libs***

In [None]:
!pip install learn2learn
!pip install efficientnet_pytorch

from IPython.display import clear_output 
clear_output()

print('Done!')

Done!


***Import some libs***

In [None]:
import sys
# Put the project's source folder first on the import path; presumably this is
# where dataloaders, networks, models, losses and trainer live — verify the path.
sys.path.insert(0, '/content/drive/MyDrive/pg/siamese_triplet_net/src/')
import torch
# NOTE: despite its name, `device` is a boolean flag (True when CUDA is
# available), not a torch.device object.
device = torch.cuda.is_available()
from dataloaders import get_train_transforms, get_val_transforms, get_siamese_dataloader, get_triplet_dataloader
from networks import SiameseNet, TripletNet 
# NOTE(review): wildcard import; `MobileNetv2` used later in the notebook
# presumably comes from here — confirm and consider importing it explicitly.
from models import *
from losses import ContrastiveLoss, TripletLoss
from trainer import fit
import torchvision



***Define model hyperparameters***

In [None]:
# model & optimizer & lr_scheduler
embedding_net = MobileNetv2()
model = TripletNet(embedding_net=embedding_net)

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
loss_fn = TripletLoss(1.)  # presumably the triplet margin — TODO confirm against losses.py
n_epochs = 100

# `device` is the boolean returned by torch.cuda.is_available(); move the
# model to the GPU only when one is present.
if device:
    model.cuda()

log_interval = 100

# path to data
path_data = '/content/drive/MyDrive/pg/dataset/'

# define triplet train and val loaders
# this loader is implemented for datasets in ImageFolder format (https://pytorch.org/vision/stable/datasets.html#imagefolder)
# `path_data` already ends with '/', so strip it before appending the split
# folder to avoid a doubled separator ('dataset//train/') in the root path.
triplet_train_loader = get_triplet_dataloader(root=path_data.rstrip('/') + '/train/', batch_size=32, transforms=get_train_transforms())
triplet_val_loader = get_triplet_dataloader(root=path_data.rstrip('/') + '/val/', batch_size=32, transforms=get_val_transforms())


In [None]:
# Run the project's `fit` training loop on the triplet loaders for `n_epochs`
# epochs; `device` is the boolean CUDA flag and `log_interval` is passed through
# to the trainer (presumably the logging period in batches — TODO confirm).
fit(triplet_train_loader, triplet_val_loader, model, loss_fn, optimizer, lr_scheduler, n_epochs, device, log_interval)

Epoch: 1/100. Train set: Average loss: 0.8433
Epoch: 1/100. Validation set: Average loss: 0.6843
Estimated time of epoch: 46.43 s
ETA:77.38 min
Epoch: 2/100. Train set: Average loss: 0.5798
Epoch: 2/100. Validation set: Average loss: 0.3653
Estimated time of epoch: 46.33 s
ETA:76.44 min
Epoch: 3/100. Train set: Average loss: 0.3377
Epoch: 3/100. Validation set: Average loss: 0.2400
Estimated time of epoch: 46.19 s
ETA:75.44 min
Epoch: 4/100. Train set: Average loss: 0.2668
Epoch: 4/100. Validation set: Average loss: 0.1888
Estimated time of epoch: 45.99 s
ETA:74.35 min
Epoch: 5/100. Train set: Average loss: 0.1921
Epoch: 5/100. Validation set: Average loss: 0.1034
Estimated time of epoch: 46.18 s
ETA:73.89 min
Epoch: 6/100. Train set: Average loss: 0.1678
Epoch: 6/100. Validation set: Average loss: 0.0773
Estimated time of epoch: 46.3 s
ETA:73.31 min
Epoch: 7/100. Train set: Average loss: 0.1343
Epoch: 7/100. Validation set: Average loss: 0.0854
Estimated time of epoch: 46.08 s
ETA:72.

In [None]:
# from: https://github.com/avilash/pytorch-siamese-triplet/blob/master/tsne.py
import cv2
import numpy as np
from sklearn.manifold import TSNE
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import transforms
from torch.autograd import Variable
import os
import pandas as pd
import seaborn as sns
def generate_embeddings(data_loader, model):
    """Compute embeddings for every sample yielded by ``data_loader``.

    The model is switched to eval mode and moved to the GPU when CUDA is
    available, otherwise it stays on the CPU (the previous version hard-coded
    ``'cuda'`` and failed on CPU-only machines).

    Args:
        data_loader: iterable yielding ``(batch_imgs, batch_labels)`` pairs of
            tensors.
        model: module exposing ``get_embedding(batch_imgs)``.

    Returns:
        ``(embeddings, labels)`` as NumPy arrays concatenated along axis 0 in
        iteration order, or ``(None, None)`` when ``data_loader`` is empty.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    model.to(device)

    # Collect per-batch arrays and concatenate once at the end; concatenating
    # inside the loop re-copies everything gathered so far on every batch.
    embedding_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Passing the loader itself (not enumerate(...)) lets tqdm show the total.
        for batch_imgs, batch_labels in tqdm(data_loader):
            batch_E = model.get_embedding(batch_imgs.to(device))
            embedding_chunks.append(batch_E.cpu().numpy())
            label_chunks.append(batch_labels.cpu().numpy())

    if not embedding_chunks:
        # Keep the original contract for an empty loader.
        return None, None

    embeddings = np.concatenate(embedding_chunks, axis=0)
    labels = np.concatenate(label_chunks, axis=0)
    return embeddings, labels

def vis_tSNE(embeddings, labels, backbone='Convnet'):
    """Project embeddings to 2-D with t-SNE and save a per-class scatter plot.

    Args:
        embeddings: array whose rows are the sample embeddings.
        labels: array of integer class ids (0-4), one per row of ``embeddings``.
        backbone: tag inserted into the output file name.

    The figure is written to ``./tsne_<backbone>.png``.

    NOTE(review): class id ``i`` is drawn with the i-th colour and named by the
    i-th legend entry below; confirm that this order matches the label indices
    produced by the dataset.
    """
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    class_names = ['Healthy', 'Miner', 'Rust', 'Phoma', 'Cercospora']

    n_rows = embeddings.shape[0]
    projected = TSNE(n_components=2).fit_transform(embeddings[:n_rows, :])

    plt.figure(figsize=(16, 16))
    for class_id, colour in enumerate(palette):
        # Row indices of the samples belonging to this class.
        members = np.nonzero(labels == class_id)[0]
        xs = projected[members, 0]
        ys = projected[members, 1]
        plt.scatter(xs, ys, alpha=.8, color=colour, s=200)
    # Legend entries are matched to the scatter calls by drawing order.
    plt.legend(class_names, fontsize=30)
    plt.savefig(f'./tsne_{backbone}.png')



In [None]:
# Embed the '/test/' split one image per batch (batch_size=1) with the trained
# model and plot a t-SNE of the embeddings; with the default `backbone`
# argument the figure is saved as './tsne_Convnet.png'.
# NOTE: the `val_*` variable names below actually hold test-split data.
test_data = torchvision.datasets.ImageFolder(root=path_data + '/test/', transform=get_val_transforms())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1)

val_embeddings_cl, val_labels_cl = generate_embeddings(test_loader, model)
vis_tSNE(val_embeddings_cl, val_labels_cl)

66it [00:00, 104.09it/s]

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Both splits are loaded with the validation transforms and embedded with the
# trained model; the train embeddings fit the classifier, the test embeddings
# are used for scoring.
train_data = torchvision.datasets.ImageFolder(root=path_data + '/train/', transform=get_val_transforms())
test_data = torchvision.datasets.ImageFolder(root=path_data + '/test/', transform=get_val_transforms())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

x_train, y_train = generate_embeddings(train_loader, model)
x_test, y_true = generate_embeddings(test_loader, model)

# 1-nearest-neighbour on the embeddings; the imported SVC() and SGDClassifier()
# can be swapped in here as alternative classifiers.
classifier = KNeighborsClassifier(n_neighbors=1)
y_pred = classifier.fit(x_train, y_train).predict(x_test)

# Scores are reported as percentages rounded to two decimals; precision,
# recall and F1 are macro-averaged over the classes.
accuracy = round(100 * accuracy_score(y_true, y_pred), 2)
precision = round(100 * precision_score(y_true, y_pred, average='macro'), 2)
recall = round(100 * recall_score(y_true, y_pred, average='macro'), 2)
f1 = round(100 * f1_score(y_true, y_pred, average='macro'), 2)

report_lines = [
    '--- Results for MobileNetv2 Embeddings on KNN (k = 1) ---',
    f'Accuracy Score:{accuracy}',
    f'Precision Score: {precision}',
    f'Recall Score: {recall}',
    f'F1 Score: {f1}',
]
print('\n'.join(report_lines))

49it [00:07,  6.34it/s]
11it [00:01,  6.70it/s]


--- Results for MobileNetv2 Embeddings on KNN (k = 1) ---
Accuracy Score:96.12
Precision Score: 96.08
Recall Score: 95.87
F1 Score: 95.96
