In [None]:
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset,Subset
from torchvision import transforms,models,datasets
import torch
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
from datetime import datetime
import matplotlib.pyplot as plt
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split,WeightedRandomSampler
from torchvision.utils import make_grid
from sklearn.metrics import confusion_matrix
import cv2
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
import seaborn as sns
import itertools
import tensorflow as tf
import tensorflow_hub as hub
import shutil
from sklearn.model_selection import StratifiedShuffleSplit
#from torchsummary import summary

In [None]:
# Ask which environment the notebook is running in; the answer selects the
# data and checkpoint locations used by later cells.
machine_prompt = "1 : local machine\n2 : colab\n3 : kaggle\n"
select_machine = int(input(machine_prompt))

In [None]:
# Dataset root per environment (1 = local machine, 2 = colab, 3 = kaggle).
# All three currently point at the same relative location; edit a single entry
# here if one environment stores the data elsewhere.
data_roots = {
    1: '../input/plant-pathology-fgvc8-resized-images',
    2: '../input/plant-pathology-fgvc8-resized-images',
    3: '../input/plant-pathology-fgvc8-resized-images',
}

# Fail immediately on an unknown choice instead of leaving the path variables
# undefined and hitting a NameError in a later cell.
if select_machine not in data_roots:
    raise ValueError(f"select_machine must be 1, 2 or 3, got {select_machine!r}")

data_root = data_roots[select_machine]
train_image_path = f'{data_root}/train_images/train_images'
test_image_path = f'{data_root}/test_images'
train_df_path = f'{data_root}/train.csv'
test_df_path = f'{data_root}/sample_submission.csv'

In [None]:
# Load the training label table and print its first rows.
df_train = pd.read_csv(train_df_path)
print(df_train.head())
print('-----------------------')
# Number of rows per distinct value of the 'labels' column; `source` is reused
# further down for the label-distribution pie chart.
source = df_train['labels'].value_counts()
print(source)

# The string literal below is disabled code and is never executed: it would
# drop every row whose label is in `forbid_label_list` and print the counts again.
# NOTE(review): the entry 'cab frog_eye_leaf_spot complex' looks like a typo of
# 'scab frog_eye_leaf_spot complex' (which is also listed) - confirm before re-enabling.
"""
forbid_label_list = ['frog_eye_leaf_spot complex','powdery_mildew complex','rust complex','rust frog_eye_leaf_spot','scab frog_eye_leaf_spot','cab frog_eye_leaf_spot complex','scab frog_eye_leaf_spot complex']
for i in forbid_label_list:    
    df_train = df_train.loc[df_train['labels']!=i]

print("-----------------------")
print(df_train.head())
print('-----------------------')
source = df_train['labels'].value_counts()
print(source)
"""

In [None]:
# Bar chart of the number of training rows per label.
# The counts are computed once and passed by keyword: recent seaborn releases
# no longer accept x/y as positional arguments to barplot.
label_counts = df_train.labels.value_counts()
plt.figure(figsize=(15,12))
labels = sns.barplot(x=label_counts.index, y=label_counts)
# Tilt the tick labels so the long multi-disease label names stay readable.
for item in labels.get_xticklabels():
    item.set_rotation(45)

In [None]:
# Class names in code order: a label's position in this list is its integer code.
label_list = [
    'scab',
    'healthy',
    'frog_eye_leaf_spot',
    'rust',
    'complex',
    'powdery_mildew',
    'scab frog_eye_leaf_spot',
    'scab frog_eye_leaf_spot complex',
    'frog_eye_leaf_spot complex',
    'rust frog_eye_leaf_spot',
    'rust complex',
    'powdery_mildew complex',
]
# Label string -> integer code lookup.
code_dict = {name: code for code, name in enumerate(label_list)}

# Add the integer code of each row's label as a new 'codes' column.
df_train['codes'] = df_train['labels'].map(code_dict)
print(df_train.head())

separator = '__________________________'
print(separator)
print(label_list)
print(separator)
# Human-readable "code ----> label" listing.
for name, code in code_dict.items():
    print(f'{code} ----> {name}')

In [None]:
# Pie chart of the label distribution, built from the per-label counts in `source`.
label_pie = go.Pie(labels=source.index, values=source.values)
fig = go.Figure(data=[label_pie]).update_layout(title='Label distribution')
fig.show()

In [None]:
class PathologyPlantsDataset(Dataset):  # inherits from PyTorch's Dataset base class
    """Image dataset backed by a dataframe of file names and integer labels.

    Each item is read positionally from ``data_frame``: column 0 holds the
    image file name (relative to ``root_dir``) and column 2 holds the integer
    class code.

    Args:
        data_frame: table with one row per image, laid out as described above.
        root_dir: directory that contains the image files.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``idx`` may be a plain integer or a tensor index. The image is always
        converted to 3-channel RGB so that grayscale, palette or RGBA files
        produce the same channel count as the rest of the batch.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = int(self.data_frame.iloc[idx, 2])

        if self.transform:
            image = self.transform(image)

        return (image, label)

In [None]:
# Preprocessing applied to every image the dataset returns: random flips,
# resize to 299x299, then conversion to a tensor.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # Disabled augmentation experiments, kept for reference:
    #   transforms.ColorJitter(brightness=.2, hue=.1)
    #   transforms.RandomAdjustSharpness(sharpness_factor=2)
    #   transforms.RandomPerspective(distortion_scale=0.3,p=0.2)
    #   transforms.RandomRotation(degrees=(0,180))
    #   transforms.RandomAffine()
    #   transforms.RandomResizedCrop(size=(224,224))
    transforms.Resize(size=(299,299)),
    transforms.ToTensor(),
    # Disabled: transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
transform = transforms.Compose(augmentation_steps)

# Full labelled dataset; it is split into train/validation subsets in a later cell.
pathology_train = PathologyPlantsDataset(df_train, train_image_path, transform)
print(pathology_train)

In [None]:
# Sanity check: display one transformed sample together with its class name.
temp_img, temp_label = pathology_train[3]
# The dataset yields channel-first tensors; move channels last for imshow.
channels_last = np.transpose(temp_img.numpy(), (1, 2, 0))
plt.imshow(channels_last)
plt.title(label_list[temp_label])
plt.show()

In [None]:
batch_size = 64
split_ratio = 0.8   # fraction of the samples used for training
split_seed = 42     # fixed seed so the train/validation split is the same on every run

train_size = int(len(pathology_train)*split_ratio)
validation_size = len(pathology_train) - train_size

# Seeded split: without an explicit generator the membership of the two subsets
# changes on every kernel restart, so results cannot be compared between runs.
# NOTE(review): both subsets wrap the same `pathology_train` object, so the
# validation images go through the same random flips as the training images.
train_dataset, valid_dataset = random_split(
    pathology_train,
    [train_size, validation_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Batch the two subsets. Only the training loader is shuffled; validation
# metrics do not depend on sample order.
# (A WeightedRandomSampler built from 1/source was tried here and is disabled.)
train_dl = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, pin_memory = True)
val_dl = DataLoader(valid_dataset, batch_size = batch_size, shuffle = False, pin_memory = True)

print(f"Length of Dataset : {len(pathology_train)}")
print(f"Length of Train Data : {len(train_dataset)}")
print(f"Length of Validation Data : {len(valid_dataset)}")

In [None]:
def show_batch(dl):
    """Draw the images of the first batch yielded by `dl` as a single grid.

    Nothing is drawn when the loader yields no batches.
    """
    first_batch = next(iter(dl), None)
    if first_batch is None:
        return

    images, _ = first_batch
    # make_grid returns a channel-first image; imshow needs channels last.
    grid = make_grid(images, nrow=16).permute(1, 2, 0)

    _, axis = plt.subplots(figsize=(16, 12))
    axis.set_xticks([])
    axis.set_yticks([])
    axis.imshow(grid)


show_batch(train_dl)

In [None]:
# Number of output classes (one per entry of the label encoding).
targets_size = 12

# Pretrained Inception v3 with every parameter left trainable (full fine-tuning).
model = models.inception_v3(pretrained=True)
model.requires_grad_(True)
print(model)

# Swap both classification heads for fresh layers with `targets_size` outputs,
# reusing the input width of the layers they replace.
model.fc = nn.Linear(model.fc.in_features, targets_size)
model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, targets_size)  # auxiliary head of Inception v3
print(model.fc)

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise.
# The detected device is kept as-is: forcing "cuda" afterwards makes
# model.to(device) raise on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

In [None]:
# Loss for single-label classification over the integer class codes.
# (nn.MultiLabelSoftMarginLoss() was tried as an alternative and is disabled.)
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter.
learning_rate = 0.0005
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Per-epoch loss history; batch_gd appends to these lists on every call, so the
# curves accumulate across the successive training cells.
train_losses, validation_losses = [], []

In [None]:
def _accumulate_confusion(cm, targets, predictions):
    """Add one batch of (true class, predicted class) pairs to the square count matrix `cm` in place."""
    n_classes = cm.shape[0]
    true_idx = targets.detach().reshape(-1).long().cpu()
    pred_idx = predictions.detach().reshape(-1).long().cpu()
    # Encode every (row, column) pair as one flat index and count them all at once
    # instead of incrementing the matrix sample by sample.
    pair_counts = torch.bincount(true_idx * n_classes + pred_idx, minlength=n_classes * n_classes)
    cm += pair_counts.view(n_classes, n_classes)


def batch_gd(model, criterion, train_loader, validation_loader, epochs, min_accuracy=0.86):
    """Train `model` for `epochs` epochs, validating after each one.

    Relies on notebook-level names: `optimizer`, `device`, `targets_size`,
    `select_machine`, and the `train_losses` / `validation_losses` lists, to
    which one value per epoch is appended.

    Args:
        model: network that returns ``(output, aux_output)`` in train mode and
            a single output in eval mode (as torchvision's Inception v3 does).
        criterion: loss taking ``(logits, targets)``.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        validation_loader: iterable of ``(inputs, targets)`` validation batches.
        epochs: number of passes over `train_loader`.
        min_accuracy: validation accuracy, as a fraction in [0, 1], that must be
            exceeded before the first checkpoint is written. Later checkpoints
            are written only when the best accuracy so far is beaten.

    Returns:
        The notebook-level ``(train_losses, validation_losses)`` lists.

    Side effects:
        Publishes the confusion matrices of the last completed epoch as the
        notebook-level names `train_cm` and `validation_cm` (rows = true class,
        columns = predicted class), prints per-epoch metrics, and writes
        checkpoint files when validation accuracy improves.
    """
    # Exposed at notebook scope because the plotting cells read these names.
    global train_cm, validation_cm

    # Checkpoint directory per environment. Files are written with explicit
    # paths rather than os.chdir, so the relative dataset paths used by the
    # data loaders keep resolving after a checkpoint is saved.
    save_dirs = {
        1: 'C:/Local Machine/Cassava plant disease detection/...saves1',  # local
        2: '/content/drive/MyDrive',                                      # colab
        3: '/kaggle/working',                                             # kaggle
    }

    was_training = model.training
    # Accuracy is tracked as a fraction, so the threshold must be one too
    # (a threshold of 86.0 could never be exceeded and nothing was ever saved).
    best_accuracy = min_accuracy

    for e in range(epochs):
        t0 = datetime.now()
        train_n_correct = 0
        train_n_total = 0
        validation_n_correct = 0
        validation_n_total = 0
        epoch_train_cm = torch.zeros(targets_size, targets_size, dtype=torch.int64)
        epoch_validation_cm = torch.zeros(targets_size, targets_size, dtype=torch.int64)

        # ---- training pass ----
        model.train()
        train_loss = []
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()

            output, aux_output = model(inputs)      # main and auxiliary logits
            loss1 = criterion(output, targets)
            loss2 = criterion(aux_output, targets)
            # Train both heads on every step: main loss plus down-weighted
            # auxiliary loss. Taking the smaller of the two would send gradients
            # through only one head per step.
            loss = loss1 + 0.4 * loss2
            # Log the main-head loss so it is comparable with the validation
            # loss, which is also computed on the main output only.
            train_loss.append(loss1.item())

            predictions = output.argmax(dim=1)
            train_n_correct += (predictions == targets).sum().item()
            train_n_total += targets.shape[0]
            _accumulate_confusion(epoch_train_cm, targets, predictions)

            loss.backward()
            optimizer.step()

        train_loss = np.mean(train_loss)
        train_accuracy = train_n_correct/train_n_total

        # ---- validation pass ----
        # eval mode + no_grad: BatchNorm statistics and Dropout must not be
        # affected by validation data, and no autograd graph is needed.
        model.eval()
        validation_loss = []
        with torch.no_grad():
            for inputs, targets in validation_loader:
                inputs, targets = inputs.to(device), targets.to(device)

                output = model(inputs)              # eval mode returns the main logits only
                loss = criterion(output, targets)
                validation_loss.append(loss.item())

                predictions = output.argmax(dim=1)
                validation_n_correct += (predictions == targets).sum().item()
                validation_n_total += targets.shape[0]
                _accumulate_confusion(epoch_validation_cm, targets, predictions)

        validation_loss = np.mean(validation_loss)
        validation_accuracy = validation_n_correct/validation_n_total

        train_losses.append(train_loss)
        validation_losses.append(validation_loss)
        train_cm, validation_cm = epoch_train_cm, epoch_validation_cm
        dt = datetime.now() - t0
        print('----------------------------------------------------------------------------------------')
        print(f"Epoch : {e+1}/{epochs} Train_loss:{train_loss:.3f} Validation_loss:{validation_loss:.3f} Duration:{dt} Train_Acc:{train_accuracy*100:.2f} Validation_Acc:{validation_accuracy*100:.2f}")
        print(train_cm)
        print(validation_cm)

        if validation_accuracy > best_accuracy:
            best_accuracy = validation_accuracy
            save_dir = save_dirs.get(select_machine, '.')
            torch.save(model.state_dict(), os.path.join(save_dir, 'plant_disease_model_1.pt'))
            # Persist the loaders that were actually used for this run.
            torch.save(train_loader, os.path.join(save_dir, 'train_dl1.pth'))
            torch.save(validation_loader, os.path.join(save_dir, 'val_dl1.pth'))
            print("model saved !")

    # Hand the model back in the mode the caller left it in.
    model.train(was_training)
    return train_losses, validation_losses

In [None]:
# Warm-up run: a single epoch.
model.train()
train_losses, validation_losses = batch_gd(
    model=model,
    criterion=criterion,
    train_loader=train_dl,
    validation_loader=val_dl,
    epochs=1,
)

In [None]:
# Continue training for 10 more epochs.
model.train()
train_losses, validation_losses = batch_gd(
    model=model,
    criterion=criterion,
    train_loader=train_dl,
    validation_loader=val_dl,
    epochs=10,
)

In [None]:
# Continue training for 19 more epochs.
model.train()
train_losses, validation_losses = batch_gd(
    model=model,
    criterion=criterion,
    train_loader=train_dl,
    validation_loader=val_dl,
    epochs=19,
)

In [None]:
# Training vs. validation loss, one point per completed epoch.
loss_curves = (
    (train_losses, 'train_loss'),
    (validation_losses, 'validation_loss'),
)
for curve_values, curve_name in loss_curves:
    plt.plot(curve_values, label=curve_name)
plt.xlabel('No of Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map on the current figure.

    Args:
        cm: square matrix of counts, rows = true class, columns = predicted
            class. May be a NumPy array, a nested sequence, or a torch tensor.
        classes: class names used as tick labels, in matrix order.
        normalize: when True, each row is divided by its sum before display;
            rows with no samples are shown as all zeros.
        title: figure title.
        cmap: matplotlib colormap for the heat map.
    """
    # Accept torch tensors (including ones on a GPU) as well as array-likes.
    if hasattr(cm, 'detach'):
        cm = cm.detach().cpu().numpy()
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class without any sample has a zero row total; leave that row at
        # zero instead of dividing by zero and filling it with NaN.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of it; switch to white text on dark cells.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()

In [None]:
# Training confusion matrix. `train_cm` is expected to hold the matrix produced
# by the most recent training epoch. The class names come from `label_list`;
# the previously used name `label_names` is not defined anywhere in this notebook.
plt.figure(figsize=(10,10))
plot_confusion_matrix(train_cm, label_list)

In [None]:
# Validation confusion matrix. `validation_cm` is expected to hold the matrix
# produced by the most recent validation pass. The class names come from
# `label_list`; the previously used name `label_names` is not defined anywhere
# in this notebook.
plt.figure(figsize=(10,10))
plot_confusion_matrix(validation_cm, label_list)

# **Save Model, Data Loaders**

In [None]:
# Output directory per environment. Files are written with explicit paths
# instead of changing the working directory, so the relative dataset paths used
# by the data loaders keep resolving after this cell has run.
save_dirs = {
    1: 'C:/Local Machine/plant-pathology-fgvc8-resized-images/saves',   # local
    2: '/content/drive/MyDrive',                                        # colab
    3: '/kaggle/working',                                               # kaggle
}
save_dir = save_dirs.get(select_machine, '.')

torch.save(model.state_dict(), os.path.join(save_dir, 'plant_disease_model_1.pt'))
# The loaders are pickled whole (dataset, transform and split indices included).
torch.save(train_dl, os.path.join(save_dir, 'train_dl1.pth'))
torch.save(val_dl, os.path.join(save_dir, 'val_dl1.pth'))

# **Load Model, Data Loaders**

In [None]:
# Directory the artifacts were saved to, per environment. Files are read with
# explicit paths instead of changing the working directory, so the relative
# dataset paths used by the data loaders keep resolving after this cell has run.
load_dirs = {
    1: 'C:/Local Machine/plant-pathology-fgvc8-resized-images/saves',   # local
    2: '/content/drive/MyDrive',                                        # colab
    3: '/kaggle/working',                                               # kaggle
}
load_dir = load_dirs.get(select_machine, '.')

targets_size = 12
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
model.load_state_dict(
    torch.load(os.path.join(load_dir, "plant_disease_model_1.pt"), map_location=device)
)
# SECURITY NOTE: the loader files are full pickles, and unpickling can execute
# arbitrary code - only load files that this notebook produced itself.
# NOTE(review): recent PyTorch releases may need an explicit weights_only=False
# to unpickle non-tensor objects such as DataLoaders - confirm for the installed version.
train_dl = torch.load(os.path.join(load_dir, "train_dl1.pth"))
val_dl = torch.load(os.path.join(load_dir, "val_dl1.pth"))