In [None]:
import torch

# Choose the compute device once: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # Report how many GPUs are visible and the name of the first one.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

In [None]:
# Imports here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import cv2

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

In [None]:
# Dataset root plus one sub-directory per split.
# NOTE(review): no other cell visible in this notebook reads these four names — confirm they are still needed.
data_dir = 'PyTorch_Image_Classifier/flowers'
train_dir, valid_dir, test_dir = (
    f'{data_dir}/{split}' for split in ('train', 'valid', 'test')
)

In [None]:
!pip install kaggle 

In [None]:
!pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
import os
os.environ['KAGGLE_USERNAME'] = "" # username from the json file
os.environ['KAGGLE_KEY'] = "" # key from the json file
!kaggle competitions download -c cassava-leaf-disease-classification # api copied from kaggle

In [None]:
!unzip cassava-leaf-disease-classification.zip

In [None]:
# Read train.csv from the working directory; later cells use its "image_id" and "label" columns.
df = pd.read_csv("train.csv")

In [None]:
# Number of rows for each distinct value of the "label" column.
df["label"].value_counts()

In [None]:
import seaborn as sns

# Bar chart of the number of images per label.
# The counts are computed once and plotted directly: handing the short
# value_counts() result to `y` together with `data=df` would pair it with
# the rows of the full frame instead of with the labels.
label_counts = df["label"].value_counts().sort_index()
ax = sns.barplot(x=label_counts.index, y=label_counts.values)
ax.set(xlabel="label", ylabel="number of images");

In [None]:
# Distribution plot of the "label" column.
sns.displot(data=df, x="label")


In [None]:
# Distinct values that occur in the "label" column.
df["label"].unique()

In [None]:
# Per-label row counts as a NumPy array in ascending order.
np.sort(df["label"].value_counts().to_numpy())

In [None]:
# Share of the labelled rows held by each label, smallest class first.
df["label"].value_counts().sort_values().div(df["label"].count())

In [None]:
# Per-label row counts ordered from the smallest class to the largest.
df["label"].value_counts().sort_values()

In [None]:
# Show the DataFrame as the cell output.
df

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["image_id"],
    df["label"],
    test_size=0.20,
    random_state=42,
)

In [None]:
#y_test.value_counts().sort_values()
#X_train.count()
#y_train

In [None]:
# Show the image ids that ended up in the held-out test split.
X_test

In [None]:
# Split the non-test rows again: 80% for training, 20% for validation.
train_data, val_data, train_label, val_label = train_test_split(
    X_train,
    y_train,
    test_size=0.20,
    random_state=42,
)

## Optional: preview of sample images (this part can be skipped)

In [None]:
# Absolute location of the extracted training images.
# NOTE(review): the other cells visible here build their paths inline and do not read `path` — confirm it is still needed.
path = "/content/train_images"

In [None]:
# Read label_num_to_disease_map.json as a pandas Series (later indexed by integer label to get a display name)
# and show it as the cell output.
names_of_disease = pd.read_json('label_num_to_disease_map.json', typ='series')
names_of_disease

In [None]:
from PIL import Image

# Show the first nine training images in a 3x3 grid, each titled with the name mapped to its label.
plt.figure(figsize=(16, 12))
for slot in range(9):
    file_name, label = train_data.iloc[slot], train_label.iloc[slot]
    plt.subplot(3, 3, slot + 1)
    plt.imshow(np.array(Image.open('train_images/' + file_name)))
    plt.title(f'{names_of_disease[int(label)]}')
plt.show()

## End of the part that can be skipped

In [None]:
!mkdir "train_i"
!mkdir "val_i"
!mkdir "test_i" 

In [None]:
# One sub-folder per class label (0-4) inside every split root; the datasets
# are later read from these roots with datasets.ImageFolder.
# exist_ok=True replaces the separate os.path.exists() test, so the cell is
# safe to re-run and has no check-then-create gap.
for label in range(5):
    for split_root in ("train_i", "val_i", "test_i"):
        os.makedirs(os.path.join(split_root, str(label)), exist_ok=True)

In [None]:
import os
import shutil

# Move every training image into the folder named after its class label.
# Paths are relative to the working directory, matching the 'train_images/'
# and "train_i/" paths used by the other cells of this notebook.
moved, missing = 0, 0
for file_name, label in zip(train_data, train_label):
    source = os.path.join("train_images", file_name)
    target_dir = os.path.join("train_i", str(label))

    # shutil.move renames the file to `target_dir` itself when that folder
    # does not exist, so successive images would overwrite one another.
    os.makedirs(target_dir, exist_ok=True)

    # The source is gone when an earlier run of this cell already moved it.
    if not os.path.exists(source):
        missing += 1
        continue

    shutil.move(source, target_dir)
    moved += 1

# A single summary line instead of one printed path per image.
print(f"train_i: moved {moved} images, {missing} not found in train_images/ (already moved?)")
  

In [None]:
# Move every validation image into the folder named after its class label.
# Paths are relative to the working directory, matching the 'train_images/'
# and "val_i/" paths used by the other cells of this notebook.
moved, missing = 0, 0
for file_name, label in zip(val_data, val_label):
    source = os.path.join("train_images", file_name)
    target_dir = os.path.join("val_i", str(label))

    # shutil.move renames the file to `target_dir` itself when that folder
    # does not exist, so successive images would overwrite one another.
    os.makedirs(target_dir, exist_ok=True)

    # The source is gone when an earlier run of this cell already moved it.
    if not os.path.exists(source):
        missing += 1
        continue

    shutil.move(source, target_dir)
    moved += 1

# A single summary line instead of one printed path per image.
print(f"val_i: moved {moved} images, {missing} not found in train_images/ (already moved?)")

In [None]:
# Move every held-out test image into the folder named after its class label.
# Paths are relative to the working directory, matching the 'train_images/'
# and "test_i/" paths used by the other cells of this notebook.
moved, missing = 0, 0
for file_name, label in zip(X_test, y_test):
    source = os.path.join("train_images", file_name)
    target_dir = os.path.join("test_i", str(label))

    # shutil.move renames the file to `target_dir` itself when that folder
    # does not exist, so successive images would overwrite one another.
    os.makedirs(target_dir, exist_ok=True)

    # The source is gone when an earlier run of this cell already moved it.
    if not os.path.exists(source):
        missing += 1
        continue

    shutil.move(source, target_dir)
    moved += 1

# A single summary line instead of one printed path per image.
print(f"test_i: moved {moved} images, {missing} not found in train_images/ (already moved?)")

In [None]:
# Training pipeline: random rotation / crop / flip for augmentation, then
# tensor conversion and per-channel normalisation.
# The mean/std triples are presumably the ImageNet statistics that go with
# the pretrained network — TODO confirm.
training_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=30),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
validation_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Test pipeline: identical steps to the validation pipeline.
testing_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per split folder (sub-folder name = class).
training_dataset = datasets.ImageFolder(root="train_i/", transform=training_transforms)
validation_dataset = datasets.ImageFolder(root="val_i/", transform=validation_transforms)
testing_dataset = datasets.ImageFolder(root="test_i/", transform=testing_transforms)

# Batch loaders; only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(dataset=training_dataset, batch_size=64, shuffle=True)
validate_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=32)
test_loader = torch.utils.data.DataLoader(dataset=testing_dataset, batch_size=32)

# Time to build the model

In [None]:
# Transfer learning: start from torchvision's VGG16 with its pretrained weights
# (pretrained=True); the classifier part is replaced in a later cell.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=` — confirm against the installed version.
model = models.vgg16(pretrained=True)
# Show the architecture as the cell output.
model

In [None]:

from collections import OrderedDict

# Switch gradient tracking off for every pretrained parameter so that
# back-propagation leaves them untouched.
model.requires_grad_(False)

# New classification head: 25088 inputs -> 5000 hidden units (ReLU, 50% dropout)
# -> 5 outputs turned into log-probabilities.
classifier = nn.Sequential(OrderedDict(
    fc1=nn.Linear(25088, 5000),
    relu=nn.ReLU(),
    drop=nn.Dropout(p=0.5),
    fc2=nn.Linear(5000, 5),
    output=nn.LogSoftmax(dim=1),
))

# Swap the head into the network; its freshly created parameters are trainable.
model.classifier = classifier

In [None]:
# Function for the validation pass
def validation(model, validateloader, criterion):
    """Run one pass over a validation loader and accumulate loss and accuracy.

    The batches are evaluated on the device the model's parameters live on
    (CPU for a model without parameters), so the function works with or
    without a GPU. The model's train/eval mode is left as the caller set it.

    Args:
        model: Network returning one score per class for each sample; the
            predicted class is the index of the largest score along dim 1.
        validateloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied as ``criterion(output, labels)``.

    Returns:
        tuple: ``(val_loss, accuracy)`` where ``val_loss`` is the sum of the
        per-batch losses and ``accuracy`` is the sum of the per-batch
        accuracies. Divide both by the number of batches to get averages.
    """
    # Follow the model instead of assuming 'cuda' is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_loss = 0
    accuracy = 0

    # No gradients are needed for evaluation; this is harmless when the
    # caller already disabled them.
    with torch.no_grad():
        for images, labels in validateloader:
            images, labels = images.to(device), labels.to(device)

            output = model(images)
            val_loss += criterion(output, labels).item()

            # The arg-max of the raw output equals the arg-max of
            # exp(output), so no exponentiation is required.
            predictions = output.argmax(dim=1)
            equality = (labels == predictions)
            accuracy += equality.type(torch.FloatTensor).mean()

    return val_loss, accuracy

In [None]:
# Number of training samples per label.
train_label.value_counts()

In [None]:
# Candidate class weights: one minus each label's share of the training set,
# rescaled so that the weights add up to 1.
(1 - train_label.value_counts() / len(train_label)).pipe(
    lambda rarity: rarity / sum(rarity)
)

In [None]:
# Loss function and optimiser.

# One weight per class index 0-4.
# NOTE(review): presumably rounded from the inverse-frequency expression
# evaluated in the preceding cell — confirm when the split changes.
weights = [0.24, 0.22, 0.22, 0.10, 0.22]

# Create the weights on the device chosen at the top of the notebook instead
# of calling .cuda(), which raises on a machine without a GPU.
class_weights = torch.tensor(weights, dtype=torch.float32, device=device)

# Negative log-likelihood loss, matching the LogSoftmax output of the classifier.
criterion = nn.NLLLoss(weight=class_weights)

# Only the parameters of the replaced classifier are handed to the optimiser.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.0001)

In [None]:
# Train the classifier

def train_classifier(epochs=10, print_every=40):
    """Train the network and report validation metrics at regular intervals.

    Works on the notebook-level objects ``model``, ``optimizer``,
    ``criterion``, ``train_loader``, ``validate_loader`` and ``device`` and
    calls the notebook's ``validation`` function.

    Args:
        epochs: Number of passes over ``train_loader`` (default 10).
        print_every: Number of training batches between two validation
            reports (default 40).
    """
    steps = 0
    # Accumulated across epoch boundaries: it is only reset after a report,
    # so it always covers exactly `print_every` batches when it is averaged.
    running_loss = 0

    # Use the device selected at the top of the notebook rather than a
    # hard-coded 'cuda'.
    model.to(device)

    for e in range(epochs):

        model.train()

        for images, labels in train_loader:

            steps += 1

            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:

                # Evaluation mode (dropout off) and no gradients while validating.
                model.eval()
                with torch.no_grad():
                    validation_loss, accuracy = validation(model, validate_loader, criterion)

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                      "Validation Loss: {:.3f}.. ".format(validation_loss/len(validate_loader)),
                      "Validation Accuracy: {:.3f}".format(accuracy/len(validate_loader)))

                running_loss = 0
                # Back to training mode for the next batches.
                model.train()


train_classifier()

In [None]:
def test_accuracy(model, test_loader):

    # Do validation on the test set
    model.eval()
    model.to('cuda')

    with torch.no_grad():
    
        accuracy = 0
    
        for images, labels in iter(test_loader):
    
            images, labels = images.to('cuda'), labels.to('cuda')
    
            output = model.forward(images)

            probabilities = torch.exp(output)
        
            equality = (labels.data == probabilities.max(dim=1)[1])
        
            accuracy += equality.type(torch.FloatTensor).mean()
        
        print("Test Accuracy: {}".format(accuracy/len(test_loader)))    
        
        
# Evaluate the trained model on the batches of test_loader.
test_accuracy(model, test_loader)

In [None]:
def save_checkpoint(model, filepath='checkpoint.pth'):
    """Write the information needed to rebuild the trained model to disk.

    The checkpoint is a dict with the keys ``'arch'`` (always ``"vgg16"``),
    ``'class_to_idx'`` and ``'model_state_dict'``.

    Args:
        model: Trained network. Its ``class_to_idx`` attribute is set from
            the notebook-level ``training_dataset`` before saving.
        filepath: Destination file; defaults to ``'checkpoint.pth'``, the
            name that used to be hard-coded.
    """
    # Keep the class mapping on the model so it travels with the checkpoint.
    model.class_to_idx = training_dataset.class_to_idx

    checkpoint = {'arch': "vgg16",
                  'class_to_idx': model.class_to_idx,
                  'model_state_dict': model.state_dict()
                 }

    torch.save(checkpoint, filepath)


save_checkpoint(model)

In [None]:
from collections import OrderedDict

# Function that loads a checkpoint and rebuilds the model

def load_checkpoint(filepath):
    """Rebuild the VGG16-based classifier from a saved checkpoint file.

    Args:
        filepath: Path of a checkpoint dict holding the keys ``'arch'``,
            ``'class_to_idx'`` and ``'model_state_dict'``.

    Returns:
        The rebuilt model with frozen pretrained layers, the 5-class
        classifier, the restored weights and a ``class_to_idx`` attribute.
        The model is on the CPU; move it to another device as needed.

    Raises:
        ValueError: If the checkpoint's ``'arch'`` is not ``'vgg16'``.
    """
    # map_location lets a checkpoint written on a GPU machine be opened on a
    # CPU-only one.
    # NOTE: torch.load unpickles the file — only open checkpoints from a
    # trusted source.
    checkpoint = torch.load(filepath, map_location="cpu")

    # Fail early with a clear error; merely printing a message left `model`
    # undefined for the statements that follow.
    if checkpoint['arch'] != 'vgg16':
        raise ValueError("Architecture not recognized: {!r}".format(checkpoint['arch']))

    # No pretrained download needed: load_state_dict below is strict and
    # therefore replaces every weight of the network.
    model = models.vgg16()

    for param in model.parameters():
        param.requires_grad = False

    model.class_to_idx = checkpoint['class_to_idx']

    # Same classifier layout as used for training, so the saved keys match.
    classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(25088, 5000)),
                                            ('relu', nn.ReLU()),
                                            ('drop', nn.Dropout(p=0.5)),
                                            ('fc2', nn.Linear(5000, 5)),
                                            ('output', nn.LogSoftmax(dim=1))]))

    model.classifier = classifier

    model.load_state_dict(checkpoint['model_state_dict'])

    return model

#model = load_checkpoint('checkpoint.pth')
#print(model)