In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use("ggplot")

import torch
import torch.nn as nn

import timm

import gc
import os
import time
import random
from datetime import datetime

from PIL import Image
from tqdm.notebook import tqdm
from sklearn import model_selection, metrics

In [24]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [25]:
os.listdir()

['PretrainedVIT_CIFAR_Neha.ipynb',
 'torch-1.7-cp37-cp37m-linux_x86_64.whl',
 'torch_xla-1.7-cp37-cp37m-linux_x86_64.whl',
 'cassava-leaf-disease-classification',
 'pytorch-xla-env-setup.py',
 '.jovianrc',
 'Fashion_MNIST.ipynb',
 'jx_vit_base_p16_224-80ecf9dd.pth',
 '.ipynb_checkpoints',
 'PretrainedVIT_AS_Cassava.ipynb',
 'Pretrained.ipynb',
 'CIFAR10.ipynb',
 'torchvision-1.7-cp37-cp37m-linux_x86_64.whl',
 'PretrainedVIT_CIFAR.ipynb',
 'Cassava.ipynb']

## CASSAVA Final - 40 eps

In [26]:
MODEL_PATH = ("jx_vit_base_p16_224-80ecf9dd.pth")

In [27]:
# For parallelization in TPUs
os.environ["XLA_USE_BF16"] = "1"
os.environ["XLA_TENSOR_ALLOCATOR_MAXSIZE"] = "100000000"

In [28]:
# general global variables
DATA_PATH = "cassava-leaf-disease-classification"
TRAIN_PATH = "cassava-leaf-disease-classification/train_images"
TEST_PATH = "cassava-leaf-disease-classification/test_images"
MODEL_PATH = ("jx_vit_base_p16_224-80ecf9dd.pth")
# model specific global variables

## Cassava
IMG_SIZE = 224
BATCH_SIZE = 16
LR = 0.001
# GAMMA = 0.7
N_EPOCHS = 40 # 10



In [29]:
df = pd.read_csv("cassava-leaf-disease-classification/train.csv")

In [30]:
import torchvision
from torchvision.transforms import ToTensor
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

class CassavaDataset(torch.utils.data.Dataset):
    """
    Helper Class to create the pytorch dataset
    """

    def __init__(self, df, data_path=DATA_PATH, mode="train", transforms=None):
        super().__init__()
        self.df_data = df.values
        self.data_path = data_path
        self.transforms = transforms
        self.mode = mode
        self.data_dir = "train_images" if mode == "train" else "test_images"

    def __len__(self):
        return len(self.df_data)

    def __getitem__(self, index):
        img_name, label = self.df_data[index]
        img_path = os.path.join(self.data_path, self.data_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        if self.transforms is not None:
            image = self.transforms(img)

        return image, label


In [31]:
train_df, valid_df = model_selection.train_test_split(
    df, test_size=0.1, random_state=42, stratify=df.label.values
)

# create image augmentations
transformations = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)),transforms.ToTensor()])

train_dataset = CassavaDataset(train_df, transforms=transformations)
valid_dataset = CassavaDataset(valid_df, transforms=transformations)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,drop_last=True,num_workers=8)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,batch_size=BATCH_SIZE,drop_last=True,num_workers=8)

  cpuset_checked))


In [32]:
class ViTBase16(nn.Module):
    def __init__(self, n_classes, pretrained=False):

        super(ViTBase16, self).__init__()

        self.model = timm.create_model("vit_base_patch16_224", pretrained=False)
        if pretrained:
            self.model.load_state_dict(torch.load(MODEL_PATH))

        self.model.head = nn.Linear(self.model.head.in_features, n_classes)

    def forward(self, x):
        x = self.model(x)
        return x

    def train_one_epoch(self, train_loader, criterion, optimizer, device):
        # keep track of training loss
        epoch_loss = 0.0
        epoch_accuracy = 0.0

        ###################
        # train the model #
        ###################
        self.model.train()
        for i, (data, target) in enumerate(train_loader):
            if device.type == "cuda":
                data, target = data.cuda(), target.cuda()

            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = self.forward(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # Calculate Accuracy
            accuracy = (output.argmax(dim=1) == target).float().mean()
            # update training loss and accuracy
            epoch_loss += loss
            epoch_accuracy += accuracy
            optimizer.step()

            # perform a single optimization step (parameter update)
           
            if i % 20 == 0:
                print(f"\tBATCH {i+1}/{len(train_loader)} - LOSS: {loss}")
                

        return epoch_loss / len(train_loader), epoch_accuracy / len(train_loader)

    def validate_one_epoch(self, valid_loader, criterion, device):
        # keep track of validation loss
        valid_loss = 0.0
        valid_accuracy = 0.0

        ######################
        # validate the model #
        ######################
        self.model.eval()
        for data, target in valid_loader:
            # move tensors to GPU if CUDA is available
            if device.type == "cuda":
                data, target = data.cuda(), target.cuda()

            with torch.no_grad():
                # forward pass: compute predicted outputs by passing inputs to the model
                output = self.model(data)
                # calculate the batch loss
                loss = criterion(output, target)
                # Calculate Accuracy
                accuracy = (output.argmax(dim=1) == target).float().mean()
                # update average validation loss and accuracy
                valid_loss += loss
                valid_accuracy += accuracy

        return valid_loss / len(valid_loader), valid_accuracy / len(valid_loader)
    

In [33]:
def fit_tpu(
    model, epochs, device, criterion, optimizer, train_loader, valid_loader=None
):

    valid_loss_min = np.Inf  # track change in validation loss

    # keeping track of losses as it happen
    train_losses = []
    valid_losses = []
    train_accs = []
    valid_accs = []

    for epoch in range(1, epochs + 1):
        gc.collect()

        print(f"{'='*50}")
        print(f"EPOCH {epoch} - TRAINING...")
        train_loss, train_acc = model.train_one_epoch(
            train_loader, criterion, optimizer, device
        )
        print(
            f"\n\t[TRAIN] EPOCH {epoch} - LOSS: {train_loss}, ACCURACY: {train_acc}\n"
        )
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        gc.collect()

        if valid_loader is not None:
            gc.collect()
            print(f"EPOCH {epoch} - VALIDATING...")
            valid_loss, valid_acc = model.validate_one_epoch(
                valid_loader, criterion, device
            )
            print(f"\t[VALID] LOSS: {valid_loss}, ACCURACY: {valid_acc}\n")
            valid_losses.append(valid_loss)
            valid_accs.append(valid_acc)
            gc.collect()

            # save model if validation loss has decreased
            if valid_loss <= valid_loss_min and epoch != 1:
                print(
                    "Validation loss decreased ({:.4f} --> {:.4f}).  Saving model ...".format(
                        valid_loss_min, valid_loss
                    )
                )
            #                 xm.save(model.state_dict(), 'best_model.pth')

            valid_loss_min = valid_loss

    return {
        "train_loss": train_losses,
        "valid_losses": valid_losses,
        "train_acc": train_accs,
        "valid_acc": valid_accs,
    }

In [34]:
model = ViTBase16(n_classes=5, pretrained=True)


In [None]:
criterion = nn.CrossEntropyLoss()
model.to(device)

lr = LR 
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

print(f"INITIALIZING TRAINING")
start_time = datetime.now()
print(f"Start Time: {start_time}")

logs = fit_tpu(
        model=model,
        epochs=N_EPOCHS,
        device=device,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        valid_loader=valid_loader,
    )

print(f"Execution time: {datetime.now() - start_time}")

INITIALIZING TRAINING
Start Time: 2022-05-12 18:47:15.680916
EPOCH 1 - TRAINING...
	BATCH 1/1203 - LOSS: 1.715251088142395
	BATCH 21/1203 - LOSS: 1.236167073249817
	BATCH 41/1203 - LOSS: 1.166198968887329
	BATCH 61/1203 - LOSS: 1.0795202255249023
	BATCH 81/1203 - LOSS: 1.6125942468643188
	BATCH 101/1203 - LOSS: 0.6729816794395447
	BATCH 121/1203 - LOSS: 1.1487425565719604
	BATCH 141/1203 - LOSS: 1.504636526107788
	BATCH 161/1203 - LOSS: 0.7722253203392029
	BATCH 181/1203 - LOSS: 1.3899345397949219
	BATCH 201/1203 - LOSS: 1.0432389974594116
	BATCH 221/1203 - LOSS: 0.9115486145019531
	BATCH 241/1203 - LOSS: 1.0663762092590332
	BATCH 261/1203 - LOSS: 1.1596888303756714
	BATCH 281/1203 - LOSS: 1.1666537523269653
	BATCH 301/1203 - LOSS: 1.0621743202209473
	BATCH 321/1203 - LOSS: 1.0519338846206665
	BATCH 341/1203 - LOSS: 1.5109314918518066
	BATCH 361/1203 - LOSS: 1.0512527227401733
	BATCH 381/1203 - LOSS: 1.3005682229995728
	BATCH 401/1203 - LOSS: 1.1964164972305298
	BATCH 421/1203 - LOSS: 