In [None]:
# Download the PyTorch/XLA environment setup script, run it (requesting the
# libomp5 and libopenblas-dev apt packages), then install efficientnet_pytorch.
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --apt-packages libomp5 libopenblas-dev
!pip install efficientnet_pytorch

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch_xla
import torch_xla.core.xla_model as xm

In [None]:
# Load the training manifest; later cells read its "image_id" and "label" columns.
train_csv = pd.read_csv("/kaggle/input/cassava-leaf-disease-classification/train.csv")
train_csv.head()

In [None]:
import os

In [None]:
# Build the class-per-folder tree /kaggle/temp/training_images/<label>/.
# os.makedirs(..., exist_ok=True) creates any missing parent directories and,
# unlike os.mkdir, does not raise FileExistsError when the cell is run again.
os.makedirs("/kaggle/temp/training_images", exist_ok=True)
for label_value in train_csv["label"].unique():
    os.makedirs("/kaggle/temp/training_images/{}".format(label_value), exist_ok=True)

In [None]:
# File names of every training image in the competition input directory;
# the cell displays how many there are.
all_training_files = os.listdir("/kaggle/input/cassava-leaf-disease-classification/train_images")
len(all_training_files)

In [None]:
from tqdm import tqdm
import shutil

In [None]:
# Copy every training image into the folder named after its label.
# The image_id -> label lookup is built once up front: filtering the whole
# DataFrame for each file is quadratic in the number of images, and int() on a
# one-row Series is deprecated in recent pandas releases.
label_by_image = dict(zip(train_csv["image_id"], train_csv["label"]))
for file in tqdm(all_training_files):
    # A file missing from train.csv raises KeyError here instead of being
    # silently skipped.
    label = int(label_by_image[file])
    dirname = "/kaggle/input/cassava-leaf-disease-classification/train_images/" + file
    outname = "/kaggle/temp/training_images/{}/{}".format(label, file)
    shutil.copyfile(dirname, outname)

In [None]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import numpy as np
from PIL import Image
import json

In [None]:
from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.metrics import Accuracy, Loss, RunningAverage, ConfusionMatrix
from ignite.contrib.handlers import ProgressBar

In [None]:
def get_data_loaders(data_dir, batch_size, train_fraction=0.81, seed=None):
    """Build training and validation loaders from a class-per-folder image tree.

    The images under ``data_dir`` are split at random into a training part and
    a validation part. Training samples go through random crop/flip
    augmentation; validation samples go through a deterministic resize and
    centre crop so that validation metrics do not vary with random
    augmentation from one evaluation to the next.

    Args:
        data_dir: Root directory laid out as ``<data_dir>/<class>/<image>``.
        batch_size: Batch size used by both loaders.
        train_fraction: Share of the samples assigned to the training split
            (the remainder is used for validation).
        seed: Optional integer seed for the split. ``None`` draws the split
            from torch's global random generator.

    Returns:
        ``((train_loader, val_loader), classes)`` where ``classes`` is the
        class-name list reported by ``ImageFolder``.
    """
    # Local import: only this function needs Subset.
    from torch.utils.data import Subset

    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(512),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Same output size and normalisation as training, but without randomness.
    eval_transform = transforms.Compose([
        transforms.Resize(512),
        transforms.CenterCrop(512),
        transforms.ToTensor(),
        normalize,
    ])

    # Two views over the same files: sample indices line up between them, and
    # each view applies its own transform.
    train_view = datasets.ImageFolder(data_dir, transform=train_transform)
    eval_view = datasets.ImageFolder(data_dir, transform=eval_transform)

    total = len(train_view)
    train_data_len = int(total * train_fraction)
    if seed is None:
        order = torch.randperm(total)
    else:
        order = torch.randperm(total, generator=torch.Generator().manual_seed(seed))
    order = order.tolist()

    train_data = Subset(train_view, order[:train_data_len])
    val_data = Subset(eval_view, order[train_data_len:])

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # Evaluation does not depend on sample order, so no shuffling is needed.
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    return ((train_loader, val_loader), train_view.classes)

In [None]:
# Root of the class-per-folder image tree that the loaders read from.
path = "/kaggle/temp/training_images/"

In [None]:
# Build both loaders with a batch size of 256 and keep the class-name list.
(train_loader, val_loader), classes = get_data_loaders(path, 256)

In [None]:
# Preview up to 20 images from one training batch, titled with their class.
dataiter = iter(train_loader)
# Use the built-in next(): current DataLoader iterators have no .next() method.
images, labels = next(dataiter)
images = images.numpy() # convert images to numpy for display

# Undo the Normalize step applied by the loader so that imshow receives values
# in [0, 1]; these constants mirror the ones passed to transforms.Normalize.
channel_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
channel_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)

# Never index past the end of a batch that holds fewer than 20 images.
images_to_show = min(20, len(images))
fig = plt.figure(figsize=(14, 10))
for idx in range(images_to_show):
    # add_subplot needs integer grid sizes (the original 12/2 is the float 6.0).
    ax = fig.add_subplot(5, 6, idx + 1, xticks=[], yticks=[])
    picture = np.clip(images[idx] * channel_std + channel_mean, 0.0, 1.0)
    # Tensors are channel-first; imshow expects channel-last.
    ax.imshow(np.transpose(picture, (1, 2, 0)))
    ax.set_title(classes[int(labels[idx])])

In [None]:
# Use the XLA device reported by torch_xla; the CUDA/CPU selection is kept
# below, commented out, for reference.
# device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = xm.xla_device()
device

In [None]:
from efficientnet_pytorch import EfficientNet
class Model(nn.Module):
    """Frozen pretrained EfficientNet-B5 followed by a trainable linear layer.

    The backbone's output is fed to ``nn.Linear(1000, num_classes)``. Every
    backbone parameter has ``requires_grad`` switched off, so only the linear
    layer's parameters remain trainable.
    """

    def __init__(self, num_classes):
        """Create the backbone, freeze it, and attach the ``num_classes``-way layer."""
        super().__init__()
        backbone = EfficientNet.from_pretrained('efficientnet-b5')
        # Freeze the pretrained weights; the linear layer created below is
        # not affected by this loop.
        for weight in backbone.parameters():
            weight.requires_grad = False
        self.base = backbone
        self.dense = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Pass ``x`` through the backbone, then through the linear layer."""
        backbone_out = self.base(x)
        return self.dense(backbone_out)

In [None]:
# model = models.resnext50_32x4d(pretrained=True)
# Instantiate the classifier with one output per entry in `classes`.
model = Model(len(classes))
# from efficientnet_pytorch import EfficientNet
# model = EfficientNet.from_pretrained('efficientnet-b5')

In [None]:
# Print the module structure for inspection.
print(model)

In [None]:
def count_params(model):
    """Return the total number of elements across the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is off are not counted.
    """
    trainable_total = 0
    for tensor in model.parameters():
        if tensor.requires_grad:
            trainable_total += tensor.numel()
    return trainable_total
# Display the number of trainable parameters in the model.
count_params(model)

In [None]:
# for param in model.parameters():
#     param.requires_grad = False

In [None]:
# n_inputs = model.fc.in_features
# last_layer = nn.Linear(n_inputs, len(classes))
# model.fc = last_layer
# model = model.to(device)
# print(model.fc.out_features)

In [None]:
# Move the model to the selected device.
model = model.to(device)

In [None]:
# Cross-entropy loss for classification; Adam (lr=1e-4) is handed every model
# parameter, including those whose requires_grad flag is off.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Move the loss module to the same device as the model.
criterion = criterion.to(device)

In [None]:
# Per-epoch metric logs; each dict holds one list of accuracies and one list
# of losses.
training_history = dict(accuracy=[], loss=[])
validation_history = dict(accuracy=[], loss=[])

In [None]:
# Training engine, plus an evaluation engine that reports accuracy, loss and a
# confusion matrix for whichever loader it is run on.
trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
evaluator = create_supervised_evaluator(
    model,
    device=device,
    metrics={
        'accuracy': Accuracy(),
        'loss': Loss(criterion),
        'cm': ConfusionMatrix(len(classes)),
    },
)


def _evaluate_and_record(loader, history):
    """Run the evaluator on ``loader`` and append its metrics to ``history``.

    Returns ``(accuracy, loss)`` with the accuracy expressed in percent.
    """
    evaluator.run(loader)
    results = evaluator.state.metrics
    accuracy = results['accuracy'] * 100
    loss = results['loss']
    history['accuracy'].append(accuracy)
    history['loss'].append(loss)
    return accuracy, loss


def log_a_dot(engine):
    """Print one dot per completed training iteration as a progress marker."""
    print(".", end="")


def log_training_results(trainer):
    """Evaluate on the training loader after each epoch, record and print the result."""
    accuracy, loss = _evaluate_and_record(train_loader, training_history)
    # End the line of progress dots before printing the summary.
    print()
    print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, accuracy, loss))


def log_validation_results(trainer):
    """Evaluate on the validation loader after each epoch, record and print the result."""
    accuracy, loss = _evaluate_and_record(val_loader, validation_history)
    print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, accuracy, loss))


# Register in this order so the training summary is printed before the
# validation summary at the end of every epoch.
trainer.add_event_handler(Events.ITERATION_COMPLETED, log_a_dot)
trainer.add_event_handler(Events.EPOCH_COMPLETED, log_training_results)
trainer.add_event_handler(Events.EPOCH_COMPLETED, log_validation_results)

In [None]:
# Train for 8 epochs over the training loader.
trainer.run(train_loader, max_epochs=8)

In [None]:
# Move the model to the CPU before its weights are saved.
# NOTE(review): nn.Module.to() is documented to return the module itself, so
# `modelsave` presumably aliases `model` rather than being a copy -- confirm.
modelsave = model.to("cpu")

In [None]:
# Write the model's state_dict to the Kaggle working directory.
torch.save(modelsave.state_dict(), "/kaggle/working/model.pth")

In [None]:
# all_testing_files = os.listdir("/kaggle/input/cassava-leaf-disease-classification/test_images/")
# len(all_testing_files)

In [None]:
# model.eval()

In [None]:
# def apply_test_transforms(inp):
#     out = transforms.functional.resize(inp, [224,224])
#     out = transforms.functional.to_tensor(out)
#     out = transforms.functional.normalize(out, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     return out

In [None]:
# from PIL import Image

In [None]:
# def predict(model, image):
#     im_as_tensor = apply_test_transforms(im)
#     minibatch = torch.stack([im_as_tensor])
#     if torch.cuda.is_available():
#         minibatch = minibatch.cuda()
#     pred = model(minibatch)
#     _, classnum = torch.max(pred, 1)
#     return classes[classnum]

In [None]:
# rows = []
# for file in all_testing_files:
#     fname = "/kaggle/input/cassava-leaf-disease-classification/test_images/" + file
#     im = Image.open(fname)
#     cls = predict(model, im)
#     row = {'imageid':file, 'label':cls}
#     rows.append(row)

In [None]:
# submission = pd.DataFrame(rows)
# submission

In [None]:
# submission.to_csv('./submission.csv', index=False)