## Import Libraries

In [None]:
!pip install albumentations==0.4.6
import albumentations 
from albumentations.pytorch import ToTensorV2

In [None]:
print('started')
import os
import numpy as np
import pandas as pd
import albumentations as A
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import torchvision
from torchvision import transforms
from tqdm import tqdm
from functools import partial
import warnings  
warnings.filterwarnings('ignore')

In [None]:
import os
import numpy as np
import pandas as pd
import albumentations as A
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from albumentations import Rotate 
import matplotlib.pyplot as plt
import seaborn as sns
import torchvision
from torchvision import transforms
from tqdm import tqdm
import warnings  
warnings.filterwarnings('ignore')

In [None]:
!git clone https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
%cd Ranger-Deep-Learning-Optimizer
!pip install -e
%cd ..

In [None]:
!pip install pytorch_ranger

In [None]:
from pytorch_ranger import Ranger  # this is from ranger.py
from pytorch_ranger import RangerVA  # this is from ranger913A.py
from pytorch_ranger import RangerQH  # this is from rangerqh.py

In [None]:
# Run-wide settings shared by the cells below.
SEED: int = 42          # presumably the random seed for the run -- TODO confirm where it is meant to be applied
num_epoch: int = 40     # number of passes over the training loader
BATCH_SIZE: int = 128   # samples per mini-batch for every DataLoader
image_size: int = 224   # images are resized to image_size x image_size
num_workers: int = 4    # worker processes used by every DataLoader

## Load data

In [None]:
from sklearn import preprocessing
# Label encoder: fitted on the training labels, and reused at the end of the
# notebook to turn predicted integer ids back into the original label values.
le = preprocessing.LabelEncoder()

In [None]:
# Read the training table and replace its `label` column with the integer
# codes produced by the label encoder (the encoder is fitted on that column).
train_csv_path = '../input/vision-verse/data/train.csv'
train_df = pd.read_csv(train_csv_path)
train_df['label'] = le.fit(train_df.label).transform(train_df.label)
train_df.head()
# Uncomment to work on a small random subset:
# train_df = train_df.sample(1000)

In [None]:
# Show pandas' summary statistics for the training table.
train_df.describe()

In [None]:
# Number of training rows per encoded label value.
train_df['label'].value_counts()

In [None]:
# Bar chart of how many training rows carry each encoded label.
# The column is passed as `x=` explicitly: newer seaborn releases treat the
# first positional argument as `data`, which would not produce a per-label count.
sns.countplot(x=train_df.label)
plt.xticks(rotation=90)

# Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the rows for validation. The split is seeded with the
# notebook-wide SEED constant instead of a second hard-coded copy of the value.
X_train, X_test, y_train, y_test = train_test_split(
    train_df['path'], train_df['label'], test_size=0.1, random_state=SEED)
# Sanity check of the dtypes of the four resulting Series.
print(X_train.dtype)
print(y_train.dtype)
print(X_test.dtype)
print(y_test.dtype)

In [None]:
# Sizes of the four split parts, printed on one line.
print(*(len(part) for part in (X_train, X_test, y_train, y_test)))

## Training Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset that reads images from disk and pairs them with labels.

    Args:
        root_dir: Directory prefix; joined with "/" to each relative path.
        X_train: Object supporting ``len()`` and ``.iloc[index]`` that yields
            the relative image paths.
        y_train: Object supporting ``.iloc[index]`` that yields the labels,
            aligned position-by-position with ``X_train``.
        transform: Callable applied to the RGB image array; its return value
            is the image part of each sample.
    """

    def __init__(self, root_dir, X_train, y_train, transform):
        self.root_dir = root_dir
        self.transform = transform
        self.X_train = X_train
        self.y_train = y_train

    def __len__(self):
        """Return the number of samples (length of the path collection)."""
        return len(self.X_train)

    @staticmethod
    def _read_rgb(image_path):
        """Load ``image_path`` as an RGB array.

        GIF files are opened through ``cv2.VideoCapture`` and only their first
        frame is used; every other file goes through ``cv2.imread``.

        Raises:
            FileNotFoundError: If OpenCV could not decode the file. Without
                this check a ``None`` image would reach ``cv2.cvtColor`` and
                fail with an error that does not mention the offending path.
        """
        # Decide on the real file extension (case-insensitive) rather than on
        # a ".gif" substring anywhere in the path.
        if os.path.splitext(image_path)[1].lower() == '.gif':
            cap = cv2.VideoCapture(image_path)
            try:
                ok, image = cap.read()
            finally:
                # Always release the capture handle, even if read() raised.
                cap.release()
            if not ok:
                image = None
        else:
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)

        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, index):
        """Return ``(transformed_image, label_tensor)`` for position ``index``."""
        label = self.y_train.iloc[index]
        image_path = f"{self.root_dir}/{self.X_train.iloc[index]}"
        image = self.transform(self._read_rgb(image_path))
        return image, torch.tensor(label)

In [None]:
# Preprocessing pipeline applied to each image array, in order:
# convert to PIL, resize to image_size x image_size, convert to a tensor,
# then normalize each of the three channels with mean 0.5 and std 0.5.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

In [None]:
# Wrap the training split in a dataset and serve it in shuffled mini-batches.
root_dir = '../input/vision-verse'

dataset = CustomDataset(root_dir, X_train, y_train, train_transforms)

train_loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
)

In [None]:
# Held-out split, preprocessed with the same transform pipeline as training.
valid_dataset = CustomDataset(root_dir, X_test, y_test, train_transforms)

# NOTE(review): shuffling is kept as in the original; it should not be needed
# for computing validation accuracy.
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
)

## Model architecture

In [None]:
class Conv2dAuto(nn.Conv2d):
    """``nn.Conv2d`` whose padding is derived from its kernel size.

    Construction is delegated to ``nn.Conv2d``; afterwards ``padding`` is
    overwritten with half of each kernel dimension (integer division), so any
    ``padding`` passed by the caller is replaced.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        kernel_h, kernel_w = self.kernel_size
        self.padding = (kernel_h // 2, kernel_w // 2)


# Factory for bias-free 3x3 convolutions with automatic padding.
conv3x3 = partial(Conv2dAuto, kernel_size=3, bias=False)

In [None]:
def activation_func(activation):
    """Return a newly created activation module selected by name.

    Args:
        activation: One of ``'relu'``, ``'leaky_relu'``, ``'selu'`` or
            ``'none'`` (``'none'`` maps to ``nn.Identity``).

    Returns:
        The matching ``nn.Module``; the three real activations are created
        with ``inplace=True``.

    Raises:
        KeyError: If ``activation`` is not one of the names above.
    """
    choices = nn.ModuleDict({
        'relu': nn.ReLU(inplace=True),
        'leaky_relu': nn.LeakyReLU(negative_slope=0.01, inplace=True),
        'selu': nn.SELU(inplace=True),
        'none': nn.Identity(),
    })
    return choices[activation]

In [None]:
class ResidualBlock(nn.Module):
    """Skeleton residual block: ``activate(blocks(x) + residual)``.

    ``blocks`` and ``shortcut`` start out as ``nn.Identity`` and are meant to
    be replaced by subclasses.

    Args:
        in_channels: Number of input channels (stored, and compared against
            ``out_channels`` to decide whether the shortcut is applied).
        out_channels: Number of output channels.
        activation: Name understood by ``activation_func``.
    """

    def __init__(self, in_channels, out_channels, activation='relu'):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.activation = activation
        self.blocks = nn.Identity()
        self.activate = activation_func(activation)
        self.shortcut = nn.Identity()

    def forward(self, x):
        """Apply the main path, add the (possibly projected) input, activate."""
        residual = self.shortcut(x) if self.should_apply_shortcut else x
        x = self.blocks(x)
        # In-place add on the main-path output, as in the original.
        x += residual
        return self.activate(x)

    @property
    def should_apply_shortcut(self):
        """True when the input and output channel counts differ."""
        return self.in_channels != self.out_channels

In [None]:
class ResidualBlock(ResidualBlock):
    """Extension of the previously defined ``ResidualBlock`` with a projection shortcut.

    Adds channel expansion and strided downsampling. When the input channel
    count differs from ``expanded_channels`` the shortcut is a 1x1 strided
    convolution followed by batch normalization; otherwise it is ``None`` and
    the input itself is used as the residual.

    Args:
        in_channels: Number of input channels.
        out_channels: Base number of output channels.
        expansion: Multiplier applied to ``out_channels`` for the block output.
        downsampling: Stride of the shortcut convolution.
        conv: Convolution factory stored for subclasses to build their layers.
        *args, **kwargs: Forwarded to the base ``ResidualBlock``.
    """

    def __init__(self, in_channels, out_channels, expansion=1, downsampling=1, conv=conv3x3, *args, **kwargs):
        super().__init__(in_channels, out_channels, *args, **kwargs)
        self.expansion = expansion
        self.downsampling = downsampling
        self.conv = conv

        if self.should_apply_shortcut:
            projection = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    self.expanded_channels,
                    kernel_size=1,
                    stride=self.downsampling,
                    bias=False,
                ),
                nn.BatchNorm2d(self.expanded_channels),
            )
        else:
            projection = None
        self.shortcut = projection

    @property
    def expanded_channels(self):
        """Channel count of the block output: ``out_channels * expansion``."""
        return self.out_channels * self.expansion

    @property
    def should_apply_shortcut(self):
        """True when the input channel count differs from ``expanded_channels``."""
        return self.in_channels != self.expanded_channels

In [None]:
def conv_bn(in_channels, out_channels, conv, *args, **kwargs):
    """Build a convolution followed by batch normalization.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and feature count of
            the batch-norm layer.
        conv: Callable creating the convolution; called as
            ``conv(in_channels, out_channels, *args, **kwargs)``.

    Returns:
        ``nn.Sequential`` of the convolution and an ``nn.BatchNorm2d``.
    """
    convolution = conv(in_channels, out_channels, *args, **kwargs)
    batch_norm = nn.BatchNorm2d(out_channels)
    return nn.Sequential(convolution, batch_norm)

In [None]:
class BasicBlock(ResidualBlock):
    """Residual block whose main path is conv-bn, activation, conv-bn.

    The first convolution uses the block's ``downsampling`` value as its
    stride; the second maps to ``expanded_channels``.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, *args, **kwargs):
        super().__init__(in_channels, out_channels, *args, **kwargs)
        first_stage = conv_bn(
            self.in_channels,
            self.out_channels,
            conv=self.conv,
            bias=False,
            stride=self.downsampling,
        )
        second_stage = conv_bn(
            self.out_channels,
            self.expanded_channels,
            conv=self.conv,
            bias=False,
        )
        self.blocks = nn.Sequential(
            first_stage,
            activation_func(self.activation),
            second_stage,
        )

In [None]:
class BottleNeckBlock(ResidualBlock):
    """Residual block with a 1x1 -> 3x3 -> 1x1 main path and expansion 4.

    The 3x3 convolution carries the block's ``downsampling`` stride and the
    final 1x1 convolution maps to ``expanded_channels``.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, *args, **kwargs):
        super().__init__(in_channels, out_channels, expansion=4, *args, **kwargs)
        reduce_stage = conv_bn(self.in_channels, self.out_channels, self.conv, kernel_size=1)
        spatial_stage = conv_bn(
            self.out_channels,
            self.out_channels,
            self.conv,
            kernel_size=3,
            stride=self.downsampling,
        )
        expand_stage = conv_bn(self.out_channels, self.expanded_channels, self.conv, kernel_size=1)
        self.blocks = nn.Sequential(
            reduce_stage,
            activation_func(self.activation),
            spatial_stage,
            activation_func(self.activation),
            expand_stage,
        )

In [None]:
class Layer(nn.Module):
    """Stack of ``n`` residual blocks of the same type.

    The first block maps ``in_channels`` to ``out_channels`` and uses a
    downsampling value of 2 when those two differ (1 otherwise); the remaining
    ``n - 1`` blocks take ``out_channels * block.expansion`` channels and use
    a downsampling value of 1.

    Args:
        in_channels: Channels entering the layer.
        out_channels: Base channel count of every block in the layer.
        block: Block class to instantiate; must expose ``expansion``.
        n: Number of blocks.
        *args, **kwargs: Forwarded to every block constructor.
    """

    def __init__(self, in_channels, out_channels, block=BasicBlock, n=1, *args, **kwargs):
        super().__init__()
        # 'We perform downsampling directly by convolutional layers that have a stride of 2.'
        downsampling = 1 if in_channels == out_channels else 2
        entry_block = block(in_channels, out_channels, *args, **kwargs, downsampling=downsampling)
        following_blocks = [
            block(out_channels * block.expansion, out_channels, downsampling=1, *args, **kwargs)
            for _ in range(n - 1)
        ]
        self.blocks = nn.Sequential(entry_block, *following_blocks)

    def forward(self, x):
        """Run the input through all blocks in order."""
        return self.blocks(x)

In [None]:
class Encoder(nn.Module):
    """Feature extractor: a convolutional gate followed by a list of ``Layer`` stages.

    Args:
        in_channels: Channels of the input image tensor.
        blocks_sizes: Base channel count of each stage.
        deepths: Number of blocks in each stage, aligned with ``blocks_sizes``.
        activation: Activation name used in the gate and in every block.
        block: Block class used inside every ``Layer``.
        *args, **kwargs: Forwarded to every ``Layer``.
    """

    def __init__(self, in_channels=3, blocks_sizes=[64, 128, 256, 512], deepths=[2,2,2,2], 
                 activation='relu', block=BasicBlock, *args, **kwargs):
        super().__init__()
        self.blocks_sizes = blocks_sizes

        # Gate: 7x7 stride-2 convolution, batch norm, activation, 3x3 stride-2 max-pool.
        first_size = self.blocks_sizes[0]
        self.gate = nn.Sequential(
            nn.Conv2d(in_channels, first_size, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(first_size),
            activation_func(activation),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # Consecutive (input size, output size) pairs for every stage after the first.
        self.in_out_block_sizes = list(zip(blocks_sizes, blocks_sizes[1:]))

        # The first stage keeps the gate's channel count; each later stage
        # receives the previous stage's size multiplied by the block expansion.
        stages = [
            Layer(blocks_sizes[0], blocks_sizes[0], n=deepths[0], activation=activation,
                  block=block, *args, **kwargs)
        ]
        for (prev_size, next_size), depth in zip(self.in_out_block_sizes, deepths[1:]):
            stages.append(
                Layer(prev_size * block.expansion, next_size, n=depth, activation=activation,
                      block=block, *args, **kwargs)
            )
        self.blocks = nn.ModuleList(stages)

    def forward(self, x):
        """Apply the gate, then every stage in order."""
        x = self.gate(x)
        for stage in self.blocks:
            x = stage(x)
        return x

In [None]:
class Decoder(nn.Module):
    """Classification head: global average pooling followed by a linear layer.

    Args:
        in_features: Number of channels of the incoming feature map.
        n_classes: Number of output scores per sample.
    """

    def __init__(self, in_features, n_classes):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.decoder = nn.Linear(in_features, n_classes)

    def forward(self, x):
        """Pool each channel to 1x1, flatten per sample, and apply the linear layer."""
        pooled = self.avg(x)
        flat = pooled.flatten(start_dim=1)
        return self.decoder(flat)

In [None]:
class custom_model(nn.Module):
    """Full classifier: ``Encoder`` followed by ``Decoder``.

    The decoder's input width is read from ``expanded_channels`` of the last
    block of the last encoder stage.

    Args:
        in_channels: Channels of the input image tensor.
        n_classes: Number of output scores per sample.
        *args, **kwargs: Forwarded to ``Encoder``.
    """

    def __init__(self, in_channels, n_classes, *args, **kwargs):
        super().__init__()
        self.encoder = Encoder(in_channels, *args, **kwargs)
        final_stage = self.encoder.blocks[-1]
        final_block = final_stage.blocks[-1]
        self.decoder = Decoder(final_block.expanded_channels, n_classes)

    def forward(self, x):
        """Encode the input and map the features to class scores."""
        features = self.encoder(x)
        return self.decoder(features)

In [None]:
def my_model(in_channels, n_classes, block=BasicBlock, *args, **kwargs):
    """Build a ``custom_model`` with two blocks in each of its four stages.

    Args:
        in_channels: Channels of the input image tensor.
        n_classes: Number of output scores per sample.
        block: Block class used in every stage.
        *args, **kwargs: Forwarded to ``custom_model``.
    """
    stage_depths = [2, 2, 2, 2]
    return custom_model(in_channels, n_classes, block=block, deepths=stage_depths, *args, **kwargs)

In [None]:
!pip install torchsummary
from torchsummary import summary

model = my_model(3, 4)
summary(model.cuda(), (3, 224, 224))

In [None]:
# Pull a single batch from the training loader and display the shapes of the
# image tensor and the label tensor.
images, targets = next(iter(train_loader))
images.shape, targets.shape

In [None]:
# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
import gc

In [None]:
class FocalLoss(nn.Module):
    """Focal loss on top of cross-entropy: ``alpha * (1 - pt) ** gamma * ce``.

    ``ce`` is the per-sample cross-entropy and ``pt = exp(-ce)``.

    Args:
        alpha: Constant multiplier of the loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
        reduce: If true, return the mean over samples; otherwise return the
            per-sample losses.
    """

    def __init__(self, alpha=1, gamma=2, reduce=True):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: Unnormalized class scores, as accepted by
                ``F.cross_entropy``.
            targets: Class indices, as accepted by ``F.cross_entropy``.

        Returns:
            A scalar tensor when ``reduce`` is true, else one loss per sample.
        """
        # The focal weight must be computed per sample, so the cross-entropy
        # is left unreduced here. Averaging it first would weight a single
        # batch-mean value and make `reduce=False` return a scalar.
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')

        pt = torch.exp(-ce_loss)
        focal = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduce:
            return focal.mean()
        return focal

In [None]:
# Train for `num_epoch` epochs, validate after each one, and keep the weights
# of the epoch with the best validation accuracy in 'last_checkpoint.pth.tar'.
best_s = 0  # best validation accuracy seen so far

model.to(device)
EPOCHS = num_epoch
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.8)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10)

# Make sure every parameter is trainable before the first epoch.
for param in model.parameters():
    param.requires_grad = True

for epoch in range(EPOCHS):
    print(f'Epoch: {epoch+1}/{EPOCHS}')

    # ---- training pass ----
    # Training mode: batch-norm layers use batch statistics and update their
    # running estimates.
    model.train()
    correct = 0
    total = 0
    losses = []

    for images, targets in tqdm(train_loader):
        images = images.to(device)
        targets = targets.to(device)
        output = model(images)
        loss = criterion(output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, pred = torch.max(output, 1)
        correct += (pred == targets).sum().item()
        total += pred.size(0)
        losses.append(loss.item())
        # Drop the references so the batch tensors can be freed right away.
        del images, targets, output, loss

    # One collection per epoch is enough; collecting after every batch only
    # slows the loop down.
    gc.collect()

    train_loss = np.mean(losses)
    train_acc = correct * 1.0 / total
    del losses

    # ---- validation pass ----
    # Evaluation mode: batch-norm layers use their running statistics and are
    # not updated with validation data.
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for images, targets in tqdm(valid_loader):
            images = images.to(device)
            targets = targets.to(device)

            output = model(images)

            _, pred = torch.max(output, 1)
            correct += (pred == targets).sum().item()
            total += pred.size(0)

    valid_acc = correct * 1.0 / total

    # Save the state dict whenever the validation accuracy improves. The
    # 'accuracy' entry holds the accuracy itself, not the raw correct count.
    if valid_acc > best_s:
        torch.save({
            'model_state_dict': model.state_dict(),
            'accuracy': valid_acc
        }, 'last_checkpoint.pth.tar')
        best_s = valid_acc
        print('best saved epoch ' + str(epoch))

    # get_last_lr() is the supported way to read the current learning rate;
    # calling get_lr() outside of step() is deprecated.
    print(f'Train Loss: {train_loss}\tTrain Acc: {train_acc*100}\tLR: {scheduler.get_last_lr()}\tValid Accuracy: {valid_acc * 100}')
    scheduler.step()

In [None]:
# Collect unreachable Python objects first so their tensors go back to the
# caching allocator, then actually CALL empty_cache(): the bare attribute
# reference used before was a no-op.
gc.collect()
torch.cuda.empty_cache()

# Make predictions

In [None]:
# Restore the best weights saved during training and switch to eval mode.
pre_path = './last_checkpoint.pth.tar'
# map_location lets a checkpoint written on a GPU be loaded on a CPU-only
# machine (and vice versa).
checkpoint = torch.load(pre_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

In [None]:
# Read the submission template and fill its `label` column with the
# placeholder 0; the dataset class needs a label for every row.
sample_csv_path = '../input/vision-verse/data/sample_submission.csv'
sample_df = pd.read_csv(sample_csv_path).assign(label=0)
sample_df.head()
# sample_df['Class'] = le.transform(sample_df.Class)
# sample_df["label"] = pd.to_numeric(sample_df["label"])

In [None]:
# Paths and placeholder labels of the rows to predict, plus a dtype check.
X_final = sample_df['path']
y_final = pd.to_numeric(sample_df["label"])
for column in (X_final, y_final):
    print(column.dtype)

In [None]:
# Dataset and loader for the rows to predict. `test_transforms` was never
# defined in this notebook, so the preprocessing pipeline used for training
# and validation (`train_transforms`, which has no random augmentation) is
# applied here as well.
test_dataset = CustomDataset(root_dir,
                        X_final, y_final,
                        train_transforms)

# Keep the order fixed so predictions line up with the rows of sample_df.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=num_workers)

In [None]:
# Predict a class id for every row of the test loader.
model.eval()  # inference must not run batch-norm in training mode
predicted_batches = []
with torch.no_grad():
    # The placeholder labels from the loader are not needed, so they are not
    # moved to the device.
    for images, _placeholder_targets in tqdm(test_loader):
        images = images.to(device)

        output = model(images)
        _, pred = torch.max(output, 1)
        predicted_batches.append(pred.cpu().numpy())

# Concatenate once at the end instead of re-copying the growing array after
# every batch; fall back to an empty array when the loader yields nothing.
label = np.concatenate(predicted_batches, axis=0) if predicted_batches else np.array([])

In [None]:
# First rows of the submission table before the predictions are written in.
sample_df.head()

In [None]:
# Store the predictions as integer class ids, then map them back to the
# original label values with the fitted encoder.
sample_df['label'] = label
predicted_ids = sample_df["label"].astype(int)
sample_df['label'] = le.inverse_transform(predicted_ids)

In [None]:
# Write the submission file without the DataFrame index column.
sample_df.to_csv('submission.csv', index=False)

In [None]:
# (rows, columns) of the submission table.
sample_df.shape

In [None]:
# First rows of the submission table with the decoded labels.
sample_df.head()

In [None]:
# Number of predicted rows per label.
sample_df['label'].value_counts()