In [None]:
%%capture
! pip install -U timm py7zr

In [None]:
import os
import gc
import cv2
import math
import copy
import time
import random

from pathlib import Path

# For data manipulation
import numpy as np
import pandas as pd

import py7zr

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler, AdamW
from torch.optim.lr_scheduler import _LRScheduler, StepLR
from torch.utils.data import Dataset, DataLoader, random_split

import torchmetrics
import torchvision.models as models
from torchvision import transforms as T
import pytorch_lightning as pl
from pytorch_lightning import Callback, LightningModule, Trainer, LightningDataModule
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold

from PIL import Image

# For Image Models
import timm
from timm.data.transforms_factory import create_transform
from timm.optim import create_optimizer_v2
# from efficientnet_pytorch import EfficientNet
# import torchvision.models as models

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

import warnings
# Suppress every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): this is set after `import torch` above; presumably it still takes
# effect because no CUDA work has been issued yet — confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Useful Tool to analyze key metrics during and after training
import wandb

# Best-effort login: when `kaggle_secrets` is unavailable or the "wandb_api"
# secret cannot be read, report the problem and let the notebook continue.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that kernel
    # interrupts (KeyboardInterrupt / SystemExit) still propagate, and say
    # what actually failed instead of a generic message.
    print(f'Something Wrong! ({type(exc).__name__}: {exc})')

In [None]:
# Output and temp directories
OUTPUT_DIR = Path("/kaggle/working")
TEMP_DIR = Path("/kaggle/temp")

# Competition input files
ROOT_DIR = Path("../input/cifar-10")

TRAIN_ZIP = ROOT_DIR.joinpath("train.7z")
TEST_ZIP = ROOT_DIR.joinpath("test.7z")

# Image directories under TEMP_DIR
TRAIN_DIR = TEMP_DIR.joinpath("train")
TEST_DIR = TEMP_DIR.joinpath("test")

TRAIN_CSV = ROOT_DIR.joinpath("trainLabels.csv")

SAMPLE_SUBMISSION_CSV_PATH = ROOT_DIR.joinpath("sampleSubmission.csv")

# Files and folders written under OUTPUT_DIR
ENCODER_CLASSES_PATH = OUTPUT_DIR.joinpath("encoder_classes.npy")
TEST_CSV_PATH = OUTPUT_DIR.joinpath("test.csv")
TRAIN_CSV_ENCODED_PATH = OUTPUT_DIR.joinpath("encoded_train.csv")
CHECKPOINTS_DIR = OUTPUT_DIR.joinpath("checkpoints")
SUBMISSION_CSV_PATH = OUTPUT_DIR.joinpath("submission.csv")

In [None]:
# Unpack the train and test archives into OUTPUT_DIR (/kaggle/working).
# The `with` block closes each archive even if extraction fails part-way,
# which the previous open / extractall / close sequence did not guarantee.
for archive_path in (TRAIN_ZIP, TEST_ZIP):
    with py7zr.SevenZipFile(archive_path, mode='r') as archive:
        archive.extractall(OUTPUT_DIR)

In [None]:
# Copy the training labels and the sample submission into OUTPUT_DIR.
# index=False keeps each copy column-for-column identical to its source;
# the to_csv default would prepend an unnamed index column.
train_df = pd.read_csv(TRAIN_CSV)
train_df.to_csv(OUTPUT_DIR / 'trainLabels.csv', index=False)

test_df = pd.read_csv(SAMPLE_SUBMISSION_CSV_PATH)
test_df.to_csv(OUTPUT_DIR / 'sampleSubmission.csv', index=False)

In [None]:
# archive = py7zr.SevenZipFile(TRAIN_ZIP, mode='r')
# archive.extractall(TEMP_DIR)
# archive.close()

# archive = py7zr.SevenZipFile(TEST_ZIP, mode='r')
# archive.extractall(TEMP_DIR)
# archive.close()

In [None]:
# train_df = pd.read_csv(TRAIN_CSV)
# train_df['file_path'] = train_df.id.apply(lambda x: f"{TRAIN_DIR / str(x)}.png")

# encoder = LabelEncoder()
# train_df['encoded_label'] = encoder.fit_transform(train_df['label'])
# np.save(ENCODER_CLASSES_PATH, encoder.classes_)

# skf = StratifiedKFold(n_splits=5)
# for fold, ( _, val_) in enumerate(skf.split(X=train_df, y=train_df.encoded_label)):
#       train_df.loc[val_ , "kfold"] = fold

# train_df.to_csv(TRAIN_CSV_ENCODED_PATH)

In [None]:
# test_df = pd.read_csv(SAMPLE_SUBMISSION_CSV_PATH)
# test_df['file_path'] = test_df.id.apply(lambda x: f"{TEST_DIR / str(x)}.png")
# test_df['encoded_label'] = 0

# test_df.to_csv(TEST_CSV_PATH)

In [None]:
# class CIFAR(Dataset):
#     def __init__(self, df, transforms=None):
#         super(CIFAR, self).__init__()
        
#         self.df = df
#         self.file_paths = df['file_path'].values
#         self.labels = df['encoded_label'].values
#         self.transforms = transforms
            
#     def __len__(self):
#         return len(self.df)
    
#     def __getitem__(self, idx):
#         img_path = self.file_paths[idx]
#         img = Image.open(img_path)
        
#         label = self.labels[idx]
#         label = torch.tensor(label, dtype=torch.long)
        
#         if self.transforms:
#             img = self.transforms(img)
        
#         return img, label

In [None]:
# class CIFARDM(LightningDataModule):
#     def __init__(
#         self,
#         train_csv_encoded,
#         test_csv,
#         val_fold,
#         image_size, 
#         batch_size,
#         num_workers
#     ):
#         super().__init__()
        
#         self.save_hyperparameters()
        
#         self.train_df = pd.read_csv(train_csv_encoded)
#         self.test_df = pd.read_csv(test_csv)
        
# #         self.transform = create_transform(
# #             input_size=(self.hparams.image_size, self.hparams.image_size),
# #             crop_pct=1.0
# #         )
        
#         self.train_transform = T.Compose([
#                                             T.Resize((self.hparams.image_size, self.hparams.image_size)),
#                                             T.AutoAugment(policy=T.AutoAugmentPolicy.CIFAR10),
#                                             T.ToTensor(),
#                                             T.Normalize(
#                                                 mean=[0.4914, 0.4822, 0.4465], 
#                                                 std=[0.247, 0.243, 0.261],
#                                             )
#                                         ])
        
#         self.test_transform = T.Compose([
#                                             T.Resize((self.hparams.image_size, self.hparams.image_size)),
#                                             T.ToTensor(),
#                                             T.Normalize(
#                                                 mean=[0.4914, 0.4822, 0.4465], 
#                                                 std=[0.247, 0.243, 0.261],
#                                             )
#                                         ])
        
        
        
#     def setup(self, stage=None):
#         if stage=='fit' or stage is None:
#             train_df = self.train_df[self.train_df.kfold != self.hparams.val_fold].reset_index(drop=True)
#             val_df = self.train_df[self.train_df.kfold == self.hparams.val_fold].reset_index(drop=True)

#             self.train_dataset = CIFAR(train_df, transforms=self.train_transform)
#             self.val_dataset = CIFAR(val_df, transforms=self.test_transform)
            
#         if stage=='test' or stage is None:
#             self.test_dataset = CIFAR(self.test_df, transforms=self.test_transform)
            
#     def train_dataloader(self):
#         return self._dataloader(self.train_dataset, train=True)
    
#     def val_dataloader(self):
#         return self._dataloader(self.val_dataset)
    
#     def test_dataloader(self):
#         return self._dataloader(self.test_dataset)
    
#     def _dataloader(self, dataset, train=False):
#         return DataLoader(
#             dataset, 
#             batch_size=self.hparams.batch_size,
#             shuffle=train,
#             num_workers=self.hparams.num_workers,
#             pin_memory=True,
#             drop_last=train
#         )

In [None]:
# class Conv2dSamePadding(nn.Conv2d):
#     def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
#         super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
#         self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

#         assert image_size is not None
#         ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
#         kh, kw = self.weight.size()[-2:]
#         sh, sw = self.stride
#         oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
#         pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
#         pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        
#         if pad_h > 0 or pad_w > 0:
#             self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
#         else:
#             self.static_padding = nn.Identity()

#     def forward(self, x):
#         x = self.static_padding(x)
#         x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
#         return x


In [None]:
# class CIFAR_Model(LightningModule):
#     def __init__(
#         self,
#         optimizer,
#         learning_rate,
#         weight_decay,
#         len_train_dl,
#         epochs,
#         gamma
#     ):
        
#         super().__init__()
#         self.save_hyperparameters()
        
#         self.blocks = [self.conv_block(in_f, out_f, im_s, kernel_size=3, padding=0, stride=1) for in_f, out_f, im_s in zip([1, 32, 64], [32, 64], [28, 24])]

#         self.model = nn.Sequential(
#             *self.blocks,
#             nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4),
#             nn.ReLU(),
#             nn.BatchNorm2d(num_features=128),
#             nn.Flatten(),
#             nn.Dropout(0.4),
#             nn.Linear(in_features=512, out_features=10)
#         )
        
#         self.loss_fn = nn.CrossEntropyLoss()
#         self.metrics = torchmetrics.Accuracy()

#     def conv_block(self, in_f, out_f, im_s, *args, **kwargs):
#         return nn.Sequential(
#           nn.Conv2d(in_channels=in_f, out_channels=out_f, *args, **kwargs),
#           nn.ReLU(),
#           nn.BatchNorm2d(num_features=out_f),
#           nn.Conv2d(in_channels=out_f, out_channels=out_f, *args, **kwargs),
#           nn.ReLU(),
#           nn.BatchNorm2d(num_features=out_f),
#           Conv2dSamePadding(in_channels=out_f, out_channels=out_f, kernel_size=5, stride=2, image_size=im_s),
#           nn.ReLU(),
#           nn.BatchNorm2d(num_features=out_f),
#           nn.Dropout(0.4)
#         )

#     def forward(self, x):
#         x = self.model(x)
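#         # NOTE(review): _step feeds this output to nn.CrossEntropyLoss, which expects raw logits; softmax here is applied twice.
#         return F.softmax(x, dim=1)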

    
#     def training_step(self, batch, batch_idx):
#         return self._step(batch, "train")
    
#     def validation_step(self, batch, batch_idx):
#         return self._step(batch, "val")
    
#     def predict_step(self, batch, batch_idx):
#         x, _ = batch
#         output = self(x)
#         return output
    
#     def _step(self, batch, step):
#         images, labels = batch
        
#         outputs = self(images)
        
#         loss = self.loss_fn(outputs, labels)
        
#         self.metrics(outputs, labels)
        
#         self.log(f"{step}_loss",loss)
#         self.log(f"{step}_acc", self.metrics)
        
#         return loss
    
#     def configure_optimizers(self):
#         optimizer = create_optimizer_v2(
#             self.parameters(),
#             opt=self.hparams.optimizer, 
#             lr=self.hparams.learning_rate,
#             weight_decay=self.hparams.weight_decay,
#         )
        
#         scheduler = torch.optim.lr_scheduler.ExponentialLR(
#             optimizer,
#             gamma=self.hparams.gamma
#         )
        
#         scheduler = {"scheduler": scheduler, "interval": "step"}
        
#         return [optimizer], [scheduler]

In [None]:
# class CIFAR_Model(LightningModule):
#     def __init__(
#         self,
#         optimizer,
#         learning_rate,
#         weight_decay,
#         len_train_dl,
#         epochs
#     ):
        
#         super().__init__()
#         self.save_hyperparameters()
        
#         self.sizes = [3, 128, 256, 512]
#         self.params = [512, 256, 128, 128]
#         self.conv_blocks1 = [self.conv_block1(in_f, out_f, kernel_size=3, padding=1, dilation=2, bias=False) for in_f, out_f in zip(self.sizes, self.sizes[1:])]
#         self.conv_blocks2 = [self.conv_block1(in_f, out_f, kernel_size=3, padding=1, dilation=2, bias=False) for in_f, out_f in zip(self.sizes, self.sizes[1:])]
        
#         self.branch1 = nn.Sequential(
#                             *self.conv_blocks1,
#                             self.conv_block2(kernel_size=3, padding=1)
#                         )
        
#         self.branch2 = nn.Sequential(
#                             *self.conv_blocks2,
#                             self.conv_block2(kernel_size=3, padding=1)
#                         )
        
#         self.db = [self.dense_block(in_f, out_f, bias=False) for in_f, out_f in zip(self.params, self.params[1:])]
#         self.dense_layers = nn.Sequential(*self.db)
        
#         self.ll = nn.Linear(128, 10)
        
#         self.loss_fn = nn.CrossEntropyLoss()
#         self.metrics = torchmetrics.Accuracy()
        
#     def conv_block1(self, in_f, out_f, *args, **kwargs):
#         return nn.Sequential(
#             nn.Conv2d(in_f, out_f, *args, **kwargs),
#             nn.BatchNorm2d(out_f),
#             nn.MaxPool2d(2),
#             nn.ReLU(),
#             nn.Dropout(0.2)
#         )
    
#     def conv_block2(self, *args, **kwargs):
#         return nn.Sequential(
#             nn.Conv2d(512, 720, *args, **kwargs),
#             nn.Flatten(),
#             nn.Linear(2880, 512, bias=False),
#             nn.BatchNorm1d(512),
#             nn.ReLU(),
#             nn.Dropout(0.2),
#             nn.Linear(512, 256, bias=False),
#             nn.BatchNorm1d(256),
#             nn.ReLU(),
#             nn.Dropout(0.2)
#         )
    
#     def dense_block(self, in_f, out_f, *args, **kwargs):
#         return nn.Sequential(
#             nn.Linear(in_f, out_f, *args, **kwargs),
#             nn.BatchNorm1d(out_f),
#             nn.ReLU(),
#             nn.Dropout(0.2)
#         )
        
#     def forward(self, x):
#         x = torch.cat([self.branch1(x), self.branch2(x)], dim=1)
#         x = self.dense_layers(x)
#         x = self.ll(x)
        
#         return F.normalize(x)
    
#     def training_step(self, batch, batch_idx):
#         return self._step(batch, "train")
    
#     def validation_step(self, batch, batch_idx):
#         return self._step(batch, "val")
    
#     def predict_step(self, batch, batch_idx):
#         x, _ = batch
#         output = self(x)
#         return output
    
#     def _step(self, batch, step):
#         images, labels = batch
        
#         outputs = self(images)
        
#         loss = self.loss_fn(outputs, labels)
        
#         self.metrics(outputs, labels)
        
#         self.log(f"{step}_loss",loss)
#         self.log(f"{step}_acc", self.metrics)
        
#         return loss
    
#     def configure_optimizers(self):
#         optimizer = create_optimizer_v2(
#             self.parameters(),
#             opt=self.hparams.optimizer, 
#             lr=self.hparams.learning_rate,
#             weight_decay=self.hparams.weight_decay,
#         )
        
#         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#             optimizer,
#             factor=0.6
#         )
        
#         scheduler = {"scheduler": scheduler, "interval": "step", "monitor": "train_loss"}
        
#         return [optimizer], [scheduler]

In [None]:
# def train(
#     train_csv_encoded=str(TRAIN_CSV_ENCODED_PATH),
#     test_csv=str(TEST_CSV_PATH),
#     val_fold=0.0,
#     image_size=32,
#     batch_size=64,
#     num_workers=2,
#     optimizer="adam",
#     learning_rate=3e-4,
#     weight_decay=1e-6,
#     gamma=0.95,
#     checkpoints_dir=str(CHECKPOINTS_DIR),
#     auto_lr_find=False,
#     auto_scale_batch_size=False,
#     fast_dev_run=False,
#     gpus=1,
#     max_epochs=60,
#     precision=16,
#     stochastic_weight_avg=True
# ):
#     pl.seed_everything(41)
    
    
#     datamodule = CIFARDM(
#         train_csv_encoded=train_csv_encoded,
#         test_csv=test_csv,
#         val_fold=val_fold,
#         image_size=image_size,
#         batch_size=batch_size,
#         num_workers=num_workers,
#     )
    
#     datamodule.setup()
#     len_train_dl = len(datamodule.train_dataloader())
    
#     module = CIFAR_Model(
#         optimizer=optimizer,
#         learning_rate=learning_rate,
#         weight_decay=weight_decay,
#         len_train_dl=len_train_dl,
#         epochs=max_epochs,
#         gamma=gamma
#     )
    
#     wandb_logger = WandbLogger(project='CIFAR', 
#                            job_type='Train',
#                            anonymous='must')
    
#     model_checkpoint = ModelCheckpoint(
#         checkpoints_dir,
#         filename=f"My_Model_{image_size}",
#         monitor="train_loss",
#     )
    
#     lr_monitor = LearningRateMonitor(logging_interval='step')
    
#     trainer = pl.Trainer(
#         logger=wandb_logger,
#         auto_lr_find=auto_lr_find,
#         auto_scale_batch_size=auto_scale_batch_size,
#         benchmark=True,
#         callbacks=[model_checkpoint, lr_monitor],
#         deterministic=True,
#         fast_dev_run=fast_dev_run,
#         gpus=gpus,
#         max_epochs=1 if DEBUG else max_epochs,
#         precision=precision,
#         stochastic_weight_avg=stochastic_weight_avg,
#         limit_train_batches=0.1 if DEBUG else 1.0,
#         limit_val_batches=0.1 if DEBUG else 1.0,
#     )
    
#     trainer.tune(module, datamodule=datamodule)

#     trainer.fit(module, datamodule=datamodule)
    
#     preds = trainer.predict(module, datamodule.test_dataloader())
#     preds = torch.cat(preds, dim=0)
#     preds_idx = torch.argmax(preds, 1).tolist()
#     preds = encoder.inverse_transform(preds_idx)
    
#     df = pd.DataFrame()
#     df['id'] = test_df['id']
#     df['label'] = preds

#     df.to_csv('submission.csv', index=False)
    
#     wandb_logger.finalize("success")
#     wandb.finish()

In [None]:
# DEBUG=False

# image_size = 32

# train(
#     image_size=image_size,
    
# )