In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import torch
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import torch.nn.functional as F
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import time
import random
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, roc_auc_score, confusion_matrix
import warnings
import matplotlib.pyplot as plt

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be read below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import os
# Dataset root on the mounted Drive.
# NOTE(review): the last path component is "train " with a trailing space — presumably
# this matches the real folder name on Drive; confirm before "fixing" it.
PATH = "/content/drive/MyDrive/Real Douments/MS Purdue/Spring 23/ML/train "
# Per-image annotation table (later cells use its 'File Name' column).
labels = pd.read_csv(os.path.join(PATH, "train_small.csv"))
# Category lookup table; its 'Unnamed: 0' column is renamed to 'Label' in the next cell.
encodings = pd.read_csv(os.path.join(PATH, "category.csv"))

In [None]:
# Give the header-less column (read by pandas as 'Unnamed: 0') the name 'Label'.
# Reassigning instead of mutating in place keeps the cell safe to re-run.
encodings = encodings.rename(columns={'Unnamed: 0': 'Label'})
encodings.head()

In [None]:
# Left-join the category table onto the annotations so each row gains a 'Label' column.
# No `on=` is given, so pandas joins on every column name the two frames share —
# NOTE(review): presumably that is the category-name column; confirm against the CSV headers.
labels = labels.merge(encodings, how="left")

In [None]:
# Class balance check: number of rows per 'Label' value after the merge.
labels['Label'].value_counts()

In [None]:
# Peek at the first ten merged rows.
labels.head(10)

In [None]:
class CustomTrainDataset(Dataset):
    """Dataset that loads an image from disk, augments it and returns (image, label).

    Args:
        paths: Sequence of image file paths (list, NumPy array, or pandas Series).
        labels: Sequence of targets aligned with ``paths`` (list, array, Series or
            DataFrame); returned as-is per item.
        transform: Callable invoked as ``transform(image=img)`` that returns a
            mapping with an ``'image'`` entry holding an HWC NumPy array
            (the Albumentations calling convention).

    Each item is ``(img, label)`` where ``img`` is a float32 CHW array scaled to
    ``[0, 1]`` before augmentation.
    """

    def __init__(self, paths, labels, transform):
        # DataLoader samplers hand out *positions* 0..len-1, but indexing a pandas
        # Series with an int does a *label* lookup. After a shuffled split the index
        # is no longer 0..n-1, so positional access must go through plain arrays.
        self.paths = self._as_positional(paths)
        self.labels = self._as_positional(labels)
        self.transform = transform

    @staticmethod
    def _as_positional(values):
        """Return pandas containers as NumPy arrays; leave other sequences untouched."""
        return values.to_numpy() if hasattr(values, "to_numpy") else values

    def __getitem__(self, idx):
        name = self.paths[idx]
        # Context manager closes the file handle promptly; convert() yields a
        # fully loaded copy, so the pixel data outlives the `with` block.
        with Image.open(f'{name}') as handle:
            img = np.array(handle.convert("RGB")).astype("float32")
        img /= 255.

        transformed = self.transform(image=img)
        img = transformed['image']
        # HWC -> CHW, the layout torch convolution layers expect.
        img = img.transpose(2, 0, 1).astype('float32')
        labels = self.labels[idx]
        return img, labels

    def __len__(self):
        return len(self.paths)

In [None]:
def train_transform():
    """Build the Albumentations augmentation pipeline used for training images.

    Returns:
        An ``A.Compose`` that applies, in order: a random horizontal flip, one
        photometric jitter (contrast / gamma / brightness) with probability 0.3,
        one geometric distortion with probability 0.3, a shift-scale-rotate with
        probability 0.2, and finally a resize to 256x256.
    """
    return A.Compose([
        A.HorizontalFlip(),
        A.OneOf([
            # RandomContrast / RandomBrightness are deprecated aliases of
            # RandomBrightnessContrast (and are gone from newer releases); these
            # two calls reproduce them with their default limit of 0.2.
            A.RandomBrightnessContrast(brightness_limit=0, contrast_limit=0.2),
            A.RandomGamma(),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0),
        ], p=0.3),
        A.OneOf([
            # NOTE(review): `alpha_affine` (and `always_apply` below) are also
            # deprecated in recent Albumentations releases — confirm against the
            # installed version before upgrading.
            A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
            A.GridDistortion(),
            A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
        ], p=0.3),
        A.ShiftScaleRotate(p=0.2),
        # Resize last so every sample leaves the pipeline at the same size.
        A.Resize(256, 256, always_apply=True),
    ], p=1.)

def val_transform():
    """Build the validation pipeline: a plain resize to 256x256, no augmentation."""
    resize_only = [A.Resize(256, 256, always_apply=True)]
    return A.Compose(resize_only, p=1.)

In [None]:
#/content/drive/MyDrive/Real Douments/MS Purdue/Spring 23/ML/train /train_small
# Turn bare file names into full paths. Only rows that do not already carry the
# prefix are rewritten, so re-running this cell cannot prepend the directory twice.
IMAGE_DIR_PREFIX = PATH + "/train_small/"
_needs_prefix = ~labels['File Name'].str.startswith(IMAGE_DIR_PREFIX, na=False)
labels.loc[_needs_prefix, 'File Name'] = IMAGE_DIR_PREFIX + labels.loc[_needs_prefix, 'File Name']
PATH

In [None]:
# One-hot encode 'Label': one indicator column per distinct label value present in `labels`.
target_df = pd.get_dummies(labels.Label)
# Column index of the one-hot frame, i.e. the distinct label values in column order.
targets = target_df.columns

In [None]:
# Re-check the per-class row counts (same query as the earlier value_counts cell).
labels['Label'].value_counts()

In [None]:
# Display the annotation frame, now holding full image paths in 'File Name'.
labels

In [None]:
from sklearn.model_selection import train_test_split

def prepare_train_valid_dataloader(df):
    """Split ``df`` into train/validation sets and wrap each in a DataLoader.

    Args:
        df: DataFrame with a ``'File Name'`` column of image paths and a
            ``'Label'`` column of class ids.

    Returns:
        ``(train_loader, val_loader)``. Training batches (size 128) are shuffled
        and augmented; validation batches (size 64) are only resized.

    Targets are one-hot float32 rows because the training code feeds them to
    ``BCEWithLogitsLoss`` and recovers class ids via ``argmax(axis=1)``.
    """
    # Plain NumPy arrays: train_test_split shuffles rows, and positional indexing
    # from the DataLoader must not be confused with a pandas label lookup.
    image_paths = df['File Name'].to_numpy()
    # NOTE(review): one column per label value present in `df`; this only matches
    # the model's output width if every class occurs in `df` — confirm.
    one_hot = pd.get_dummies(df['Label']).to_numpy(dtype='float32')

    train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, one_hot)

    # The transform factories must be *called*: the dataset invokes
    # `transform(image=img)` on the returned Compose object.
    train_ds = CustomTrainDataset(train_paths, train_labels, train_transform())
    val_ds = CustomTrainDataset(val_paths, val_labels, val_transform())

    train_loader = DataLoader(train_ds, batch_size=128, pin_memory=True, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_ds, batch_size=64, pin_memory=True, shuffle=False, num_workers=4)
    return train_loader, val_loader

In [None]:
import torchvision.models as models

In [None]:
class ImageRecgoModel(nn.Module):
    """ResNet-18 backbone with its final fully connected layer replaced.

    Args:
        n_labels: Number of output logits (one per class).
    """

    def __init__(self, n_labels):
        super().__init__()
        # NOTE(review): newer torchvision releases prefer the `weights=` argument
        # over `pretrained=True`; kept as-is to stay compatible with the installed version.
        self.model = models.resnet18(pretrained=True)
        num_ftrs = self.model.fc.in_features
        # Size the classification head from the argument instead of a hard-coded 100.
        self.model.fc = nn.Linear(num_ftrs, n_labels)

    def forward(self, x):
        """Return raw (un-activated) class logits for a batch of images."""
        return self.model(x)

In [None]:
def Loss_fn(images, targets, model, device):
    """Forward a batch through ``model`` and compute the BCE-with-logits loss.

    Args:
        images: Batch tensor accepted by ``model``.
        targets: Tensor with the same shape as the model output (one-hot rows);
            any numeric or bool dtype is accepted.
        model: The network; moved to ``device`` on every call.
        device: Target ``torch.device``.

    Returns:
        ``(loss, outputs)`` — the scalar loss tensor and the raw logits.
    """
    model.to(device)
    images = images.to(device)
    outputs = model(images)
    # BCEWithLogitsLoss requires floating-point targets; one-hot labels often
    # arrive as int64/uint8/bool after collation, so match the logits' dtype.
    targets = targets.to(device=device, dtype=outputs.dtype)
    criterion = nn.BCEWithLogitsLoss()
    loss = criterion(outputs, targets)
    return loss, outputs

In [None]:
def train(epoch, model, device, optimizer, scheduler, trainloader):
    """Run one training epoch with 4-batch gradient accumulation.

    Args:
        epoch: Epoch number, used only in the progress line.
        model: Network to train (switched to train mode).
        device: Device the batches are moved to.
        optimizer: Optimizer stepped once every 4 batches (and on the last batch).
        scheduler: LR scheduler, or ``None``. Epoch-based schedulers are advanced
            once at the end of the epoch; ``ReduceLROnPlateau`` is left alone
            because it needs a metric that is not available here.
        trainloader: Iterable of ``(images, one-hot targets)`` batches.

    Returns:
        ``(total_loss, accuracy)`` — the sum of the per-batch losses and the
        epoch accuracy computed from arg-max predictions.
    """
    accumulation_steps = 4
    log_every = 10

    model.train()
    t = time.time()
    n_batches = len(trainloader)
    total_loss = 0
    total_targets = []
    total_outputs = []

    # Start from clean gradients in case a previous (interrupted) run left some behind.
    optimizer.zero_grad()
    for step, (images, targets) in enumerate(trainloader):
        loss, outputs = Loss_fn(images, targets, model, device)
        loss.backward()

        # Targets are one-hot and outputs are logits: arg-max gives class ids for both.
        total_targets.extend(targets.detach().cpu().numpy().argmax(axis=1))
        total_outputs.extend(outputs.detach().cpu().numpy().argmax(axis=1))

        is_last = (step + 1) == n_batches
        if (step + 1) % accumulation_steps == 0 or is_last:
            optimizer.step()
            optimizer.zero_grad()

        total_loss += loss.detach().item()
        if (step + 1) % log_every == 0 or is_last:
            # Running mean over the batches seen so far (dividing by the full
            # epoch length would under-report the loss until the final step).
            print(
                f'epoch {epoch} train step {step+1}/{n_batches}, '
                f'loss: {total_loss/(step+1):.4f}, '
                f'time: {(time.time() - t):.4f}',
                end='\n' if is_last else '\r',
            )

    # Epoch-based schedulers such as StepLR count *epochs*; stepping them on every
    # optimizer update would shrink the learning rate to ~0 within a few batches.
    if scheduler is not None and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step()

    accuracy = accuracy_score(total_targets, total_outputs)
    print(f'Train Accuracy: {accuracy}')
    return total_loss, accuracy

In [None]:
def validate(epoch, model, device, optimizer, scheduler, validloader):
    """Evaluate the model on the validation loader.

    Args:
        epoch: Epoch number, used only in the progress line.
        model: Network to evaluate (switched to eval mode).
        device: Device the batches are moved to.
        optimizer: Unused; kept so the signature mirrors ``train``.
        scheduler: LR scheduler, or ``None``. Only a ``ReduceLROnPlateau`` is
            stepped here, once, with the mean validation loss. Other scheduler
            types interpret a positional ``step`` argument as an epoch index,
            so they must not be fed the loss.
        validloader: Iterable of ``(images, one-hot targets)`` batches.

    Returns:
        ``(total_loss, accuracy, cm)`` — the sum of the per-batch losses, the
        accuracy from arg-max predictions, and the confusion matrix.
    """
    log_every = 10

    model.eval()
    t = time.time()
    n_batches = len(validloader)
    total_loss = 0
    total_targets = []
    total_outputs = []

    # No gradients are needed for evaluation; this also makes the function safe
    # to call without an outer torch.no_grad() block.
    with torch.no_grad():
        for step, (images, targets) in enumerate(validloader):
            loss, outputs = Loss_fn(images, targets, model, device)

            # Targets are one-hot and outputs are logits: arg-max gives class ids.
            total_targets.extend(targets.detach().cpu().numpy().argmax(axis=1))
            total_outputs.extend(outputs.detach().cpu().numpy().argmax(axis=1))
            total_loss += loss.detach().item()

            is_last = (step + 1) == n_batches
            if (step + 1) % log_every == 0 or is_last:
                # Running mean over the batches seen so far.
                print(
                    f'epoch {epoch} val step {step+1}/{n_batches}, '
                    f'loss: {total_loss/(step+1):.4f}, '
                    f'time: {(time.time() - t):.4f}',
                    end='\n' if is_last else '\r',
                )

    # A plateau scheduler wants one complete metric per epoch, not partial sums
    # taken every few batches.
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(total_loss / max(n_batches, 1))

    accuracy = accuracy_score(total_targets, total_outputs)
    cm = confusion_matrix(total_targets, total_outputs)
    print(f'Validation Accuracy: {accuracy}')
    return total_loss, accuracy, cm

In [None]:
# Checkpoint locations. `latest_path` is the same file as save_path + 'Best_model.pth'.
save_path = PATH + "/Models/"
latest_path = "/content/drive/MyDrive/Real Douments/MS Purdue/Spring 23/ML/train /Models/Best_model.pth"
os.makedirs(save_path, exist_ok=True)  # torch.save does not create missing directories

trainloader, validloader = prepare_train_valid_dataloader(labels)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ImageRecgoModel(n_labels=100).to(device)

# Resume from the previous best checkpoint when one exists. map_location lets a
# checkpoint saved on GPU load on a CPU-only runtime.
if os.path.exists(latest_path):
    model.load_state_dict(torch.load(latest_path, map_location=device))
else:
    print(f'No checkpoint at {latest_path}; starting from ImageNet weights.')

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-5)
# NOTE(review): gamma=0.1 with step_size=1 divides the LR by 10 on every scheduler
# step; over 20 epochs that is a very aggressive decay — confirm it is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=1)
num_epochs = 20
best_acc = 0
history = {}
history['loss'] = []
history['val_loss'] = []
history['accuracy'] = []
history['val_accuracy'] = []
best_val_acc = 0
for epoch in range(num_epochs):
    train_loss, train_accuracy = train(epoch, model, device, optimizer, scheduler, trainloader)
    with torch.no_grad():
        val_loss, val_accuracy, cm = validate(epoch, model, device, optimizer, scheduler, validloader)
    if val_accuracy > best_acc:
        # Record the new best so later, worse epochs cannot overwrite the checkpoint.
        best_acc = val_accuracy
        best_val_acc = val_accuracy
        torch.save(model.state_dict(), save_path + 'Best_model.pth')
    history['loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['accuracy'].append(train_accuracy)
    history['val_accuracy'].append(val_accuracy)

# Always keep the final weights separately. The best checkpoint is already on disk
# from inside the loop, so it is deliberately not re-saved from last-epoch weights.
torch.save(model.state_dict(), save_path + 'Last_epoch_model.pth')

       