In [None]:
import copy
import glob
import os
import random
import time
import traceback
from typing import List, Tuple

import numpy as np
import pylab as pl
import torch
import wandb
from IPython import display
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms, models
from torchvision.models.vision_transformer import VisionTransformer
from torchvision.transforms import v2
from tqdm.auto import tqdm

In [None]:
# Single seed shared by every random number generator used in the notebook.
SEED = 42

# Seed each library separately: Python's `random`, NumPy's global RNG and
# PyTorch each keep their own independent state.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# When True, the datasets return one-hot label vectors instead of int labels.
OHE = False

## Dataset

In [None]:
class ImageDataset(Dataset):
    """Map-style PyTorch dataset that serves images from a list of file paths.

    Labels are not stored separately: the label of an image is parsed from its
    path as the integer name of the directory two levels above the file, so
    every path is expected to look like::

        <root>/<label>/<sample>/<image file>

    Args:
        img_paths: Paths of the images to serve.
        read_img_fn: Callable that receives a path and returns the loaded image.
        transform: Optional callable applied to the loaded image.
        do_ohe: When True, labels are returned as one-hot ``np.ndarray``
            vectors instead of plain ints.
        ohe_size: Length of the one-hot vectors. Defaults to 46, the width that
            used to be hard-coded.

    Attributes:
        n_classes: Largest label returned by ``__getitem__`` so far (starts at
            0). NOTE(review): this is a running maximum of the labels seen,
            not a class count, and is only updated as items are fetched.
    """

    def __init__(
        self,
        img_paths: List[str],
        read_img_fn: callable,
        transform=None,
        do_ohe: bool = False,
        ohe_size: int = 46,
    ):
        self.__read_img_fn = read_img_fn

        self.__img_paths = img_paths
        self.__transform = transform
        self.__do_ohe = do_ohe
        self.__ohe_size = ohe_size

        self.n_classes = 0

    def __getitem__(self, index: int) -> Tuple:
        """Return the ``(image, label)`` pair stored at ``index``.

        Raises:
            IndexError: If ``index`` is outside the list of paths.
            ValueError: If one-hot encoding is enabled and the label does not
                fit in a vector of length ``ohe_size``.
        """
        img_path = self.__img_paths[index]

        # Normalise first so mixed or redundant separators cannot shift the
        # position of the label directory inside the split path.
        path_parts = os.path.normpath(img_path).split(os.path.sep)
        label = int(path_parts[-3])

        self.n_classes = max(self.n_classes, label)

        image = self.__read_img_fn(img_path)

        if self.__transform is not None:
            image = self.__transform(image)

        if self.__do_ohe:
            # Fail loudly: a negative label would silently wrap around, and an
            # IndexError raised here could be mistaken for "end of dataset" by
            # Python's sequence-iteration protocol.
            if not 0 <= label < self.__ohe_size:
                raise ValueError(
                    f"label {label} from '{img_path}' does not fit in a "
                    f"one-hot vector of size {self.__ohe_size}"
                )
            label_ohe = np.zeros(self.__ohe_size)
            label_ohe[label] = 1
            label = label_ohe

        return image, label

    def __len__(self) -> int:
        """Return the number of image paths held by the dataset."""
        return len(self.__img_paths)

In [None]:
# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 100

# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the dataset order (and therefore the seeded shuffle) reproducible.
# Without recursive=True each "**" matches a single directory level, so the
# patterns select <split>/<dir>/<dir>/image.png.
train_path = sorted(glob.glob("./output/train_CO/**/**/image.png"))
test_path = sorted(glob.glob("./output/test_CO/**/**/image.png"))

# PIL image -> tensor, then cast to torch.float.
transform = transforms.Compose([transforms.ToTensor(), v2.ToDtype(torch.float)])

train_ds = ImageDataset(train_path, Image.open, transform, do_ohe=OHE)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

test_ds = ImageDataset(test_path, Image.open, transform, do_ohe=OHE)
# Evaluation does not need shuffling; a fixed order keeps the per-batch
# metrics identical from one run to the next.
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
def _show_progress(history, phases):
    """Redraw the accuracy and loss curves in place of the cell's output.

    Args:
        history: Mapping ``phase -> {"acc": [...], "loss": [...]}`` holding
            one value per finished epoch.
        phases: Phase names to draw; each one becomes a curve on both panels.

    The left panel shows accuracy and the right panel shows loss. The title of
    each panel is rewritten for every phase, so it ends up reporting the best
    value of the last phase in ``phases``.
    """
    plt.figure(figsize=(15, 10))

    # (subplot position, history key, title prefix, how to pick the best value)
    panels = (
        (1, "acc", "Best Acc", max),
        (2, "loss", "Best loss", min),
    )

    for position, metric, caption, pick_best in panels:
        for phase in phases:
            pl.subplot(1, 2, position)
            pl.title(f'{caption}: {pick_best(history[phase][metric])}')
            pl.plot(history[phase][metric], label=phase.capitalize())
            pl.legend()

    # Swap the previous figure for the new one, then release it.
    display.clear_output(wait=True)
    display.display(pl.gcf())
    plt.close()
    
def _run_phase(model, dataloader, criterion, optimizer, phase, epoch, num_epochs):
    """Run one full pass over ``dataloader`` and collect per-batch metrics.

    Weights are updated only when ``phase == 'train'``; any other phase puts
    the model in eval mode and disables gradient tracking. Batches are moved
    to the module-level ``device``.

    Returns:
        Tuple ``(batch_losses, batch_accs)``: two lists with one float per
        batch (criterion value and accuracy of the arg-max predictions).
    """
    is_train = phase == 'train'
    model.train(is_train)

    running_loss = []
    running_acc = []

    pbar = tqdm(dataloader, desc='Time, he\'s waiting in the wings')
    for inputs, labels in pbar:
        inputs = inputs.to(device).type(torch.float32)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # track gradients only while training
        with torch.set_grad_enabled(is_train):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # backward + optimize only if in training phase
            if is_train:
                loss.backward()
                optimizer.step()

        # statistics
        running_loss.append(loss.item())
        preds = torch.argmax(outputs, 1).detach().cpu()
        labels = labels.reshape(-1).detach().cpu()
        running_acc.append(accuracy_score(labels, preds))

        pbar.set_description('Epoch {}/{} - {} - ACC: {:.4f} LOSS: {:.4f}'.format(
            epoch, num_epochs - 1, phase.capitalize(), np.mean(running_acc), np.mean(running_loss)))

    return running_loss, running_acc


def train_model(
    model,
    dataloaders,
    criterion,
    optimizer,
    num_epochs=25,
    is_inception=False,
    do_validation=True,
    regression=False,
    plot_acc=False,
):
    """Train ``model`` and return it with the best weights found loaded.

    Every epoch runs a 'train' phase and, when ``do_validation`` is True, a
    'val' phase. The weights kept are those of the epoch with the highest
    mean per-batch accuracy on the validation phase (or on the training phase
    when validation is disabled). Metrics are logged to a wandb run whose
    config is read from the module-level ``LR``, ``FIRST_N_LAYERS`` and
    ``DROPOUT``.

    Training is best-effort: a ``KeyboardInterrupt`` or any ``Exception``
    raised inside the loop is reported and the best weights so far are still
    returned. Other ``BaseException`` subclasses propagate to the caller.

    Args:
        model: Network to train; expected to already be on ``device``.
        dataloaders: Mapping with a 'train' loader and, when ``do_validation``
            is True, a 'val' loader.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model parameters.
        num_epochs: Number of epochs to run.
        is_inception: Currently unused; kept for interface compatibility.
        do_validation: Whether to run the 'val' phase each epoch.
        regression: Currently unused; kept for interface compatibility.
        plot_acc: When True, redraw the progress curves after every epoch.

    Returns:
        Tuple ``(model, history)`` where ``history`` maps each phase to
        ``{"acc": [...], "loss": [...]}`` with one entry per finished epoch.
    """
    since = time.time()

    wandb.init(project="taixi", config={
        "epochs": num_epochs,
        "lr": LR,
        "first_n_layers": FIRST_N_LAYERS,
        "dropout": DROPOUT,
    })

    history = {}

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_acc_std = 0.0

    phases = ['train']
    if do_validation:
        phases.append('val')

    # The best checkpoint is chosen on 'val' when it runs, otherwise on 'train'.
    selection_phase = phases[-1]

    try:
        for epoch in range(num_epochs):
            res = dict()
            for phase in phases:
                running_loss, running_acc = _run_phase(
                    model, dataloaders[phase], criterion, optimizer, phase, epoch, num_epochs
                )

                epoch_loss = np.mean(running_loss)
                epoch_acc = np.mean(running_acc)

                res[f"{phase}_loss"] = epoch_loss
                res[f"{phase}_accuracy"] = epoch_acc

                # deep copy the model
                if phase == selection_phase and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_acc_std = np.std(running_acc)
                    best_model_wts = copy.deepcopy(model.state_dict())

                phase_history = history.setdefault(phase, {"acc": [], "loss": []})
                phase_history["acc"].append(epoch_acc)
                phase_history["loss"].append(epoch_loss)

            wandb.log(res)
            if plot_acc:
                _show_progress(history, phases)
    except KeyboardInterrupt:
        print('Interrupted')
    except Exception as e:
        print(e)
        traceback.print_exc()
    finally:
        # Close the wandb run even when training stops early.
        wandb.finish()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: [{:.4f}-{:.4f}]'.format(best_acc - best_acc_std/2, best_acc + best_acc_std/2))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, history

In [None]:
# Step size handed to the Adam optimizer (also logged in the wandb config).
LR = 1e-5
# How many leading parameter tensors of the network are frozen.
FIRST_N_LAYERS = 1
# Drop probability of the Dropout layer in the classification head.
DROPOUT = 0.7

In [None]:
# Size of the final classification layer.
NUM_CLASSES = 47

# Use the `weights=` argument: the deprecated `pretrained=` flag treats any
# truthy value as "load the legacy weights", so passing the DEFAULT enum
# through it did not actually select the DEFAULT weights.
net = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze the first FIRST_N_LAYERS parameter tensors, in parameters() order.
# NOTE(review): conv1 is replaced further down by a freshly initialised layer,
# so if the frozen tensors belong to the original conv1 (presumably the first
# parameter of the network) this freeze is discarded. Confirm the intent.
for param in list(net.parameters())[:FIRST_N_LAYERS]:
    param.requires_grad = False

# Input width of the original head, read before the head is replaced.
num_ftrs = net.fc.in_features

# New stem taking single-channel input; the other hyperparameters are the ones
# written in the original cell.
net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                      bias=False)

# New classification head: hidden layer + dropout + NUM_CLASSES logits.
net.fc = nn.Sequential(
    nn.Linear(num_ftrs, 128),
    nn.ReLU(inplace=True),
    nn.Dropout(p=DROPOUT),
    nn.Linear(128, NUM_CLASSES),
)


In [None]:
# Prefer the second GPU, as before, but fall back to the first one on machines
# that expose a single CUDA device ("cuda:1" would be invalid there), and to
# the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

## Entrenament

In [None]:
# Number of passes over the training set.
NUM_EPOCHS = 100

# Move the model to its device before building the optimizer, as the PyTorch
# documentation recommends, so the optimizer is created over the parameters
# that will actually be trained.
net = net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=LR)

net, history = train_model(
    net,
    {"train": train_loader, "val": test_loader},
    criterion,
    optimizer,
    num_epochs=NUM_EPOCHS,
    do_validation=True,
    plot_acc=True,
    regression=False
)

In [None]:
# torch.save(net.state_dict(), "res_9.pt")

In [None]:
# Highest validation accuracy over all recorded epochs.
val_acc_per_epoch = history['val']['acc']
max(val_acc_per_epoch)

# Anàlisi

In [None]:
# net.load_state_dict(torch.load("res.pt", weights_only=True))
# eval() switches the module to evaluation mode in place and returns the same
# object, so rebinding `net` is unnecessary; the trailing ";" hides the repr.
net.eval();

In [None]:
# Small argsort demo: the only negative entry (position 2) must sort first.
aux = np.array([0.0, 0.0, -5.0, 0.0, 0.0])

np.argsort(aux)

In [None]:
# Accumulators for the predicted and the true labels of the evaluation set.
results, ground_truth = [], []

In [None]:
# Start from empty lists so that re-running this cell does not append a second
# copy of the predictions to the previous ones.
results = []
ground_truth = []

# Inference only: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for inputs, labels in tqdm(test_loader):
        logits = net(inputs.to(device))

        # extend() grows the lists in place instead of copying them per batch.
        results.extend(torch.argmax(logits, dim=-1).cpu().tolist())
        ground_truth.extend(labels.cpu().tolist())

In [None]:
# Text report of the per-class metrics; printed so its line breaks render.
report_text = classification_report(ground_truth, results)
print(report_text)

In [None]:
# Confusion matrix of the collected true labels against the predictions.
cm = confusion_matrix(y_true=ground_truth, y_pred=results)

# Draw it without the per-cell numbers; ";" hides the returned object's repr.
disp = ConfusionMatrixDisplay(cm)
disp.plot(include_values=False);

In [None]:
# Column indices of row 7 of the confusion matrix, from the smallest count to
# the largest.
cm[7].argsort()