In [None]:
import os
import numpy as np
import pandas as pd
import gc
import sys
import torch
from torch import nn
import torchvision
from torchvision import datasets, models, transforms as T
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import seaborn as sns
import random
from collections import Counter
from tqdm.autonotebook import tqdm, trange
from torch.optim import lr_scheduler
if 'efficientnet_pytorch' not in sys.modules:
    !pip install efficientnet_pytorch
if 'GPUtil' not in sys.modules:   
    !pip install GPUtil
import GPUtil
from efficientnet_pytorch import EfficientNet
from IPython.display import clear_output
from distutils.dir_util import copy_tree
import copy

# Seed every random number generator the notebook relies on (Python, NumPy,
# PyTorch CPU and CUDA) and ask cuDNN for deterministic kernels, so that data
# splits, sampling and weight initialisation repeat from run to run.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)
torch.backends.cudnn.deterministic = True

# Root directory of the image dataset; it is passed to datasets.ImageFolder
# further down the notebook.
# The original assignment had no right-hand side (a SyntaxError that prevented
# this cell from running), so the path is now taken from the SOURCE_DIR
# environment variable.
# TODO(review): 'data' is only a placeholder default - set the real location.
SOURCE_DIR = os.environ.get('SOURCE_DIR', 'data')

In [None]:
class Transformator(Dataset):
    """Dataset wrapper that applies optional transforms when an item is read.

    ``data`` is any indexable collection whose items unpack into a
    ``(sample, target)`` pair. ``transform`` is applied to the sample and
    ``t_transform`` to the target; a falsy value for either one leaves that
    element untouched.
    """

    def __init__(self, data, transform=None, t_transform=None):
        self.data = data
        self.transform = transform
        self.t_transform = t_transform

    def __len__(self):
        # The wrapper has exactly as many items as the wrapped collection.
        return len(self.data)

    def __getitem__(self, idx):
        sample, target = self.data[idx]
        sample = self.transform(sample) if self.transform else sample
        target = self.t_transform(target) if self.t_transform else target
        return sample, target

    
# Dark plotting theme: black axes/figure background, light-gray ticks and grid,
# and a wide default figure size.
_dark_rc = {
    'axes.facecolor': 'k',
    'figure.facecolor': 'k',
    'xtick.color': 'lightgray',
    'ytick.color': 'lightgray',
    'grid.color': 'lightgray',
    'figure.figsize': (25, 10.27),
}
sns.set(rc=_dark_rc)


# Alternative kept from the original for TPU runs: device = xm.xla_device()
device = 'cpu' if not torch.cuda.is_available() else 'cuda:0'
batch_size = 64

# Side length (pixels) used by the Resize step below. Each constant is named
# after the backbone it is meant for; ___size selects the one in use.
reznet152_size = 224
effb0_size = 224
effb3_size = 300
effb5_size = 456
effb7_size = 600
___size = effb3_size


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps used at the end of both pipelines."""
    # The mean/std values are presumably the ImageNet channel statistics - confirm.
    return [
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# NOTE: this rebinds the name `transforms` that the import cell bound to the
# torchvision.transforms module; from here on it is the dict of pipelines.
transforms = {
    # Training: resize, random horizontal flip, mild colour jitter, then normalise.
    'train': T.Compose([
        T.Resize((___size, ___size)),
        T.RandomHorizontalFlip(),
        T.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0),
        *_tensor_and_normalize(),
    ]),
    # Validation: deterministic resize + normalise only.
    'val': T.Compose([
        T.Resize((___size, ___size)),
        *_tensor_and_normalize(),
    ]),
}

# Load every image under SOURCE_DIR.
dataset = datasets.ImageFolder(f'{SOURCE_DIR}')
# Remap the stored target list: class index 0 becomes 1, every other index becomes 0.
# NOTE(review): ImageFolder.__getitem__ reads its labels from dataset.samples, so
# this remap appears to influence only the stratified split and the sampler
# weights computed below, not the labels the loaders actually yield. Confirm
# whether the yielded labels (and `classes`) were meant to be remapped as well.
dataset.targets = list(map(lambda x: 1 if x == 0 else 0, dataset.targets))
# index -> folder name, built from ImageFolder's original (un-remapped) mapping.
classes = {v: k for k, v in dataset.class_to_idx.items()}

# Inverse-frequency class weights, ordered by ascending class label.
counts = Counter(dataset.targets)
cls_counts = [counts[label] for label in sorted(counts)]
weights = 1. / torch.DoubleTensor(cls_counts)

# Stratified 80/20 split over sample indices.
train_indices, val_indices = train_test_split(
    list(range(len(dataset.targets))),
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=dataset.targets,
)

# Per-sample weight = weight of the sample's class. A single vectorised lookup
# replaces the original Python list of 0-dim tensors; dtype stays float64.
samples_weight = torch.from_numpy(
    weights.numpy()[np.asarray(dataset.targets)[train_indices]]
)
# Draw (with replacement) as many samples per epoch as the training split holds,
# with probability proportional to the per-sample weight.
sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight))

train_dataset = Transformator(torch.utils.data.Subset(dataset, train_indices), transforms['train'])
val_dataset = Transformator(torch.utils.data.Subset(dataset, val_indices), transforms['val'])
dataloader = {
    'train': torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, drop_last=True, sampler=sampler, num_workers=2),
    # drop_last is off for validation: dropping the final partial batch silently
    # excluded up to batch_size - 1 samples from every validation metric.
    'val': torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, drop_last=False, num_workers=2),
}

In [None]:
def train_model(model, criterion, optimizer, gpu, num_epochs=7, validate_model=True, lr_sch=None):
    """Train ``model`` on the notebook-level ``dataloader`` dict and track metrics.

    Each epoch iterates over the phases in ``dataloader`` (keys such as
    ``'train'`` and ``'val'``); the ``'val'`` phase is skipped when
    ``validate_model`` is false. After every phase the accuracy / F1 curves are
    re-plotted.

    Args:
        model: network to optimise; the caller must already have moved it to the
            notebook-level ``device``, which is where batches are sent.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per training batch.
        gpu: currently unused; kept so existing call sites keep working.
        num_epochs: number of passes over the loaders.
        validate_model: whether to run the ``'val'`` phase.
        lr_sch: optional scheduler; when given it is stepped after every
            optimiser step (i.e. once per training batch).

    Returns:
        ``(model, best_wts)``. ``model`` holds the weights of the last executed
        step. ``best_wts`` is an independent copy of the state dict from the
        validation epoch with the highest weighted F1, or of the final weights
        when no validation phase was scored.

    Side effects:
        Rebinds the globals ``accuracies``, ``losses`` (mean batch loss per
        phase and epoch), ``f1_`` and ``best_wts``.
    """
    global accuracies, losses, f1_, best_wts
    # state_dict() returns references to the live parameters, so a snapshot must
    # be deep-copied or it silently follows every later optimiser step.
    best_wts = copy.deepcopy(model.state_dict())
    best_acc = 0
    # -inf (instead of 0) marks "no validation epoch scored yet" and guarantees
    # that the first scored validation epoch is snapshotted.
    best_f1 = float('-inf')
    losses = {"train": [], 'val': []}
    accuracies = {"train": [], 'val': []}
    f1_ = {"train": np.array([]), 'val': np.array([])}

    for epoch in trange(num_epochs, desc="Epoch"):
        for phase, loaderdata in dataloader.items():
            if not validate_model and phase == 'val':
                continue
            is_train = phase == 'train'
            model.train(is_train)

            # Counters are per phase; previously they were shared, so validation
            # accuracy also contained the training counts.
            correct = 0
            len_data = 0
            running_loss = 0.0
            n_batches = 0
            phase_preds, phase_labels = [], []

            for inputs, labels in tqdm(loaderdata, leave=False, desc=f"{phase} iter"):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()
                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_train):
                    out = model(inputs)
                    loss = criterion(out, labels)
                if is_train:
                    loss.backward()
                    optimizer.step()
                    if lr_sch:
                        lr_sch.step()

                preds = torch.argmax(out, -1)
                correct += (preds == labels).sum().item()
                len_data += preds.shape[0]
                # Keep the float value; int() truncated almost every loss to 0.
                running_loss += loss.item()
                n_batches += 1
                phase_preds.append(preds.cpu().numpy())
                phase_labels.append(labels.cpu().numpy())
                del inputs, labels

            # Release cached GPU memory once per phase rather than once per batch.
            torch.cuda.empty_cache()
            gc.collect()

            epoch_acc = correct / len_data
            # f1_score expects (y_true, y_pred); the order matters for the
            # support-weighted average.
            phase_f1 = f1_score(
                np.concatenate(phase_labels), np.concatenate(phase_preds), average='weighted')
            f1_[phase] = np.append(f1_[phase], phase_f1)
            accuracies[phase].append(epoch_acc)
            losses[phase].append(running_loss / n_batches)

            if phase == 'val' and (phase_f1 > best_f1):
                best_acc = epoch_acc
                best_f1 = phase_f1
                best_wts = copy.deepcopy(model.state_dict())
                if phase_f1 == 1:
                    print("Early stopping")
                    return model, best_wts

            clear_output(wait=True)
            if validate_model and phase == 'val':
                plt.plot(list(range(len(accuracies['val']))), accuracies['val'], label='acc_val', color='red')
                plt.plot(list(range(len(f1_['val']))), f1_['val'], label='f1_val', color='green')
            plt.plot(list(range(len(accuracies['train']))), accuracies['train'], label='acc_train', color='cyan')
            plt.plot(list(range(len(f1_['train']))), f1_['train'], label='f1_train', color='white')
            legend = plt.legend()
            frame = legend.get_frame()
            # White legend box so the labels stay readable on the dark theme.
            frame.set_facecolor('white')
            plt.show()
            print(f"Epoch: {phase}, Acc: {epoch_acc}, F1: {phase_f1}")

    if best_f1 == float('-inf'):
        # No validation epoch was scored, so the final weights are the result.
        best_wts = copy.deepcopy(model.state_dict())

    print(f"Best accuracy: {best_acc}")
    # The returned model keeps its last-step weights; call
    # model.load_state_dict(best_wts) to restore the best snapshot.
    return model, best_wts


In [None]:
def evaluate(model, device):
    """Score ``model`` on the notebook-level ``dataloader['val']`` loader.

    Args:
        model: network to evaluate; it is switched to eval mode and must
            already live on ``device``.
        device: device the input batches and labels are moved to.

    Returns:
        ``(accuracy, weighted_f1)`` computed over every batch the loader yields.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    all_preds, all_labels = [], []
    # Inference only: without no_grad every batch would build an autograd graph.
    with torch.no_grad():
        for inputs, labels in dataloader['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            output = model(inputs)
            _, predicted = torch.max(output, 1)
            n_correct += int(torch.sum(predicted == labels))
            n_seen += predicted.shape[0]
            all_preds.append(predicted.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            del inputs, labels
    # Release cached GPU memory once, after the whole pass.
    torch.cuda.empty_cache()
    gc.collect()
    accuracy = n_correct / n_seen
    # f1_score expects (y_true, y_pred); the order matters for the
    # support-weighted average.
    weighted_f1 = f1_score(
        np.concatenate(all_labels), np.concatenate(all_preds), average='weighted')
    return accuracy, weighted_f1

In [None]:
# Baseline run: pretrained EfficientNet-B3 with one output per class folder,
# optimised with Adam (default settings) on cross-entropy for 20 epochs.
effnet_b3 = EfficientNet.from_pretrained('efficientnet-b3', num_classes=len(classes)).to(device)
effnet_b3_criterion = nn.CrossEntropyLoss()
effnet_b3_opt = torch.optim.Adam(effnet_b3.parameters())
effnet_b3, effnet_b3_wts = train_model(
    model=effnet_b3,
    criterion=effnet_b3_criterion,
    optimizer=effnet_b3_opt,
    gpu=device,
    num_epochs=20,
)

In [None]:
# Same preprocessing pipelines as the first data cell, redefined here so this
# cell uses them regardless of any edits made to `transforms` in between.
transforms = {
    'train': T.Compose([
        T.Resize((___size, ___size)),
        T.RandomHorizontalFlip(),
        T.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': T.Compose([
        T.Resize((___size, ___size)),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Start every ensemble member from the already fine-tuned EfficientNet-B3.
# Module.cpu() moves the module in place, so `effnet_b3` itself also ends up on
# the CPU after this line.
pretrained = effnet_b3
pretrained = pretrained.cpu()
# One CPU snapshot is enough: load_state_dict copies values into the target
# model, so the snapshot is never modified by training a member.
pretrained_wts = copy.deepcopy(pretrained.state_dict())

# Loop-invariant work hoisted out of the loop: the directory scan and the
# class statistics do not depend on the split seed.
dataset = datasets.ImageFolder(f'{SOURCE_DIR}')
counts = Counter(dataset.targets)
cls_counts = [counts[label] for label in sorted(counts)]
# Inverse-frequency class weights, ordered by ascending class label.
weights = 1. / torch.DoubleTensor(cls_counts)
_all_indices = list(range(len(dataset.targets)))
_all_targets = np.asarray(dataset.targets)

# NOTE: this rebinds the name `models` that the import cell bound to
# torchvision.models; from here on it is the list of trained ensemble members.
models = []
for n in range(7):
    print(f"Model {n + 1}")

    # A different stratified 70/30 split per member, seeded by the member index.
    train_indices, val_indices = train_test_split(
        _all_indices, test_size=0.3, random_state=n, shuffle=True, stratify=dataset.targets)
    # Per-sample weight = weight of the sample's class (vectorised lookup, float64).
    samples_weight = torch.from_numpy(weights.numpy()[_all_targets[train_indices]])
    sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight))

    train_dataset = Transformator(torch.utils.data.Subset(dataset, train_indices), transforms['train'])
    val_dataset = Transformator(torch.utils.data.Subset(dataset, val_indices), transforms['val'])
    dataloader = {
        'train': torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, drop_last=True, sampler=sampler, num_workers=2),
        # drop_last is off for validation so that no held-out sample is
        # silently excluded when this loader is used for scoring.
        'val': torch.utils.data.DataLoader(
            val_dataset, batch_size=batch_size, drop_last=False, num_workers=2),
    }

    model__ = EfficientNet.from_pretrained('efficientnet-b3', num_classes=len(classes))
    model__.load_state_dict(pretrained_wts)
    model__ = model__.to(device)
    model__opt = torch.optim.Adam(model__.parameters())
    model__criterion = nn.CrossEntropyLoss()
    # Two epochs without a validation phase.
    model__, model__wts = train_model(model__, model__criterion, model__opt, device, 2, validate_model=False)
    # Park the finished member on the CPU before training the next one.
    model__ = model__.cpu()
    models.append(model__)