In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms as T # for simplifying the transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models
import sys
from tqdm import tqdm
import time
import copy
import matplotlib.pyplot as plt
%matplotlib inline
!pip install timm 
import timm
from timm.loss import LabelSmoothingCrossEntropy # This is better than normal nn.CrossEntropyLoss
from torch.nn import CrossEntropyLoss
import warnings
warnings.filterwarnings("ignore")
!pip install split-folders
import splitfolders
!pip install opencv-python
import cv2
from PIL import Image
!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[K     |████████████████████████████████| 549 kB 14.4 MB/s 
Collecting huggingface-hub
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 72.6 MB/s 
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.11.1 timm-0.6.12
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting efficientnet_pytorch
  Downlo

In [None]:
import wandb
# Authenticate this session with Weights & Biases before the wandb.init() calls further down.
wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# W&B project name passed to the wandb.init(project=PROJECT) calls in this notebook.
PROJECT = 'homework2_ML'
# NOTE(review): DATA_DIR and ARTIFACT_PATH are not referenced in the visible
# part of the notebook — confirm they are still needed.
DATA_DIR = 'data'
ARTIFACT_PATH = '/content/w&b/'

# Utilities for dataset manipulation

In [None]:
def get_classes(data_dir):
    """Return the list of class names found under ``data_dir``.

    ``data_dir`` is handed to ``torchvision.datasets.ImageFolder`` and the
    ``classes`` attribute of the resulting dataset is returned.
    """
    return datasets.ImageFolder(data_dir).classes

In [None]:
def get_data_loaders(data_dir, batch_size, train = False):
    """Build the ``DataLoader`` objects for one side of the dataset split.

    Args:
        data_dir: Root folder containing the ``train``, ``valid`` (or ``val``)
            and ``test`` sub-folders, each laid out for ``ImageFolder``.
        batch_size: Batch size used by every loader created in this call.
        train: When true, build the augmented training loader; otherwise
            build the validation and test loaders.

    Returns:
        ``(train_loader, n_train)`` when ``train`` is true, otherwise
        ``(val_loader, test_loader, n_val, n_test)``.
    """
    # Normalisation with the ImageNet channel means / standard deviations.
    normalize = T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    if train:
        # Training pipeline: random flips / colour jitter before resizing,
        # random erasing on the normalised tensor.
        transform = T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter()]), p=0.25),
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            normalize,
            T.RandomErasing(p=0.2, value='random')
        ])
        train_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform = transform)
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return train_loader, len(train_data)

    # Validation / test pipeline: deterministic resize + crop, no augmentation.
    transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        normalize,
    ])

    # Prefer the historical "valid/" folder, but accept "val/" (the name the
    # splitfolders package gives the validation split) so that no manual
    # rename is needed after splitting.
    valid_dir = os.path.join(data_dir, "valid/")
    if not os.path.isdir(valid_dir):
        valid_dir = os.path.join(data_dir, "val/")

    val_data = datasets.ImageFolder(valid_dir, transform=transform)
    test_data = datasets.ImageFolder(os.path.join(data_dir, "test/"), transform=transform)
    # NOTE(review): shuffle=True is kept for backward compatibility; it does
    # not change aggregate metrics but makes the evaluation batch order random.
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4)
    return val_loader, test_loader, len(val_data), len(test_data)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (the dataset and the exported models live there).
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
# Split the original dataset into train / val / test copies with an
# 80 / 10 / 10 ratio; the fixed seed keeps the split reproducible.
dataset_path_original = "/content/drive/MyDrive/ColabNotebooks/MachineLearning/HomeWork2/Dataset"
# Use one absolute location both for writing the split and for reading it
# back, so the cell does not depend on the kernel's working directory.
split_output_dir = "/content/dataset_"
splitfolders.ratio(dataset_path_original, output=split_output_dir, seed=1337, ratio=(.8, 0.1,0.1))
classes = get_classes(os.path.join(split_output_dir, "train/"))
print("Found : ",len(classes),"classes --->>>",classes)

Copying files: 2911 files [00:35, 82.42 files/s] 

Found :  10 classes --->>> ['Balcony', 'Bathroom', 'Bedroom', 'Fireplace', 'Garden', 'Hammok', 'Kitchen', 'Panoramic_view', 'Pool', 'Stairs']





In [None]:
# Root of the train / val / test split written by splitfolders above.
dataset_path = "/content/dataset_"
# Batch size of the validation and test loaders.
batch_size = 32
# Batch size of the training loader.
training_batch_size = 128

In [None]:
# NOTE: get_data_loaders looks for the validation split under
# <dataset_path>/valid/. Rename the "val" folder inside /content/dataset_
# to "valid" before running this cell.
train_loader, train_data_len = get_data_loaders(dataset_path, training_batch_size, train=True)
val_loader, test_loader, valid_data_len, test_data_len = get_data_loaders(
    dataset_path, batch_size=batch_size, train=False
)

In [None]:
# Per-phase lookup tables read by the training loop (keys: "train" / "val").
dataloaders = dict(train=train_loader, val=val_loader)
dataset_sizes = dict(train=train_data_len, val=valid_data_len)

In [None]:
# Summarise the batch sizes and how many batches each loader yields per pass.
print(f"Batch size (val) =  {batch_size}")
print(f"Batch size (training) =  {training_batch_size}")
print(f"Batches per training:  {len(train_loader)}")
print(f"Batches per validation:  {len(val_loader)}")
print(f"Batches per test:  {len(test_loader)}")

Batch size (val) =  32
Batch size (training) =  128
Batches per training:  19
Batches per validation:  9
Batches per test:  10


In [None]:
# Number of images in the train, validation and test splits.
print(train_data_len, valid_data_len, test_data_len)

2324 287 300


# Log dati a Wandb

Log alcune immagini del dataset

In [None]:
# Log a small preview table to W&B: the first image (and its label) of each
# of the first 11 training batches. The images are the augmented, normalised
# tensors produced by the training transform.
wandb.init(project=PROJECT, job_type="log_dataset")
table = wandb.Table(columns=["image", "label"])
for step, (img, label) in enumerate(tqdm(train_loader, leave=False)):
    # Tensors are channel-first; move the channel axis last for wandb.Image.
    first_image = img[0].numpy().transpose((1, 2, 0))
    first_label = classes[label.numpy()[0]]
    table.add_data(wandb.Image(first_image), first_label)
    if step == 10:
        break

wandb.log({'train_data/train_table': table})
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mrucci-2053183[0m ([33msapienza_ml_2022_23[0m). Use [1m`wandb login --relogin`[0m to force relogin




VBox(children=(Label(value='0.727 MB of 0.727 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.999620…

Log distribuzione delle classi

In [None]:
from collections import Counter

# Count how many images each class has in the original (unsplit) dataset.
dat = datasets.ImageFolder(dataset_path_original)
# NOTE: this rebinds the notebook-level `classes` list, which build_model
# and the W&B table cell also read.
classes = list(dat.class_to_idx.keys())

# Look every count up by class index instead of pairing Counter's iteration
# order with `classes`: the mapping stays correct even if a class index is
# missing from `dat.targets` (it then simply gets a count of 0).
target_counts = Counter(dat.targets)
distrib = {name: target_counts[idx] for name, idx in dat.class_to_idx.items()}

distrib

{'Balcony': 218,
 'Bathroom': 300,
 'Bedroom': 306,
 'Fireplace': 266,
 'Garden': 321,
 'Hammok': 203,
 'Kitchen': 321,
 'Panoramic_view': 333,
 'Pool': 355,
 'Stairs': 288}

In [None]:
# Log the per-class image counts computed above in a dedicated W&B run.
wandb.init(project=PROJECT , job_type="dataset_distribution")
wandb.log({'data_distrib': distrib})
wandb.finish()

# Scelta del Modello

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Flying on the ->",device)

Flying on the -> cuda


In [None]:
# Hyper-parameters of the run; the whole dict is also uploaded to W&B.
# The two batch sizes mirror `batch_size` / `training_batch_size` above.
config = dict(
    learning_rate=0.001,
    num_epochs=300,
    batch_size=32,
    training_batch_size=128,
    img_size=(224, 224, 3),
    backbone='efficientNet',
)

In [None]:
# Shorthand aliases for two of the hyper-parameters stored in `config`.
LR = config['learning_rate']
EPOCHS = config['num_epochs']

# Log the final results on the validation set
# NOTE(review): neither flag is read in the visible part of the notebook —
# presumably consumed by later evaluation cells; confirm.
LOG_IMAGES = True
LOG_CONFUSION_MATRIX = True





In [None]:
def build_model(num_classes=10, backbone='deit'):
    """Build a pretrained backbone with a freshly initialised classifier.

    Args:
        num_classes: Number of logits produced by the new classification head.
        backbone: ``'deit'`` loads ``deit_tiny_patch16_224`` through
            ``torch.hub`` and freezes every pretrained parameter, so only the
            new head is trainable. ``'efficientNet'`` loads
            ``efficientnet-b0`` from ``efficientnet_pytorch`` and leaves all
            parameters trainable.

    Returns:
        The model, moved to the notebook-level ``device``.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """
    def make_head(n_inputs):
        # Two-layer MLP classifier shared by both backbones.
        return nn.Sequential(
            nn.Linear(n_inputs, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        )

    if backbone == 'deit':
        model = torch.hub.load('facebookresearch/deit:main', 'deit_tiny_patch16_224', pretrained=True)
        # Freeze the pretrained weights; the head created below is attached
        # afterwards and therefore stays trainable.
        for param in model.parameters():
            param.requires_grad = False
        model.head = make_head(model.head.in_features)
    elif backbone == 'efficientNet':
        model = EfficientNet.from_pretrained('efficientnet-b0')
        # efficientnet_pytorch keeps its final classifier in `_fc`; the model
        # has no `head` attribute, so `_fc` is the layer that must be replaced
        # for the new head to be used by forward().
        model._fc = make_head(model._fc.in_features)
    else:
        raise ValueError(
            "Unknown backbone {!r}; expected 'deit' or 'efficientNet'".format(backbone)
        )

    return model.to(device)


In [None]:
# Instantiate the backbone selected in `config`, the label-smoothing loss
# and an Adam optimiser over the model's parameters.
model = build_model(backbone=config['backbone'])
criterion = LabelSmoothingCrossEntropy().to(device)
optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

Loaded pretrained weights for efficientnet-b0


In [None]:
# lr scheduler
# StepLR multiplies the learning rate by gamma (0.97) every step_size (3)
# calls to scheduler.step(); train_model calls it once per epoch.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)

Inizializzazione Wandb

In [None]:
# Open the W&B run that receives the training metrics and store the
# hyper-parameter dict with it. The run is closed after training.
wandb.init(project=PROJECT, sync_tensorboard=True, job_type='training_logs')
wandb.config.update(config)

[34m[1mwandb[0m: Currently logged in as: [33mrucci-2053183[0m ([33msapienza_ml_2022_23[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=config['num_epochs'],callbacks=None):
    """Train ``model`` and return it with the best-validation weights loaded.

    Every epoch runs a training pass followed by a validation pass over the
    notebook-level ``dataloaders`` / ``dataset_sizes`` dictionaries, on the
    notebook-level ``device``.

    Args:
        model: Network to optimise (updated in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped after every training batch.
        scheduler: LR scheduler stepped once per epoch, after the training pass.
        num_epochs: Number of epochs to run.
        callbacks: Unused; kept so existing calls keep working.

    Returns:
        ``model`` with the weights of the epoch that reached the highest
        validation accuracy.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    wandb.watch(model, log_freq=10)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print("-"*10)

        for phase in ['train', 'val']: # training pass, then validation pass
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            # Plain Python accumulators: they work even if the loader yields
            # no batch and keep the metrics off the GPU.
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                # Autograd is only needed while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1) # predicted class per sample
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by the batch size so that the
                # epoch value is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if is_train:
                scheduler.step() # one scheduler step per epoch

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            if epoch % 10 == 0: # log to W&B every 10 epochs
                # One call so loss and accuracy share the same W&B step.
                wandb.log({phase+"_loss": epoch_loss, phase+"_acc": epoch_acc})

            print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict()) # keep the best validation accuracy model
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best Val Acc: {:.4f}".format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Train with the objects created above; train_model returns the model with
# the best-validation weights loaded.
model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler,callbacks=None)
# Close the W&B run opened for the training logs.
wandb.finish()

Epoch 0/299
----------


100%|██████████| 19/19 [00:25<00:00,  1.36s/it]


train Loss: 3.4833 Acc: 0.4836


100%|██████████| 9/9 [00:01<00:00,  5.12it/s]


val Loss: 2.8613 Acc: 0.7561

Epoch 1/299
----------


100%|██████████| 19/19 [00:20<00:00,  1.07s/it]


train Loss: 1.6549 Acc: 0.8352


100%|██████████| 9/9 [00:01<00:00,  6.04it/s]


val Loss: 2.2001 Acc: 0.8537

Epoch 2/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.00it/s]


train Loss: 1.4090 Acc: 0.9045


100%|██████████| 9/9 [00:01<00:00,  6.05it/s]


val Loss: 1.7381 Acc: 0.8641

Epoch 3/299
----------


100%|██████████| 19/19 [00:19<00:00,  1.01s/it]


train Loss: 1.3010 Acc: 0.9389


100%|██████████| 9/9 [00:02<00:00,  3.62it/s]


val Loss: 1.7775 Acc: 0.8676

Epoch 4/299
----------


100%|██████████| 19/19 [00:20<00:00,  1.10s/it]


train Loss: 1.2320 Acc: 0.9651


100%|██████████| 9/9 [00:01<00:00,  6.07it/s]


val Loss: 1.6410 Acc: 0.8606

Epoch 5/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.2059 Acc: 0.9651


100%|██████████| 9/9 [00:01<00:00,  6.18it/s]


val Loss: 1.5789 Acc: 0.8711

Epoch 6/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.1966 Acc: 0.9699


100%|██████████| 9/9 [00:01<00:00,  6.02it/s]


val Loss: 1.4994 Acc: 0.8571

Epoch 7/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.1543 Acc: 0.9836


100%|██████████| 9/9 [00:01<00:00,  6.15it/s]


val Loss: 1.4323 Acc: 0.9094

Epoch 8/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.1405 Acc: 0.9845


100%|██████████| 9/9 [00:01<00:00,  6.40it/s]


val Loss: 1.4754 Acc: 0.8920

Epoch 9/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.1211 Acc: 0.9927


100%|██████████| 9/9 [00:01<00:00,  6.09it/s]


val Loss: 1.4268 Acc: 0.8815

Epoch 10/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.1099 Acc: 0.9901


100%|██████████| 9/9 [00:01<00:00,  5.86it/s]


val Loss: 1.3369 Acc: 0.9094

Epoch 11/299
----------


100%|██████████| 19/19 [00:19<00:00,  1.00s/it]


train Loss: 1.1197 Acc: 0.9875


100%|██████████| 9/9 [00:01<00:00,  6.21it/s]


val Loss: 1.5253 Acc: 0.8780

Epoch 12/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.1088 Acc: 0.9918


100%|██████████| 9/9 [00:01<00:00,  6.16it/s]


val Loss: 1.4273 Acc: 0.8990

Epoch 13/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.0995 Acc: 0.9923


100%|██████████| 9/9 [00:01<00:00,  6.26it/s]


val Loss: 1.4197 Acc: 0.8920

Epoch 14/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.1082 Acc: 0.9892


100%|██████████| 9/9 [00:01<00:00,  6.28it/s]


val Loss: 1.4808 Acc: 0.8711

Epoch 15/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0895 Acc: 0.9935


100%|██████████| 9/9 [00:01<00:00,  5.94it/s]


val Loss: 1.4293 Acc: 0.8815

Epoch 16/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.0869 Acc: 0.9923


100%|██████████| 9/9 [00:01<00:00,  5.97it/s]


val Loss: 1.4212 Acc: 0.8676

Epoch 17/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.1026 Acc: 0.9862


100%|██████████| 9/9 [00:02<00:00,  4.09it/s]


val Loss: 1.4581 Acc: 0.8920

Epoch 18/299
----------


100%|██████████| 19/19 [00:20<00:00,  1.10s/it]


train Loss: 1.1046 Acc: 0.9871


100%|██████████| 9/9 [00:01<00:00,  6.10it/s]


val Loss: 1.3865 Acc: 0.8850

Epoch 19/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.0986 Acc: 0.9871


100%|██████████| 9/9 [00:01<00:00,  6.11it/s]


val Loss: 1.3659 Acc: 0.8955

Epoch 20/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.04it/s]


train Loss: 1.0868 Acc: 0.9910


100%|██████████| 9/9 [00:01<00:00,  5.89it/s]


val Loss: 1.3795 Acc: 0.8955

Epoch 21/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.00it/s]


train Loss: 1.0896 Acc: 0.9897


100%|██████████| 9/9 [00:01<00:00,  6.35it/s]


val Loss: 1.4623 Acc: 0.8780

Epoch 22/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.0813 Acc: 0.9931


100%|██████████| 9/9 [00:01<00:00,  6.14it/s]


val Loss: 1.3522 Acc: 0.9129

Epoch 23/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.00it/s]


train Loss: 1.0787 Acc: 0.9927


100%|██████████| 9/9 [00:01<00:00,  6.43it/s]


val Loss: 1.3326 Acc: 0.9094

Epoch 24/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.0703 Acc: 0.9944


100%|██████████| 9/9 [00:01<00:00,  6.22it/s]


val Loss: 1.3416 Acc: 0.9129

Epoch 25/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0757 Acc: 0.9935


100%|██████████| 9/9 [00:01<00:00,  6.14it/s]


val Loss: 1.3169 Acc: 0.9164

Epoch 26/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0720 Acc: 0.9940


100%|██████████| 9/9 [00:01<00:00,  6.16it/s]


val Loss: 1.3716 Acc: 0.8990

Epoch 27/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0736 Acc: 0.9910


100%|██████████| 9/9 [00:01<00:00,  6.39it/s]


val Loss: 1.4168 Acc: 0.8920

Epoch 28/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0804 Acc: 0.9910


100%|██████████| 9/9 [00:01<00:00,  6.09it/s]


val Loss: 1.4583 Acc: 0.8746

Epoch 29/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.03it/s]


train Loss: 1.0808 Acc: 0.9888


100%|██████████| 9/9 [00:01<00:00,  6.21it/s]


val Loss: 1.5128 Acc: 0.8641

Epoch 30/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.04it/s]


train Loss: 1.0810 Acc: 0.9897


100%|██████████| 9/9 [00:01<00:00,  5.78it/s]


val Loss: 1.5252 Acc: 0.8641

Epoch 31/299
----------


100%|██████████| 19/19 [00:19<00:00,  1.02s/it]


train Loss: 1.0729 Acc: 0.9914


100%|██████████| 9/9 [00:02<00:00,  4.05it/s]


val Loss: 1.4269 Acc: 0.8850

Epoch 32/299
----------


100%|██████████| 19/19 [00:19<00:00,  1.02s/it]


train Loss: 1.0779 Acc: 0.9914


100%|██████████| 9/9 [00:01<00:00,  6.20it/s]


val Loss: 1.5147 Acc: 0.8676

Epoch 33/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


train Loss: 1.0877 Acc: 0.9875


100%|██████████| 9/9 [00:01<00:00,  5.96it/s]


val Loss: 1.4496 Acc: 0.8711

Epoch 34/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.0692 Acc: 0.9944


100%|██████████| 9/9 [00:01<00:00,  6.07it/s]


val Loss: 1.4782 Acc: 0.8920

Epoch 35/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.0668 Acc: 0.9944


100%|██████████| 9/9 [00:01<00:00,  6.13it/s]


val Loss: 1.4288 Acc: 0.8885

Epoch 36/299
----------


100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


train Loss: 1.0621 Acc: 0.9966


100%|██████████| 9/9 [00:01<00:00,  6.13it/s]


val Loss: 1.3322 Acc: 0.9094

Epoch 37/299
----------


 68%|██████▊   | 13/19 [00:13<00:05,  1.12it/s]

In [None]:
# Export the network as a TorchScript module by tracing it on a dummy
# 1 x 3 x 224 x 224 input.
example = torch.rand(1, 3, 224, 224)
# Tracing records the operations executed for `example`, so switch to eval
# mode first: otherwise dropout and batch-norm would be captured with their
# training-time behaviour. no_grad() keeps autograd out of the trace.
model_cpu = model.cpu().eval()
with torch.no_grad():
    traced_script_module = torch.jit.trace(model_cpu, example)
# traced_script_module.save("VisionTransformer_DEIT_Facebook.pt")
traced_script_module.save("EfficientNet_b0.pt")
# traced_script_module.save("/content/drive/MyDrive/ColabNotebooks/MachineLearning/HomeWork2/VisionTransformer_DEIT_Facebook.pt")
traced_script_module.save("/content/drive/MyDrive/ColabNotebooks/MachineLearning/HomeWork2/EfficientNet_b0.pt")

# Import the model


In [None]:
#Deit
# The .pt files written by this notebook are TorchScript archives (saved with
# traced_script_module.save), so they are read back with torch.jit.load
# rather than torch.load.
# NOTE(review): this loads the DeiT export although `config` currently selects
# the EfficientNet backbone — confirm which file is intended.
model = torch.jit.load("/content/drive/MyDrive/ColabNotebooks/MachineLearning/HomeWork2/VisionTransformer_DEIT_Facebook.pt")

# Testing