<a href="https://colab.research.google.com/github/rubenIng93/OWR-ImageClassification/blob/main/OWR_MLDL_2021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Show the GPU attached to this runtime (model, driver / CUDA version, memory in use).
!nvidia-smi

Mon Sep 20 16:05:35 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P8    74W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

**Imports**

In [1]:
import csv
import os
import random
import statistics
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader, ConcatDataset, Dataset

**Variables init**

In [2]:
# Notebook-wide settings.
# Only `batch_size` is passed on by the experiment cells shown in this
# notebook; those cells give their epoch count as a literal instead of
# reading `epochs`.
n_classes = 100   # number of classes
epochs = 70       # default number of training epochs
batch_size = 128  # mini-batch size handed to every experiment class

**Cloning the repo**

In [3]:
# Clone github repository with data
# The directory check makes this cell safe to re-run: the clone only happens
# when ./OWR_Tools does not exist yet.
if not os.path.isdir('./OWR_Tools'):
  !git clone https://github.com/rubenIng93/OWR-ImageClassification
  # Rename the checkout so it can be imported as the `OWR_Tools` package
  # (the hyphenated repository name is not a valid Python module name).
  !mv 'OWR-ImageClassification' 'OWR_Tools'

Cloning into 'OWR-ImageClassification'...
remote: Enumerating objects: 922, done.[K
remote: Counting objects: 100% (247/247), done.[K
remote: Compressing objects: 100% (187/187), done.[K
remote: Total 922 (delta 138), reused 146 (delta 60), pack-reused 675[K
Receiving objects: 100% (922/922), 2.72 MiB | 15.47 MiB/s, done.
Resolving deltas: 100% (541/541), done.


**Dataset preparation**

In [4]:
# Per-channel (R, G, B) mean / std shared by the train and test pipelines.
# NOTE(review): the std triple differs from the commonly quoted CIFAR-100
# statistics (roughly 0.267, 0.256, 0.276) -- confirm these values are intended.
cifar_mean = (0.5071, 0.4867, 0.4408)
cifar_std = (0.2009, 0.1984, 0.2023)

# Training pipeline: random 32x32 crop after 4 px of padding, random
# horizontal flip, tensor conversion, then normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: no augmentation, same normalisation as for training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

In [5]:
from OWR_Tools.owr_dataset import Cifar100Dataset
# Build the train and test splits through the OWR_Tools dataset wrapper.
# According to the recorded output of this cell, the CIFAR-100 archive is
# downloaded to ./data the first time it runs.
trainset = Cifar100Dataset(
    split='train',
    transform=train_transform,  # augmented pipeline
    open_world=True,
)
testset = Cifar100Dataset(
    split='test',
    transform=test_transform,   # normalisation only
    open_world=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
from OWR_Tools.resnet import resnet32 as rn32
from OWR_Tools.utils import *
from OWR_Tools.cosine_resnet import resnet32 as cos_rn32
from OWR_Tools.icarl import iCaRLTrainer
from OWR_Tools.classifiers_study import CSEnvironment
from OWR_Tools.losses_study import Loss_Experiments
from OWR_Tools.open_world import Open_World
from OWR_Tools.variation_v4 import Variations_Model
from OWR_Tools.train_test import TrainTester
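# NOTE: the experiment cells below pass a `resnet32` object, but this is the
# only place where it would be assigned and the line is commented out.
# Assign `resnet32` (e.g. by uncommenting this line, or from `rn32`) before
# running those cells on a fresh kernel.
#resnet32 = cos_rn32().cuda()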

In [None]:
# BASELINES
# Runs OWR_Tools' TrainTester with the 'finetuning' option on the train/test
# splits built above.
# NOTE(review): `resnet32` is not assigned in any visible cell (its only
# assignment, in the OWR_Tools import cell, is commented out) -- define it
# before running this cell on a fresh kernel.
# NOTE(review): `FileWriter` is not imported by name; presumably it comes from
# `from OWR_Tools.utils import *` -- confirm.

# instantiate the file writer
file_writer = FileWriter("baselines.dat")
# create the train class
engine = TrainTester(
    [2],  # presumably the list of random seeds -- TODO confirm against TrainTester
    file_writer,
    trainset,
    testset,
    20, #epochs
    resnet32,
    70, # splits -- NOTE(review): the other cells use 70 epochs and 5-10 splits; these two values may be swapped or stale, confirm
    batch_size,
    'finetuning'
    )

# run the training
engine.run_loop()

In [None]:
# LOSS STUDY
# Runs OWR_Tools' Loss_Experiments with 'ce' as classification loss and 'l2'
# as distillation loss.
# NOTE(review): `resnet32` is not assigned in any visible cell (its only
# assignment, in the OWR_Tools import cell, is commented out) -- define it
# before running this cell on a fresh kernel.

# instantiate the file writer
file_writer = FileWriter("l2_ce_accuracy.dat")
# create the train class
losses = Loss_Experiments(
    [145],  # presumably the list of random seeds -- TODO confirm against Loss_Experiments
    file_writer,
    trainset,
    testset,
    1, #epochs -- NOTE(review): a single epoch while the other studies use 70; looks like a smoke-test value, confirm
    resnet32,
    10, # splits
    batch_size,
    'ce', # classification loss
    'l2' # distillation loss
    )

# run the training
losses.run_loop()

In [None]:
# CLASSIFIER STUDY
# Runs OWR_Tools' CSEnvironment with the 'KNN' option and prints the
# wall-clock duration of the run in minutes.
# NOTE(review): `resnet32` is not assigned in any visible cell (its only
# assignment, in the OWR_Tools import cell, is commented out) -- define it
# before running this cell on a fresh kernel.
# NOTE(review): the output file is named "cosine_classifier_Adam.dat" while
# the selected option is 'KNN' -- confirm the file name is the intended one.

# instantiate the file writer
file_writer = FileWriter("cosine_classifier_Adam.dat")
# create the train class
cs_study = CSEnvironment(
    [144],  # presumably the list of random seeds -- TODO confirm against CSEnvironment
    file_writer,
    trainset,
    testset,
    70, #epochs
    resnet32,
    10, # splits
    batch_size,
    'KNN'
    )

# run the training
start = time.time()
cs_study.run_loop()
end = time.time()
# elapsed seconds converted to minutes
print(f"Execution Time: {(end-start)/60} m")

In [None]:
# OPEN WORLD
# Runs OWR_Tools' Open_World over a list of thresholds and prints the
# wall-clock duration of the run in minutes.
# NOTE(review): `resnet32` is not assigned in any visible cell (its only
# assignment, in the OWR_Tools import cell, is commented out) -- define it
# before running this cell on a fresh kernel.

# Threshold values handed to Open_World; the same six values appear as the
# string keys read by the open-world chart cell.
thresholds = [.5, .6, .7, .8, .9, .95]

# instantiate the file writer
file_writer = FileWriter("owr_accuracy.dat", open_world=True)
# create the train class
owr = Open_World(
    [144],  # presumably the list of random seeds -- TODO confirm against Open_World
    file_writer,
    trainset,
    testset,
    70, #epochs
    resnet32,
    5, # splits
    batch_size,
    thresholds,
    naive=False
    )

# run the training
start = time.time()
owr.run_loop()
end = time.time()
# elapsed seconds converted to minutes
print(f"Execution Time: {(end-start)/60} m")

In [None]:
# VARIATION
# Runs OWR_Tools' Variations_Model on the train/test splits built above.
# NOTE(review): `resnet32` is not assigned in any visible cell (its only
# assignment, in the OWR_Tools import cell, is commented out) -- define it
# before running this cell on a fresh kernel.

# instantiate the file writer
file_writer = FileWriter("variation.dat")
# create the train class
variation = Variations_Model(
    [2],  # presumably the list of random seeds -- TODO confirm against Variations_Model
    file_writer,
    trainset,
    testset,
    70, #epochs
    resnet32,
    10, # splits
    batch_size    
    )

# run the training
variation.run_loop()

**Chart Scripts**

In [None]:
# OPEN WORLD CHART
# Plots, for every threshold, the harmonic mean (averaged over the result
# files below and expressed in percent) against the split index.

result_files = [
    '/content/OWR_Tools/result_files/open_world/harmonic_means_owr_144_var.pth',
    '/content/OWR_Tools/result_files/open_world/harmonic_means_owr_145_var.pth',
    '/content/OWR_Tools/result_files/open_world/harmonic_means_owr_2_var.pth',
]
# NOTE(review): `unpickle` is not defined in this notebook (presumably it comes
# from OWR_Tools.utils); if it wraps pickle, only load files you trust.
runs = [unpickle(path) for path in result_files]

# Keys of the per-split dictionaries stored in the result files.
threshold_keys = ['0.5', '0.6', '0.7', '0.8', '0.9', '0.95']
curves = {key: [] for key in threshold_keys}

for entries in zip(*runs):
    # Entries that are not dictionaries are skipped; as before, only the
    # entry of the first file is type-checked.
    if not isinstance(entries[0], dict):
        continue
    for key in threshold_keys:
        curves[key].append(sum(entry[key] for entry in entries) / len(entries) * 100)

print(curves['0.5'])

# The x axis is derived from the data instead of assuming exactly 5 splits.
n_splits = len(curves[threshold_keys[0]])
splits = list(range(1, n_splits + 1))

fig, ax = plt.subplots()
for key in threshold_keys:
    ax.plot(splits, curves[key], label=key, marker='.')

ax.set_xlabel('split')
ax.set_ylabel('Accuracy [%]')
fig.suptitle('Harmonic mean between closed and open world')
ax.set_title('for different threshold values - variation')
ax.set_xticks(splits)
ax.set_yticks(list(range(20, 70, 10)))
ax.legend()
ax.grid()
# Save before showing so the written file always contains the drawn figure.
fig.savefig('open_world_h_means_var.png')
plt.show()


In [None]:
# TREND SCRIPT
# One curve per method: mean accuracy per split over the runs stored in the
# method's result file, with error bars of +/- one sample standard deviation.
# `csv`, `statistics`, `numpy` and `matplotlib` come from the import cell at
# the top of the notebook.

N_SPLITS = 10  # accuracy values are read from columns 1..10 of every row

files_paths = [
    '/content/OWR_Tools/result_files/icarl_adam_accuracies.dat',
    '/content/OWR_Tools/result_files/lwf_accuracies.dat',
    '/content/OWR_Tools/result_files/finetuning_accuracies.dat',
    '/content/OWR_Tools/result_files/var_final_acc.dat'
    ]

labels = ['iCaRL', 'LwF', 'Finetuning', 'Our variation']

x = range(1, N_SPLITS + 1)
fig, ax = plt.subplots(figsize=(10,7))

for label, path in zip(labels, files_paths):

    with open(path, 'r') as _file:
        rows = csv.reader(_file, delimiter='\t')
        next(rows, None)  # first line is a header
        # One list of percentages per data row; column 0 is not read.
        # Blank lines (e.g. a trailing newline) are ignored.
        runs = [
            [float(row[col]) * 100 for col in range(1, N_SPLITS + 1)]
            for row in rows if row
        ]

    if len(runs) < 2:
        # statistics.stdev needs at least two samples per split
        raise ValueError(f"{path}: need at least 2 data rows, found {len(runs)}")

    # Transpose: one tuple of run values per split.
    per_split = list(zip(*runs))
    mean_values = [statistics.mean(column) for column in per_split]
    stds = [statistics.stdev(column) for column in per_split]

    print(np.array(mean_values))
    ax.errorbar(x, mean_values, yerr=stds, label=label, linewidth=2.2)

# Edit the layout

ax.legend(prop={'size':15})
ax.grid()
ax.set_ylim(0,100)
ax.set_yticks(range(0,101,10))
ax.set_xticks(x)
ax.set_title('Variation - comparison w.r.t. baselines ', fontsize=17)
ax.set_xlabel('split', fontsize=15)
ax.set_ylabel('Accuracy [%]', fontsize=15)

# Save first, then display: `fig.show()` only warns on the inline (non-GUI)
# notebook backend, whereas `plt.show()` renders the figure there.
fig.savefig('variation_comparison.png')
plt.show()

In [None]:
# Variation graph
# Bars: share of "new" / "old" predictions per split; lines: accuracy on new
# and old classes per split. All values are read from var_nets.dat and
# converted to percent.
# `csv` comes from the import cell at the top of the notebook.

new_calls = []
old_calls = []
new_acc = []
old_acc = []

with open('/content/OWR_Tools/result_files/var_nets.dat', 'r') as _file:
    values = csv.reader(_file, delimiter='\t')
    next(values, None)  # first line is a header
    # The first four data rows fill, in this order, new_calls, old_calls,
    # new_acc and old_acc; any further row is ignored.
    for series, row in zip((new_calls, old_calls, new_acc, old_acc), values):
        # columns 1..9 hold the values for splits 2..10
        series.extend(float(row[split]) * 100 for split in range(1, 10))

labels = [str(i) for i in range(2,11)]
x = np.arange(len(new_acc))
width = 0.20
fig, ax = plt.subplots()
# Two bars per split, placed side by side around the tick position.
ax.bar(x - width/2, new_calls, width, label='New predictions',
       alpha=0.6, color='tab:olive')
ax.bar(x + width/2, old_calls, width, label='Old predictions',
       alpha=0.6, color='tab:brown')

ax.plot(x, new_acc, label='New accuracy', linewidth=2)
ax.plot(x, old_acc, label='Old accuracy', linewidth=2)


ax.set_ylabel('Percentage accuracy / calls')
ax.set_xlabel('split')
ax.set_title('Variation performances')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
ax.grid()


fig.tight_layout()
fig.savefig('variation_aggregated.png')
plt.show()


