In [None]:
import tarfile
import shutil
import os
import cv2
import pathlib
import numpy as np
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab import drive
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.resnet import ResNet101

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix

# Importazione dataset

In [None]:
%%capture
# Mount Google Drive under /content/gdrive so the archives and annotation files
# stored there become readable from the notebook.
drive.mount('/content/gdrive', force_remount=True)
# Extract the training and validation archives. No target directory is given, so
# tar unpacks relative to the current working directory (presumably producing the
# train_set/ and val_set/ folders that later cells read — confirm archive layout).
!tar --extract --verbose --file='/content/gdrive/MyDrive/Progetto_VIPM/dataset/train_balanced.tar'
!tar --extract --verbose --file='/content/gdrive/MyDrive/Progetto_VIPM/dataset/val.tar'
# Unpack the degraded validation images into /content/.
# NOTE(review): unzip is invoked without -o, so re-running this cell in the same
# runtime may prompt before overwriting existing files — confirm on re-execution.
!unzip '/content/gdrive/MyDrive/Progetto_VIPM/dataset/val_degraded.zip' -d '/content/'

## Train

In [None]:
%%capture
# Re-arrange the extracted training images into one sub-folder per class
# (train_set/<label>/<image>) so that labels can be inferred from folder names.
# NOTE(review): read_csv consumes the first CSV row as a header before the columns
# are renamed below; if the annotation file has no header row, that first sample
# is silently dropped — confirm, and pass header=None if so.
csv_train_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/train_balanced_info.csv", dtype=str)
csv_train_file.columns = ['filename', 'label']
parent_dir = "train_set/"
labels = csv_train_file['label']

# Create each class folder exactly once instead of once per annotated image.
for label in labels.unique():
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Copy (not move) every image into its class folder; the source file is left
# in place directly under parent_dir.
for row in csv_train_file.itertuples(index=False):
    path = os.path.join(parent_dir, row.filename)
    new_path = os.path.join(parent_dir, row.label, os.path.basename(path))
    shutil.copy(path, new_path)

In [None]:
# Configuration of the train/validation split taken from the training folder.
train_data_dir = pathlib.Path('train_set/')
seed_train_validation = 42  # the same seed is passed for both subsets
shuffle_value = True
validation_split = 0.2
batch_size = 512

# Build the "training" subset first and the "validation" subset second from the
# same directory; every argument except `subset` is shared between the two calls.
train_ds, val_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        train_data_dir,
        labels="inferred",            # class taken from the sub-folder name
        label_mode="categorical",     # one-hot encoded label vectors
        validation_split=validation_split,
        subset=subset_name,
        seed=seed_train_validation,
        image_size=(224, 224),
        batch_size=batch_size,
        shuffle=shuffle_value,
    )
    for subset_name in ("training", "validation")
)

Found 125499 files belonging to 251 classes.
Using 100400 files for training.
Found 125499 files belonging to 251 classes.
Using 25099 files for validation.


## Validation

In [None]:
%%capture
# Re-arrange the extracted validation images into one sub-folder per class
# (val_set/<label>/<image>) so that labels can be inferred from folder names.
# NOTE(review): read_csv consumes the first CSV row as a header before the columns
# are renamed below; if the annotation file has no header row, that first sample
# is silently dropped — confirm, and pass header=None if so.
csv_test_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/val_info.csv", dtype=str)
csv_test_file.columns = ['filename', 'label']
parent_dir = "val_set/"
labels = csv_test_file['label']

# Create each class folder exactly once instead of once per annotated image.
for label in labels.unique():
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Copy (not move) every image into its class folder. The per-file print was
# dropped: the cell runs under %%capture, which discards that output anyway.
for row in csv_test_file.itertuples(index=False):
    path = os.path.join(parent_dir, row.filename)
    new_path = os.path.join(parent_dir, row.label, os.path.basename(path))
    shutil.copy(path, new_path)

In [None]:
%%capture
# Re-arrange the degraded validation images into one sub-folder per class
# (val_set_degraded/<label>/<image>). The annotations are read from the same
# val_info.csv used for the clean validation set (presumably the degraded images
# keep the original filenames — confirm).
# NOTE(review): read_csv consumes the first CSV row as a header before the columns
# are renamed below; if the annotation file has no header row, that first sample
# is silently dropped — confirm, and pass header=None if so.
csv_test_degraded_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/val_info.csv", dtype=str)
csv_test_degraded_file.columns = ['filename', 'label']
parent_dir = "val_set_degraded/"
labels = csv_test_degraded_file['label']

# Create each class folder exactly once instead of once per annotated image.
for label in labels.unique():
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Iterate the frame loaded in THIS cell. The previous version looped over
# csv_test_file, a variable defined by another cell, so this cell only worked
# when that cell had already been executed.
for row in csv_test_degraded_file.itertuples(index=False):
    path = os.path.join(parent_dir, row.filename)
    new_path = os.path.join(parent_dir, row.label, os.path.basename(path))
    shutil.copy(path, new_path)

In [None]:
# Folders holding the clean and the degraded evaluation images.
test_data_dir = pathlib.Path('val_set/')
test_degraded_data_dir = pathlib.Path('val_set_degraded/')

# NOTE(review): shuffling does not change aggregate metrics from evaluate(), but
# predictions and labels collected in separate passes over a shuffled dataset
# would not line up — confirm how these datasets are consumed downstream.
shuffle_value = True

# Load the clean set first and the degraded set second with identical settings;
# `batch_size` is the value defined in the training-data cell.
test_ds, test_degraded_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        labels="inferred",            # class taken from the sub-folder name
        label_mode="categorical",     # one-hot encoded label vectors
        subset=None,                  # no train/validation split here
        image_size=(224, 224),
        batch_size=batch_size,
        shuffle=shuffle_value,
    )
    for data_dir in (test_data_dir, test_degraded_data_dir)
)

Found 11993 files belonging to 251 classes.
Found 11993 files belonging to 251 classes.


# Creazione modello

In [None]:
# Instantiate EfficientNetB0 with ImageNet weights. The original classification
# top is kept (include_top=True) because a later cell attaches the new head to
# the 'top_dropout' layer.
base_eff_model = EfficientNetB0(
    include_top=True,
    weights="imagenet",
    input_shape=(224, 224, 3),
)
# Print the layer-by-layer architecture.
base_eff_model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0.h5
Model: "efficientnetb0"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling (Rescaling)          (None, 224, 224, 3)  0           ['input_1[0][0]']                
                                                                                                  
 normalization (Normalization)  (None, 224, 224, 3)  7           ['rescaling[0][0]']              
                                                                                              

In [None]:
# Fine-tuning setup: walk the layers in order, keep everything before
# "block7a_expand_conv" frozen, and make that layer and all later ones trainable.
# NOTE(review): any BatchNormalization layers in the unfrozen tail become
# trainable too; Keras' fine-tuning guidance recommends keeping them frozen —
# confirm this is intended.
trainable_layer = False
for layer in base_eff_model.layers:
    if layer.name == "block7a_expand_conv":
        trainable_layer = True
    layer.trainable = trainable_layer

# Attach the new classification head to the output of 'top_dropout', replacing
# the original ImageNet prediction layer with a 251-way softmax (one unit per
# class folder). The model is built once, directly from the base model's input;
# the previous throwaway intermediate Model and duplicate get_layer lookup are gone.
last_layer = base_eff_model.get_layer('top_dropout').output
flatten_layer = Flatten()(last_layer)
classification_layer = Dense(251, activation='softmax')(flatten_layer)
eff_model = Model(inputs=base_eff_model.input, outputs=classification_layer)

In [None]:
# Track plain accuracy plus top-3 accuracy (true class among the 3 highest scores).
top3_accuracy = tf.keras.metrics.TopKCategoricalAccuracy(k=3)
eff_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', top3_accuracy],
)

# Train for 10 epochs, evaluating on the held-out split after each one.
history = eff_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    verbose=1,
)

# Persist the trained model to Google Drive in HDF5 format.
eff_model.save('/content/gdrive/MyDrive/Progetto_VIPM/models/effNetB0_balanced.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Train the current eff_model for 5 further epochs on the same data.
# Note: `history` is rebound here, so afterwards it holds only these 5 epochs.
history = eff_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    verbose=1,
)

# Save again to the same Drive path, replacing the previously stored model file.
eff_model.save('/content/gdrive/MyDrive/Progetto_VIPM/models/effNetB0_balanced.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Reload the saved model from Drive so evaluation runs on the persisted weights.
eff_model = keras.models.load_model('/content/gdrive/MyDrive/Progetto_VIPM/models/effNetB0_balanced.h5')

# Evaluate on the clean and the degraded sets. No batch_size is passed: both
# inputs are tf.data datasets that were already batched when they were created,
# and Keras documents that batch_size should not be given for dataset inputs.
# Each result is kept in its own variable; previously the clean-set metrics were
# discarded because only the cell's last expression is displayed.
clean_metrics = eff_model.evaluate(test_ds)
degraded_metrics = eff_model.evaluate(test_degraded_ds)

# Display both results: (clean, degraded), each in the order returned by
# evaluate() (loss first, then the compiled metrics).
clean_metrics, degraded_metrics



[5.714931964874268, 0.20203451812267303, 0.32985907793045044]