In [None]:
# Standard library
import os
import pathlib
import random
import shutil
import tarfile

# Third-party: numerics, imaging, plotting
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from mlxtend.plotting import plot_confusion_matrix
from numpy.random import seed
from plotly.subplots import make_subplots

# scikit-learn
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import top_k_accuracy_score
from sklearn.preprocessing import OneHotEncoder

# TensorFlow / Keras
import tensorflow as tf
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras import mixed_precision
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Colab runtime
from google.colab import drive
# Mount Google Drive at /content/gdrive; the archives and CSV annotations read by
# the cells below live under /content/gdrive/MyDrive/Progetto Visual/.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


# Import Dataset

In [None]:
# Seed every RNG used by the notebook (NumPy, TensorFlow, Python's `random`)
# with the same value so runs are repeatable.
random_seed = 42
for seed_fn in (seed, tf.random.set_seed, random.seed):
    seed_fn(random_seed)

In [None]:
%%capture
# Unpack the training tarball and the two validation zip archives from Google Drive.
# No target directory is given, so everything is extracted relative to the current
# working directory (later cells read 'new_train_set/', 'val_set/', 'val_set_degraded/').
!tar --extract --verbose --file='/content/gdrive/MyDrive/Progetto Visual/new_train_set.tar'
!unzip '/content/gdrive/MyDrive/Progetto Visual/val_set.zip'  
!unzip '/content/gdrive/MyDrive/Progetto Visual/val_set_degraded.zip'  

In [None]:
%%capture
# Reorganise the flat training folder into one sub-directory per class label so that
# `image_dataset_from_directory(labels="inferred")` can derive labels from the layout.
#
# NOTE(review): read_csv treats the first CSV row as a header and the columns are then
# renamed. The loader later reports 250999 files (251 * 1000 - 1), which suggests the
# CSV may have no header row and one sample is being dropped -- confirm, and if so pass
# `header=None, names=['filename', 'label']` instead of renaming the columns.
csv_train_file = pd.read_csv("/content/gdrive/MyDrive/Progetto Visual/new_train_info.csv", dtype=str)
csv_train_file.columns = ['filename', 'label']
parent_dir = "new_train_set/"
labels = csv_train_file['label']

# Create each class directory exactly once (the directory was previously re-created
# for every CSV row, i.e. hundreds of thousands of redundant makedirs calls).
for label in set(labels):
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Copy each image into its class directory. Iterating the two columns directly avoids
# building a Series per row as DataFrame.iterrows() does.
for filename, label in zip(csv_train_file['filename'], labels):
    path = os.path.join(parent_dir, filename)
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    shutil.copy(path, new_path)

In [None]:
# Reorganise the flat validation folder (used here as the test set) into one
# sub-directory per class label, mirroring the training-set layout.
#
# NOTE(review): read_csv consumes the first CSV row as a header before the columns are
# renamed; if val_info.csv has no header row, one sample is silently dropped -- confirm,
# and if so pass `header=None, names=['filename', 'label']`.
csv_test_file = pd.read_csv("/content/gdrive/MyDrive/Progetto Visual/Annot/val_info.csv", dtype=str)
csv_test_file.columns = ['filename', 'label']
parent_dir = "val_set/"
labels = csv_test_file['label']

# Create each class directory exactly once instead of once per CSV row.
for label in set(labels):
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Copy each image into its class directory. The per-file debug print was removed:
# this cell is not wrapped in %%capture, so it flooded the output with one line per image.
for filename, label in zip(csv_test_file['filename'], labels):
    path = os.path.join(parent_dir, filename)
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    shutil.copy(path, new_path)

In [None]:
# Build the training and validation datasets from the same class-per-folder tree.
train_data_dir = pathlib.Path('new_train_set/')
seed_train_validation = 42 # must be same for train_ds and val_ds
shuffle_value = True
validation_split = 0.2
batch_size = 256

# Arguments shared by both subsets; sharing seed and validation_split is what keeps
# the two subsets disjoint.
split_kwargs = dict(
    labels="inferred",
    label_mode="categorical",
    validation_split=validation_split,
    seed=seed_train_validation,
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_dir, subset="training", **split_kwargs)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_dir, subset="validation", **split_kwargs)

Found 250999 files belonging to 251 classes.
Using 200800 files for training.
Found 250999 files belonging to 251 classes.
Using 50199 files for validation.


In [None]:
# Test dataset: the whole 'val_set/' tree, no train/validation split.
# Reuses `batch_size` from the train/validation dataset cell.
shuffle_value = True
test_data_dir = pathlib.Path('val_set/')

test_kwargs = dict(
    labels="inferred",
    label_mode="categorical",
    subset=None,
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value,
)
test_ds = tf.keras.preprocessing.image_dataset_from_directory(test_data_dir, **test_kwargs)

Found 11993 files belonging to 251 classes.


In [None]:
def preprocess(images, labels):
  """Apply ResNet50's `preprocess_input` to a batch of images; labels pass through unchanged.

  Returns the `(preprocessed_images, labels)` pair expected by `Dataset.map`.
  """
  prepared = tf.keras.applications.resnet50.preprocess_input(images)
  return prepared, labels

# Rebind all three datasets to their preprocessed versions.
# NOTE: this rebinding is not idempotent -- re-running the cell maps `preprocess`
# a second time over already-preprocessed data.
train_ds, val_ds, test_ds = (ds.map(preprocess) for ds in (train_ds, val_ds, test_ds))

# Model Creation

In [None]:
# ImageNet-pretrained ResNet50 for 224x224 RGB input. The classification top is
# included here; a later cell taps the 'avg_pool' layer and attaches a new head.
baseModel = ResNet50(
    input_shape=(224, 224, 3),
    include_top=True,
    weights='imagenet',
)
baseModel.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                    

In [None]:
# Freeze every layer of the pretrained backbone so only the new head is trained.
for layer in baseModel.layers:
    layer.trainable = False

# Attach a 251-way softmax classifier to the backbone's 'avg_pool' output.
# (The intermediate `Model(inputs=baseModel.input, outputs=avg_pool)` that used to be
# built here was immediately overwritten and only served to provide `model.input`,
# which is the same tensor as `baseModel.input`.)
last_layer = baseModel.get_layer('avg_pool').output
flatten_layer = Flatten()(last_layer)
classification_layer = Dense(251, activation='softmax')(flatten_layer)
model = Model(inputs=baseModel.input, outputs=classification_layer)
model.summary()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            

In [None]:
def decay_schedule(epoch, lr):
    """Return the learning rate to use for `epoch`.

    The incoming `lr` is multiplied by 0.1 whenever `epoch` is a non-zero
    multiple of 5; on every other epoch it is returned unchanged.
    """
    at_decay_step = epoch != 0 and epoch % 5 == 0
    return lr * 0.1 if at_decay_step else lr
# Target file for the best-weights checkpoint. `checkpoint_filepath` was not defined in
# any visible cell, so this cell raised NameError on a fresh kernel. The '.weights.h5'
# suffix is accepted for weights-only checkpoints by both older and newer Keras releases.
checkpoint_filepath = 'resnet50_best.weights.h5'

# Learning-rate schedule driven by decay_schedule (x0.1 on every 5th epoch).
lr_scheduler = LearningRateScheduler(decay_schedule)

# NOTE(review): patience=50 is larger than the 10 epochs used in the fit cell, so early
# stopping cannot trigger in this configuration -- confirm the intended patience.
early_stop = EarlyStopping(monitor='val_loss', patience=50, verbose=1, restore_best_weights=True)

# Saves weights only, and only when val_loss improves.
# NOTE: this callback has no effect unless it is included in the `callbacks` list
# passed to `model.fit`.
best_model = ModelCheckpoint(checkpoint_filepath, verbose=1, monitor='val_loss', mode='min', save_best_only=True, save_weights_only=True)

In [None]:
# Adam with an explicit initial learning rate; the LearningRateScheduler callback
# decays it during training.
opt = keras.optimizers.Adam(learning_rate=0.01)

# Pass the configured optimizer instance. The string 'adam' was passed before, which
# builds a fresh Adam with default settings and silently ignored `opt` (and its
# learning_rate=0.01).
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3)],
)
history = model.fit(train_ds, epochs=10, verbose=1, validation_data=val_ds, callbacks=[lr_scheduler, early_stop])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
def plot_history(history):
  """Plot loss, top-1 accuracy and top-3 accuracy curves side by side.

  `history` must expose a `.history` dict containing the keys 'loss', 'accuracy'
  and 'top_k_categorical_accuracy' plus their 'val_'-prefixed counterparts.
  Relies on `go` (plotly.graph_objects) and `make_subplots` being available in
  the notebook namespace when the function is called.
  """
  panel_titles = ("Loss", "Top-1 Accuracy", "Top-3 Accuracy")

  # (history key, legend name, subplot column) -- validation curve first in each panel.
  curves = (
      ('val_loss', "val_loss", 1),
      ('loss', "train_loss", 1),
      ('val_accuracy', "val_top-1_accuracy", 2),
      ('accuracy', "train_top-1_accuracy", 2),
      ('val_top_k_categorical_accuracy', "val_top-3_accuracy", 3),
      ('top_k_categorical_accuracy', "train_top-3_accuracy", 3),
  )

  fig = make_subplots(rows=1, cols=3, subplot_titles=list(panel_titles))

  for key, legend, column in curves:
    fig.add_trace(go.Scatter(y=history.history[key], name=legend), row=1, col=column)

  fig.update_xaxes(title_text="Epoch")
  for column, title in enumerate(panel_titles, start=1):
    fig.update_yaxes(title_text=title, row=1, col=column)
  fig.show()

In [None]:
# Render the training curves. The plotly imports (`go`, `make_subplots`) live in the
# notebook's top import cell rather than being scattered here.
plot_history(history)

In [None]:
# Evaluate on the clean test set. `test_ds` is a tf.data.Dataset that is already
# batched at creation time, so no `batch_size` is passed here: the previous
# `batch_size=512` did not change the batching and was misleading.
score = model.evaluate(test_ds)
print('test loss, test acc, test top-3 acc:', score)

test loss, test acc, test top-3 acc: [2.111090898513794, 0.5021262168884277, 0.6837321519851685]


# Degraded Test Set

In [None]:
%%capture
# Reorganise the flat degraded-validation folder into one sub-directory per class label.
# The degraded images use the same annotation file as the clean validation set.
#
# NOTE(review): read_csv consumes the first CSV row as a header before the columns are
# renamed; if val_info.csv has no header row, one sample is silently dropped -- confirm,
# and if so pass `header=None, names=['filename', 'label']`.
csv_test_degraded_file = pd.read_csv("/content/gdrive/MyDrive/Progetto Visual/Annot/val_info.csv", dtype=str)
csv_test_degraded_file.columns = ['filename', 'label']
parent_dir = "val_set_degraded/"
labels = csv_test_degraded_file['label']

# Create each class directory exactly once instead of once per CSV row.
for label in set(labels):
    os.makedirs(os.path.join(parent_dir, label), exist_ok=True)

# Copy each image into its class directory. This loop now iterates the frame loaded in
# this cell; it previously iterated `csv_test_file`, a variable left over from an
# earlier cell, so the cell depended on hidden notebook state. The per-file debug
# print (swallowed by %%capture anyway) was removed.
for filename, label in zip(csv_test_degraded_file['filename'], labels):
    path = os.path.join(parent_dir, filename)
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    shutil.copy(path, new_path)

In [None]:
# Degraded test dataset, loaded with the same settings as the clean test dataset.
# Reuses `batch_size` from the train/validation dataset cell.
test_degraded_data_dir = pathlib.Path('val_set_degraded/')
shuffle_value = True
test_degraded_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_degraded_data_dir,
    labels="inferred",
    label_mode="categorical",
    subset=None,
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value)

# Apply the same ResNet50 input preprocessing used for train/val/test. This step was
# missing, so the model was evaluated on raw pixel values for the degraded set only,
# which made the degraded scores incomparable with the clean ones.
test_degraded_ds = test_degraded_ds.map(preprocess)

Found 11993 files belonging to 251 classes.


In [None]:
# Evaluate on the degraded test set. The dataset is already batched at creation time,
# so no `batch_size` is passed here: the previous `batch_size=512` did not change the
# batching and was misleading.
score = model.evaluate(test_degraded_ds)
print('test loss, test acc, test top-3 acc:', score)

test loss, test acc, test top-3 acc: [5.854011058807373, 0.06929042190313339, 0.1416659653186798]


In [None]:
# Results for ResNet50 trained on the augmented training set
# (presumably clean test set first, degraded test set second, matching the order of
# the evaluation cells above -- TODO confirm)

#test loss, test acc, test top-3 acc: [2.0798654556274414, 0.501209020614624, 0.7011590003967285]
#test loss, test acc, test top-3 acc: [8.095725059509277, 0.032852496951818466, 0.07279246300458908]

In [None]:
# Persist the full trained model to Google Drive as a single HDF5 (.h5) file.
model.save('/content/gdrive/MyDrive/Progetto Visual/Resnet50_augmented.h5')