In [8]:
import tarfile
import zipfile
import shutil
import random
import os
import cv2
import pathlib
import numpy as np
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt
from numpy.random import seed
from google.colab import drive
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import mixed_precision
from tensorflow.keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.resnet import ResNet101

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import top_k_accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from tqdm.notebook import tqdm
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


# Import Dataset

In [2]:
random_seed = 42
seed(random_seed)
tf.random.set_seed(random_seed)
random.seed(random_seed)

In [15]:
%%capture
!tar --extract --verbose --file='/content/gdrive/MyDrive/Progetto_VIPM/dataset/train_clean.tar'
!tar --extract --verbose --file='/content/gdrive/MyDrive/Progetto_VIPM/dataset/val.tar'
with zipfile.ZipFile("/content/gdrive/MyDrive/Progetto_VIPM/dataset/val_degraded.zip","r") as zip_ref:
    zip_ref.extractall()

In [16]:
%%capture

csv_train_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/train_clean_info.csv", dtype=str)
csv_train_file.columns = ['filename', 'label']
parent_dir = "train_set_clean/"
labels = csv_train_file['label']

# divido le immagini in sottocartelle in base alla classe
for label in labels:
    path = os.path.join(parent_dir, label)
    os.makedirs(path, exist_ok=True)    
for _, row in csv_train_file.iterrows():
    label = row['label']
    path = os.path.join(parent_dir, row['filename'])
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    print(new_path)
    shutil.copy(path, new_path)

In [17]:
%%capture

csv_test_clean_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/val_info.csv", dtype=str)
csv_test_clean_file.columns = ['filename', 'label']
parent_dir = "val_set/"
labels = csv_test_clean_file['label']

# divido le immagini in sottocartelle in base alla classe
for label in labels:
    path = os.path.join(parent_dir, label)
    os.makedirs(path, exist_ok=True)    
for _, row in csv_test_clean_file.iterrows():
    label = row['label']
    path = os.path.join(parent_dir, row['filename'])
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    print(new_path)
    shutil.copy(path, new_path)

In [18]:
%%capture

csv_test_deg_file = pd.read_csv("/content/gdrive/MyDrive/Progetto_VIPM/annot/val_info.csv", dtype=str)
csv_test_deg_file.columns = ['filename', 'label']
parent_dir = "val_set_degraded/"
labels = csv_test_deg_file['label']

# divido le immagini in sottocartelle in base alla classe
for label in labels:
    path = os.path.join(parent_dir, label)
    os.makedirs(path, exist_ok=True)    
for _, row in csv_test_deg_file.iterrows():
    label = row['label']
    path = os.path.join(parent_dir, row['filename'])
    img_name = os.path.split(path)[-1]
    new_path = os.path.join(parent_dir, label, img_name)
    print(new_path)
    shutil.copy(path, new_path)

In [19]:
train_data_dir = pathlib.Path('train_set_clean/')
shuffle_value = False
batch_size = 128

train_clean_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_dir,
    labels="inferred",
    label_mode="categorical",
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value,
    seed=random_seed)

Found 114153 files belonging to 251 classes.


In [20]:
test_clean_data_dir = pathlib.Path('val_set/')
#shuffle_value = True
shuffle_value = False
batch_size = 128

test_clean_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_clean_data_dir,
    labels="inferred",
    label_mode="categorical",
    subset=None,
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value,
    seed=random_seed)

Found 11993 files belonging to 251 classes.


In [22]:
test_deg_data_dir = pathlib.Path('val_set_degraded/')
#shuffle_value = True
shuffle_value = False
batch_size = 128

test_deg_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_deg_data_dir,
    labels="inferred",
    label_mode="categorical",
    subset=None,
    image_size=(224,224),
    batch_size=batch_size,
    shuffle=shuffle_value,
    seed=random_seed)

Found 11993 files belonging to 251 classes.


In [23]:
def preprocess(images, labels):
  return tf.keras.applications.mobilenet.preprocess_input(images), labels

train_clean_ds = train_clean_ds.map(preprocess)
test_clean_ds = test_clean_ds.map(preprocess)
test_deg_ds = test_deg_ds.map(preprocess)

# Feature Extraction - Resnet50

In [24]:
#Creazione modello
base_model = ResNet50(weights='imagenet', include_top=True, input_shape= (224, 224, 3))
model = Model(inputs = base_model.input , outputs=base_model.get_layer('avg_pool').output)
base_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                    

In [25]:
# Estraggo Features Train + Reshape Features
feat_train = model.predict(train_clean_ds)
#feat_train = np.array( [feat_train[i].flatten() for i in range(x_train.shape[0])])



In [26]:
# Estraggo Feature Test + Reshape Features
feat_test = model.predict(test_clean_ds)
#feat_test = np.array( [feat_test[i].flatten() for i in range(x_val.shape[0])])



In [27]:
# Estraggo Feature Test Degradato + Reshape Features
feat_test_deg = model.predict(test_deg_ds)
#feat_test = np.array( [feat_test[i].flatten() for i in range(x_val.shape[0])])



In [28]:
# Preparo label dal dataset
y_train = np.concatenate([y for x, y in train_clean_ds], axis=0)
y_test = np.concatenate([y for x, y in test_clean_ds], axis=0)
y_test_deg = np.concatenate([y for x, y in test_deg_ds], axis=0)

In [29]:
# Classificatore
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score

knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(feat_train, y_train)

# Predizioni
predizioni = knn.predict(feat_test)

# Da One-Hot encoding a interi
y_test = class_labels = np.argmax(y_test, axis=1)
predizioni = class_labels = np.argmax(predizioni, axis=1)


# Matrice di confusione
result = confusion_matrix(y_test, predizioni)
accuracy_score(y_test, predizioni)

0.005253064287501042

In [30]:
# Predizioni - Test set degradato
predizioni2 = knn.predict(feat_test_deg)

# Da One-Hot encoding a interi
y_test_deg = class_labels = np.argmax(y_test_deg, axis=1)
predizioni2 = class_labels = np.argmax(predizioni2, axis=1)


# Matrice di confusione
result = confusion_matrix(y_test_deg, predizioni2)
accuracy_score(y_test_deg, predizioni2)

0.004752772450596181