In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [2]:
import zipfile
from skimage.filters import laplace, sobel
import os
import cv2
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,f1_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from matplotlib import pyplot as plt
import seaborn as sns

In [3]:
#Estrazione dei file nell'ambiente Colab
image_data = zipfile.ZipFile('/content/gdrive/MyDrive/Digital/archive.zip')
image_data.extractall()
image_data.close()

In [7]:
# Estrazione delle features dalle immagini
def get_data():
    path = '/content/'
    features = []
    for label in ['sharp/', 'defocused_blurred/', 'motion_blurred/']:
        for img in os.listdir(path + label):
            feature = []
            feature.append(device(img))
            image_grey = cv2.imread(path + label + img, 0)
            feature.extend(sobel_filter(image_grey))  # Media, varianza e massimo del filtro di Sobel
            feature.extend(laplace_filter(image_grey))  # Media, varianza e massimo del filtro di Laplace
            #feature.extend(gabor_filter(image_grey))  # Istanza dei filtri di Gabor
            features.append(feature)

    return features

In [5]:
import cv2
import numpy as np
from skimage.filters import gabor
from skimage import exposure

def sobel_filter(input):
  sob_feat = sobel(input)
  return sob_feat.mean(),sob_feat.var(),np.max(sob_feat)

def laplace_filter(input):
  lap_feat = laplace(input)
  return lap_feat.mean(),lap_feat.var(),np.max(lap_feat)

def device(path):
  return path.split('_')[1]


Creazione del Dataset

In [8]:
dataset = pd.DataFrame(get_data())

In [9]:
dataset.rename(
    {
        0:'device',
        1:'sob_mean',
        2:'sob_var',
        3:'sob_max',
        4:'lap_mean',
        5:'lap_var',
        6:'lap_max'
     },
     axis = 1,
     inplace = True)

In [10]:
dataset.head()

Unnamed: 0,device,sob_mean,sob_var,sob_max,lap_mean,lap_var,lap_max
0,PRESTIGIO-MULTI-PHONE,0.057461,0.004084,0.658007,-1.721312e-18,0.019647,1.878431
1,IPHONE-XR,0.022152,0.000703,0.558312,-6.7416759999999995e-19,0.006308,1.109804
2,HONOR-8X,0.010129,0.000496,0.521654,-9.810053e-19,0.000804,2.729412
3,XIAOMI-MI8-SE,0.018852,0.000501,0.462157,-6.296277999999999e-20,0.008131,0.839216
4,NIKON-D3400-35MM,0.025723,0.00403,0.74563,-9.860282999999999e-19,0.025105,3.411765


In [11]:
len(dataset)

1050

In [12]:
import os, os.path
DIR = 'blur_dataset_scaled/defocused_blurred/'
print (len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))]))

350


In [13]:
import os, os.path
DIR = 'blur_dataset_scaled/motion_blurred/'
print (len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))]))

350


In [14]:
import os, os.path
DIR = 'blur_dataset_scaled/sharp/'
print (len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))]))

350


In [15]:
#Categorizzazione della variabile 'device' tramite One-hot Encoder
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
le = LabelEncoder()
dataset_he = pd.concat([dataset, pd.DataFrame(to_categorical(le.fit_transform(dataset['device']), dtype='int'))], axis=1)


In [16]:
dataset_he

Unnamed: 0,device,sob_mean,sob_var,sob_max,lap_mean,lap_var,lap_max,0,1,2,...,56,57,58,59,60,61,62,63,64,65
0,PRESTIGIO-MULTI-PHONE,0.057461,0.004084,0.658007,-1.721312e-18,0.019647,1.878431,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,IPHONE-XR,0.022152,0.000703,0.558312,-6.741676e-19,0.006308,1.109804,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,HONOR-8X,0.010129,0.000496,0.521654,-9.810053e-19,0.000804,2.729412,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,XIAOMI-MI8-SE,0.018852,0.000501,0.462157,-6.296278e-20,0.008131,0.839216,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,NIKON-D3400-35MM,0.025723,0.004030,0.745630,-9.860283e-19,0.025105,3.411765,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1045,XIAOMI-REDMI-7,0.020175,0.000205,0.383116,-1.493601e-18,0.016181,1.019608,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1046,HONOR-7X,0.010827,0.000152,0.318977,-8.737851e-19,0.000553,0.584314,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1047,IPHONE-SE,0.008736,0.000049,0.140566,2.238285e-19,0.000503,0.360784,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1048,SONY-XPERIA-E5,0.018701,0.000446,0.349203,1.355253e-20,0.001464,1.066667,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
#Eliminiamo la variabile 'device' originale
dataset_he = dataset_he.drop(['device'], axis = 1)

In [18]:
dataset_he.head()

Unnamed: 0,sob_mean,sob_var,sob_max,lap_mean,lap_var,lap_max,0,1,2,3,...,56,57,58,59,60,61,62,63,64,65
0,0.057461,0.004084,0.658007,-1.721312e-18,0.019647,1.878431,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.022152,0.000703,0.558312,-6.7416759999999995e-19,0.006308,1.109804,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.010129,0.000496,0.521654,-9.810053e-19,0.000804,2.729412,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.018852,0.000501,0.462157,-6.296277999999999e-20,0.008131,0.839216,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.025723,0.00403,0.74563,-9.860282999999999e-19,0.025105,3.411765,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


ML & Feature Engeneering

In [19]:
y = np.concatenate((np.repeat('sharp', 350), np.repeat('defocused', 350), np.repeat('motion', 350)))

In [20]:
X_train,X_test,y_train,y_test = train_test_split(dataset_he,y,test_size=0.20, stratify=y)

Decision Tree

In [21]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
X_train.columns = X_train.columns.astype(str)
# Crea e addestra il modello
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)


In [22]:
pred_tree = decision_tree.predict(X_test.values)



In [23]:
X_test.values

array([[1.02496068e-02, 5.55922634e-05, 1.21766127e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [7.14097240e-03, 6.72134688e-05, 1.99672935e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [8.83388399e-03, 3.44123783e-04, 4.98768187e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.96261337e-02, 6.83401698e-04, 3.01210234e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [9.30967790e-03, 4.86109390e-05, 1.11820591e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.86445615e-02, 3.62671147e-03, 4.69715285e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [24]:
print('Confusion matrix:\n',confusion_matrix(y_test,pred_tree))
print('Classification_report:\n',classification_report(y_test,pred_tree))

Confusion matrix:
 [[53 17  0]
 [20 37 13]
 [ 1 10 59]]
Classification_report:
               precision    recall  f1-score   support

   defocused       0.72      0.76      0.74        70
      motion       0.58      0.53      0.55        70
       sharp       0.82      0.84      0.83        70

    accuracy                           0.71       210
   macro avg       0.70      0.71      0.71       210
weighted avg       0.70      0.71      0.71       210



In [25]:
importance = pd.DataFrame(decision_tree.feature_importances_, index=dataset_he.columns,
                          columns = ['variable_importance'])
importance.sort_values(by='variable_importance', ascending=False)


Unnamed: 0,variable_importance
sob_var,0.414032
sob_max,0.230425
lap_max,0.094084
lap_var,0.086232
sob_mean,0.066588
...,...
39,0.000000
40,0.000000
41,0.000000
42,0.000000


Dopo aver notato che la variabile device non è rilevante si è deciso di elimanrla

In [26]:
dataset_he_rid = dataset_he.iloc[:,0:6]
#Eliminata variabile device

In [27]:
dataset_he_rid.head()

Unnamed: 0,sob_mean,sob_var,sob_max,lap_mean,lap_var,lap_max
0,0.057461,0.004084,0.658007,-1.721312e-18,0.019647,1.878431
1,0.022152,0.000703,0.558312,-6.7416759999999995e-19,0.006308,1.109804
2,0.010129,0.000496,0.521654,-9.810053e-19,0.000804,2.729412
3,0.018852,0.000501,0.462157,-6.296277999999999e-20,0.008131,0.839216
4,0.025723,0.00403,0.74563,-9.860282999999999e-19,0.025105,3.411765


SVM

In [28]:
X_train,X_test,y_train,y_test = train_test_split(dataset_he_rid,y,test_size=0.20)

In [29]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

tuned_parameters = {'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]}

clf = GridSearchCV(SVC(), tuned_parameters, scoring = 'accuracy')
clf.fit(X_train, y_train)


In [None]:
clf.best_estimator_

In [None]:
tuned_parameters = {'gamma': [1e-2, 1e-1],
                     'C': [2000, 4000, 6000]}

clf = GridSearchCV(SVC(probability = True), tuned_parameters, scoring = 'accuracy')
clf.fit(X_train, y_train)
clf.best_estimator_

In [None]:
pred_svm = clf.predict(X_test)
print('Confusion matrix:\n',confusion_matrix(y_test,pred_svm))
print('Classification_report:\n',classification_report(y_test,pred_svm))


Confusion matrix:
 [[68 10  0]
 [27 29 11]
 [ 1 12 52]]
Classification_report:
               precision    recall  f1-score   support

   defocused       0.71      0.87      0.78        78
      motion       0.57      0.43      0.49        67
       sharp       0.83      0.80      0.81        65

    accuracy                           0.71       210
   macro avg       0.70      0.70      0.70       210
weighted avg       0.70      0.71      0.70       210



Decision Tree

In [None]:
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)
pred_tree = decision_tree.predict(X_test)
print('Confusion matrix:\n',confusion_matrix(y_test,pred_tree))
print('Classification_report:\n',classification_report(y_test,pred_tree))

Confusion matrix:
 [[57 20  1]
 [25 31 11]
 [ 2 13 50]]
Classification_report:
               precision    recall  f1-score   support

   defocused       0.68      0.73      0.70        78
      motion       0.48      0.46      0.47        67
       sharp       0.81      0.77      0.79        65

    accuracy                           0.66       210
   macro avg       0.66      0.65      0.65       210
weighted avg       0.66      0.66      0.66       210



KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
pred_knn = knn.predict(X_test)
print('Confusion matrix:\n',confusion_matrix(y_test,pred_knn))
print('Classification_report:\n',classification_report(y_test,pred_knn))

Confusion matrix:
 [[67 11  0]
 [24 34  9]
 [ 0 17 48]]
Classification_report:
               precision    recall  f1-score   support

   defocused       0.74      0.86      0.79        78
      motion       0.55      0.51      0.53        67
       sharp       0.84      0.74      0.79        65

    accuracy                           0.71       210
   macro avg       0.71      0.70      0.70       210
weighted avg       0.71      0.71      0.71       210

