<a href="https://colab.research.google.com/github/malborroni/RECMojion/blob/master/Model_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CNN + Fully Connected

Valutazione in 5-fold cross validation dei modelli
- feature extraction + CNN  
- feature extraction + Fully Connected

# Librerie

In [0]:
! sudo pip install git+https://github.com/rcmalli/keras-vggface.git

# Example of face detection with a vggface2 model
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import pandas as pd
import random as rnd

import seaborn as sns
from matplotlib import style
style.use('seaborn-pastel')

import keras
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from keras.models import Model, Sequential, load_model
from keras.utils import to_categorical
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
import keras_vggface
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input, decode_predictions
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint 
from sklearn.metrics import accuracy_score

import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split,StratifiedKFold

import random
import pickle

# Funzioni

In [0]:
def graph(history):
  """Plot training vs. validation curves of a fitted Keras model.

  Two side-by-side panels are drawn: loss on the left, accuracy on the
  right, each with one line for the training set and one for the
  validation set.

  Args:
    history: object returned by ``model.fit``. Its ``history`` dict must
      contain 'loss' and 'val_loss', plus the accuracy series stored under
      either 'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

  Raises:
    KeyError: if one of the required series is missing.
  """
  logs = history.history
  # The accuracy metric is logged as 'acc' by older Keras releases and as
  # 'accuracy' by newer ones; accept both so the plot does not depend on
  # the installed version.
  acc_key = 'acc' if 'acc' in logs else 'accuracy'

  # Epochs are numbered from 1 on the x axis.
  x_plot = list(range(1, len(logs['loss']) + 1))

  plt.figure()
  panels = (('Loss', 'loss'), ('Acc', acc_key))
  for position, (y_label, key) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.plot(x_plot, logs[key])
    plt.plot(x_plot, logs['val_' + key])
    plt.legend(['Training', 'Validation'])

  plt.tight_layout()
  plt.show()


# Dataset

Il dataset si trova in un file chiamato XY.pkl nel drive 

In [0]:
# Mount Google Drive under /content/drive (Colab only). The dataset and all
# output files used below are read from / written to this mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


Import del dataset:

    - X : immagini (senza preprocessing)
    - Y : labels ("fear", "happy", etc.)

In [0]:
# Load the pickled dataset from Drive and unpack it into images (X) and labels (Y).
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('/content/drive/My Drive/AML/XY.pkl', 'rb') as infile:
    result = pickle.load(infile)
# The pickle is expected to hold exactly two items; unpacking fails otherwise.
X,Y=result

In [0]:
# Sanity check: display the shapes of the image array and of the label array.
X.shape, Y.shape

((2590, 224, 224, 3), (2590,))

In [0]:
# Materialise both containers as fresh NumPy arrays; the images are converted
# to float64 in the same call, the labels keep their inferred dtype.
X = np.array(X, dtype='float64')
Y = np.array(Y)

In [0]:
# Replace the label values with integer class ids; `le` keeps the fitted mapping.
le = LabelEncoder()
Y = le.fit_transform(Y) # integer-encoded labels

# Feature Extraction

In [0]:
# Load the VGGFace network weights (SENet-50 variant) without the top
# classification layers, for 224x224 RGB inputs.
base_model = VGGFace(include_top = False, input_shape = (224, 224, 3), model='senet50')

In [0]:
# NOTE: X is overwritten with its preprocessed version, so this cell must be
# run exactly once per kernel session (re-running would preprocess twice).
X = preprocess_input(X,version=2) # VGGFace preprocessing; version 2 is the one for senet50

In [0]:
# Print the layer-by-layer summary of the backbone (presumably consulted to
# pick the layer name at which the network is cut).
base_model.summary()

In [0]:
# Name of the backbone layer whose activations are used as features.
# NOTE(review): "activation_66" looks like an auto-generated layer name; such
# names may change if the backbone is rebuilt in the same session — confirm
# against base_model.summary().
layer_cut = "activation_66" 
# Build a model that shares the backbone input and outputs the chosen layer
model = Model(inputs=base_model.input, outputs=base_model.get_layer(layer_cut).output)

In [0]:
# NOTE: X is overwritten again — from here on it holds the extracted feature
# maps, not images, so this cell cannot be re-run without reloading the data.
X = model.predict(X) # feature extraction

# CNN

In [0]:
# fix random seed for reproducibility
# (only NumPy's global RNG is seeded in this cell)
seed = 42
np.random.seed(seed)
# define 5-fold stratified cross validation; shuffling is seeded as well
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
# accumulator for the per-fold test accuracies (stored in percent)
cvscores = []

In [0]:
# Training settings for the CNN cross-validation run.
batch_size = 128
num_epoch = 100  # upper bound; early stopping may end training sooner
# File that ModelCheckpoint writes the saved model to.
filepath = "/content/drive/My Drive/AML/cnn.h5"
# Save the model only when 'val_acc' improves (higher is better).
# NOTE(review): the monitored name must match the metric name Keras logs;
# some releases log 'val_accuracy' instead — confirm for the installed version.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Stop after 4 epochs without 'val_loss' improvement and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1,restore_best_weights=True) 

In [0]:
# Start from an empty list so that re-running this cell cannot mix in scores
# left over from a previous run.
cvscores = []
num_classes = 7  # size of the one-hot label vectors and of the softmax output

for train, test in kfold.split(X, Y):
    # Fold data: index the extracted features and one-hot encode the labels.
    x_train, x_test = X[train], X[test]
    y_train = to_categorical(Y[train], num_classes=num_classes)
    y_test = to_categorical(Y[test], num_classes=num_classes)

    # A new, untrained network is built for every fold.
    model = Sequential()
    # The input shape is read from the feature tensor itself
    # (it was previously hard-coded as (14, 14, 1024)).
    model.add(Conv2D(32, kernel_size=1, activation='relu', input_shape=X.shape[1:]))
    model.add(Conv2D(128, kernel_size=3, activation='relu'))
    model.add(Conv2D(32, kernel_size=1, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    # Dense classification head.
    model.add(Dense(210, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    # Carve a stratified 20% validation split out of the training fold; it
    # drives early stopping and checkpointing, the test fold is not used here.
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=.2, random_state=42, stratify=y_train)

    # NOTE(review): the same callback instances are reused for every fold and
    # all folds write to the same checkpoint file — confirm this is intended.
    model_log = model.fit(x_train, y_train,
                          batch_size=batch_size,
                          epochs=num_epoch,
                          verbose=1,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stop, checkpoint])
    graph(model_log)

    # Score the fold on its held-out test partition; index 1 is the accuracy
    # metric (index 0 is the loss). Scores are stored in percent.
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

# Mean and standard deviation of the test accuracy over all folds.
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))


Salvataggio accuracy

In [0]:
# Results registry: model name -> (mean, standard deviation) of the per-fold
# scores currently stored in cvscores.
kfold_acc = {"CNN": (np.mean(cvscores), np.std(cvscores))}

# Fully Connected

In [0]:
# Training settings for the Fully Connected cross-validation run
# (these overwrite the values used for the CNN run).
batch_size = 256
num_epoch = 100  # upper bound; early stopping may end training sooner
# File that ModelCheckpoint writes the saved model to.
filepath = "/content/drive/My Drive/AML/fully.h5"
# Save the model only when 'val_acc' improves (higher is better).
# NOTE(review): the monitored name must match the metric name Keras logs;
# some releases log 'val_accuracy' instead — confirm for the installed version.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Stop after 5 epochs without 'val_loss' improvement and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1,restore_best_weights=True) 

In [0]:
from keras.utils import to_categorical

# Reset the score list: it is shared with the CNN run, and without this reset
# the statistics printed below would be computed over the scores of both models.
cvscores = []
num_classes = 7  # size of the one-hot label vectors and of the softmax output

for train, test in kfold.split(X, Y):
    # Fold data: index the extracted features and one-hot encode the labels.
    x_train, x_test = X[train], X[test]
    y_train = to_categorical(Y[train], num_classes=num_classes)
    y_test = to_categorical(Y[test], num_classes=num_classes)

    # A new, untrained network is built for every fold: the feature maps are
    # flattened and fed to three Dense+Dropout stages and a softmax output.
    model = Sequential()
    model.add(Flatten())
    for units in (512, 128, 56):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    # Carve a stratified 20% validation split out of the training fold; it
    # drives early stopping and checkpointing, the test fold is not used here.
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=.2, random_state=42, stratify=y_train)

    # NOTE(review): the same callback instances are reused for every fold and
    # all folds write to the same checkpoint file — confirm this is intended.
    model_log = model.fit(x_train, y_train,
                          batch_size=batch_size,
                          epochs=num_epoch,
                          verbose=1,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stop, checkpoint])
    graph(model_log)

    # Score the fold on its held-out test partition; index 1 is the accuracy
    # metric (index 0 is the loss). Scores are stored in percent.
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

# Mean and standard deviation of the test accuracy over the folds of this run.
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

Salvataggio accuracy

In [0]:
# `json` is not part of the notebook's import cell; import it here so this
# cell does not fail with a NameError.
import json

# cvscores may also hold scores appended by earlier cross-validation runs, so
# only the last n_splits entries (one per fold of the latest run) are used
# for the Fully Connected model.
fc_scores = cvscores[-kfold.get_n_splits():]
kfold_acc["FC"] = (np.mean(fc_scores), np.std(fc_scores))

# Serialise the summary under its own name (the `json` module is no longer
# shadowed by a string). default=float converts NumPy scalar types that the
# encoder cannot handle natively.
kfold_acc_json = json.dumps(kfold_acc, default=float)
# The context manager closes the file even if the write fails.
with open("/content/drive/My Drive/AML/kfold_acc.json", "w") as f:
    f.write(kfold_acc_json)