# Cours 5:  PMC avec Keras

Author: Ricardo Vallejo

### 1. Téléchargez le contenu de la base de données.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import statistics
import numpy as np
import scipy.stats
import seaborn as sns

# 1. Téléchargez le contenu de la base de données iris

data = pd.read_excel("segmentation.xlsx")
pd.set_option('display.max_rows', None)
data.head(5)

In [None]:
dataX = data.loc[:, data.columns != 'classe']
dataY = data[['classe']]

In [None]:
data.info()

### 2. Procédez à une standardisation des données

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
datax_std = scaler.fit_transform(dataX)
dfx_std =  pd.DataFrame(datax_std, columns = [dataX.columns])
dfx_std.head(5)

### 3. Déterminez les différentes classes

In [None]:
data.groupby('classe').size()

### 4. Considérez une partition de 70% pour l’entrainement.


In [None]:
df_std_tot = pd.concat([dfx_std, dataY], axis = 1)
df_std_tot =  df_std_tot.set_axis([data.columns], axis=1, inplace=False) #pd.DataFrame(df_std_tot, columns = data.columns)
df_std_tot.head(5)

In [None]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df_std_tot, test_size = 0.3, stratify = df_std_tot['classe'], random_state = 10) #40% data for tests


### 5. Vérifiez la taille de l’échantillon d’entrainement et de test par classe.

In [None]:
train.reset_index(drop = True, inplace = True)
test.reset_index(drop = True, inplace = True)

In [None]:
print(train['classe'].value_counts())

In [None]:
print(test['classe'].value_counts())

### 6. Développez un perceptron simple et une architecture séquentielle (activation=’softmax’, optimizer=’adam’)

### 6.1.  Data Model Train Encoding

In [None]:
from sklearn.preprocessing import OneHotEncoder

#encodage de classes
encoder =OneHotEncoder()
encodedTrainTarget = encoder.fit_transform(train[["classe"]])
labelsTrain=pd.DataFrame(encodedTrainTarget.toarray(), columns=encoder.categories_)
labelsTrain.head(5)

In [None]:
encoder.categories_

In [None]:
# Concatenation
df_train_tot=pd.concat([train,labelsTrain],  axis=1)
df_train_tot.head(5)

### 6.2.  Data Model Test Encoding

In [None]:
#encodage de classes
encoder =OneHotEncoder()
encodedTestTarget = encoder.fit_transform(test[["classe"]])
encodedTestTarget

labelsTest=pd.DataFrame(encodedTestTarget.toarray(), columns=encoder.categories_)
labelsTest.head(5)

In [None]:
# Concatenation
df_test_tot=pd.concat([test,labelsTest],axis=1)
df_test_tot.head(5)

In [None]:
df_test_tot.drop(columns=['classe'],inplace=True)
df_test_tot.head(5)

In [None]:
df_train_tot.drop(columns=['classe'],inplace=True)
df_train_tot.head(5)

In [None]:
X_train=df_train_tot[['region_centroid_col', 'region_centroid_row', 'region_pixel_count',
       'short_line_density_5', 'short_line_density_2', 'vedge_mean',
       'vegde_sd', 'hedge_mean', 'hedge_sd', 'intensity_mean', 'rawred_mean',
       'rawblue_mean', 'rawgreen_mean', 'exred_mean', 'exblue_mean',
       'exgreen_mean', 'value_mean', 'saturation_mean', 'hue_mean']]

y_train=df_train_tot[['brickface', 'cement', 'foliage',  'grass', 'path', 'sky', 'window']]

X_test=df_test_tot[['region_centroid_col', 'region_centroid_row', 'region_pixel_count',
       'short_line_density_5', 'short_line_density_2', 'vedge_mean',
       'vegde_sd', 'hedge_mean', 'hedge_sd', 'intensity_mean', 'rawred_mean',
       'rawblue_mean', 'rawgreen_mean', 'exred_mean', 'exblue_mean',
       'exgreen_mean', 'value_mean', 'saturation_mean', 'hue_mean']]

y_test=df_test_tot[['brickface', 'cement', 'foliage',  'grass', 'path', 'sky', 'window']]

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
encoder.categories_

In [None]:
X_test.shape

In [None]:
y_test.shape

In [None]:
# architecture séquentielle (activation=’softmax’, optimizer=’adam’)

from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
# Couches
from keras.layers import Dense

PMC = Sequential()

# Perceptron simple: Ajout de la couche "entrée -> sortie"
# Dense pour avoir des neurones complètement ement connectés
# mYTrain.shape[1] : nb de modalités de la var. cible en sortie
# X_train.shape[1] : dimension du vecteur de caractéristiques en entrée
# Fonction d'activation "Softmax"

PMC.add(Dense(units=labelsTrain.shape[1],input_dim=X_train.shape[1],activation='softmax'))
PMC.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Number of layers :',len(PMC.layers)) 


# fit model
results = PMC.fit(X_train, y_train,validation_data=(X_test, y_test),epochs=30)

In [None]:
# Vérification de l'architecture mise en place
PMC.get_config()

In [None]:
# Prédiction sur l'ensemble des données de test
# Permet de calculer les probabilités d'appartenance à chacune des classes:
# Score d'appartenance aux classes
YtestpredPS = PMC.predict(X_test)  # X es normalized data


In [None]:
YtestpredPS.shape

In [None]:
# Déterminer le score d'Appartenance max par ligne
# pour convertir les score en un indice de classes d'appartenance
idPredPS = np.argmax(YtestpredPS,axis=1)
print(idPredPS[:10])


In [None]:
labelsTest

In [None]:
#Transformation des numéros en classes prédites
clPredPS = labelsTrain.columns[idPredPS]
print(clPredPS[:10])

## 7. Représentez la matrice de confusion et évaluez les performances en utilisant classification_report..


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

cm = confusion_matrix(ytest, YtestpredPS)

# Plot confusion matrix
import seaborn as sns
import pandas as pd
# confusion matrix sns heatmap 
ax = plt.axes()
df_cm = cm
sns.heatmap(df_cm, annot=True, annot_kws={"size": 10}, fmt='d',cmap="Blues", ax = ax )
ax.set_title('Confusion Matrix')
plt.show()

In [524]:
333# Matrice de confusion
from sklearn import metrics
print(metrics.confusion_matrix(y_test,clPredPS))

ValueError: You appear to be using a legacy multi-label data representation. Sequence of sequences are no longer supported; use a binary array or sparse matrix instead - the MultiLabelBinarizer transformer can convert to this format.

In [None]:
print(classification_report(ytest, YtestpredPS))

### 9. Développez un perceptron multicouche (2 couches cachées à 30 neurones, fonctions d’activation ReLu)

In [None]:
#La fonction de perte utilisée est la perte d’entropie croisée
#La fonction loss peut prendre les paramètres suivants :
# Cas de classification binaire (Binary Cross-Entropy) :
#BinaryCrossentropy
# Cas de classification multi-classe (Categorical Cross-Entropy)
#CategoricalCrossentropy


def create_model(optimizer='adam',activation='relu'):

    model = Sequential()
    # Ajout de la premiere couche "entree −> cachee"
    # 30 neurones dans la premiere couche cachee
    # ZTrain . shape[1] : dimension du vecteur de caracteristiques en entree
    model.add(layers.Dense( units=30,input_dim=X_train.shape[1] , activation=activation ) )
    # Ajout de la seconde couche "cachee −> cachee"
    # 30 neurones dans la deuxieme couche cachee
    model.add(layers.Dense(30, activation=activation ) )
    # Ajout de la troisieme couche "cachee −> so r tie "
    # mYTrain. shape[1] neurones dans la couche cachee =
    # nb de modalites de la variable cible

    model.add(layers.Dense( units=y_train.shape[1] , activation='softmax') )

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [530]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

# create model
model = KerasClassifier(build_fn=create_model, epochs=30, batch_size=10, verbose=0)


# define the grid search parameters
#on va tester les fonctions d'activation et les optimizateurs
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
activation = ['softmax', 'relu','sigmoid','softplus','softsign','tanh','selu','elu']

param_grid = dict(optimizer=optimizer,activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train)
grid_result


KeyboardInterrupt



In [None]:


# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

### 10. . Représentez la matrice de confusion et évaluez les performances en utilisant classification_report.


In [None]:
matrix = confusion_matrix(ytest, ypred)
print(matrix)
print(classification_report(ytest, ypred))