# Reconnaissance de chiffres manuscrits : MNIST

## Librairies et fonctions utiles

In [None]:
# Directive pour afficher les graphiques dans Jupyter
%matplotlib inline

# Pandas : librairie de manipulation de données
# NumPy : librairie de calcul scientifique
# MatPlotLib : librairie de visualisation et graphiques
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn import model_selection

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score,auc, accuracy_score

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split

from sklearn import datasets

In [None]:
from keras.datasets import mnist

from keras.models import Sequential, load_model

from keras.layers import Dense, Dropout, Flatten

from keras.layers.convolutional import Conv2D, MaxPooling2D

from keras.utils.np_utils import to_categorical

## Le dataset de chiffres manuscrits MNIST

In [None]:
# Load the MNIST training set from the CSV file at the relative path below.
df = pd.read_csv("../input/mnist-in-csv/mnist_train.csv")

In [None]:
# Show the (rows, columns) dimensions of the loaded table.
df.shape

In [None]:
# Optional subsampling, currently disabled: uncomment to draw a random
# 1000-row sample into `new_df` and display its shape.
#new_df=df.sample(n = 1000)
#new_df.shape

In [None]:
# Preview the first rows of the table.
df.head()

In [None]:
# Count how many rows carry each distinct value of the `label` column.
df.label.value_counts()

In [None]:
# Display names for the classes, indexed by the integer value stored in the
# `label` column. MNIST classes are the digits 0-9, so the name of class k is
# simply str(k) (the previous 'A'..'Z' table mislabeled every plot title).
labels = [str(digit) for digit in range(10)]
print(labels)

In [None]:
# Rebuild one 28x28 image per row from the flat pixel columns
# (everything except `label`).
pixel_table = df.drop(columns=['label'])
n_samples = pixel_table.shape[0]
images = np.array(pixel_table).reshape((n_samples, 28, 28))

In [None]:
# Draw the first 49 samples on a 10x5 grid of subplots, each titled with the
# entry of `labels` selected by the sample's `label` value.
plt.figure(figsize=(10, 20))
for index in range(49):
    plt.subplot(10, 5, index + 1)
    plt.axis('off')
    plt.imshow(images[index], cmap="gray_r")
    class_id = df.label[index]
    plt.title(labels[class_id])

## Réseaux denses (sklearn)

In [None]:
# Target vector: the `label` column.
y = df['label']
# Feature table: every remaining column.
X = df.drop(['label'] , axis=1)

In [None]:
# Rescale the features by 1/255 -- presumably 8-bit pixel intensities being
# mapped to [0, 1]; TODO confirm the value range of the CSV.
X = X/255

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state=1 makes the shuffle
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with two hidden layers (200 then 60 units).
# fit() returns the estimator itself, so construction and training chain.
mlp = MLPClassifier(hidden_layer_sizes=(200, 60)).fit(X_train, y_train)

# Predicted class for every held-out sample.
y_mlp = mlp.predict(X_test)

In [None]:
# Fraction of test samples whose predicted class equals the true class.
mlp_score = accuracy_score(y_test, y_mlp)
print(mlp_score)

In [None]:
# Confusion table: true classes as rows ("Reel"), predicted classes as
# columns ("Prediction"); margins=True appends the row/column totals.
pd.crosstab(y_test, y_mlp, rownames=['Reel'], colnames=['Prediction'], margins=True)

## Réseaux denses (Keras/Tensorflow)

In [None]:
from keras.utils.np_utils import to_categorical

In [None]:
# Show the first label before and after encoding: to_categorical turns the
# integer class ids into one-hot rows (one column per class).
print(y[0])
y_cat = to_categorical(y)
print(y_cat[0])

In [None]:
from sklearn.model_selection import train_test_split

# Re-split with the one-hot targets; same test_size and random_state as the
# earlier split on the integer labels `y`.
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=1)

In [None]:
# Convert all four partitions to plain NumPy arrays before handing them to
# the Keras model.
X_train, X_test, y_train, y_test = (
    np.array(partition)
    for partition in (X_train, X_test, y_train, y_test)
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Fully connected classifier: three sigmoid hidden layers (784, 200, 60
# units) followed by a 10-unit output layer, one unit per class.
# A lighter alternative is two ReLU hidden layers (200, 60) before the output.
model = Sequential()
model.add(Dense(784, activation='sigmoid'))
model.add(Dense(200, activation='sigmoid'))
model.add(Dense(60, activation='sigmoid'))
# The output layer uses softmax rather than sigmoid: the categorical
# cross-entropy loss this model is compiled with expects the 10 outputs to
# form a probability distribution over the classes (summing to 1).
model.add(Dense(10, activation='softmax'))

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 100 epochs, keeping the returned history object in `train`.
# NOTE(review): the test split is also passed as validation data, so the
# validation scores are not an independent estimate.
train = model.fit(X_train , y_train , validation_data=(X_test,y_test), epochs=100, verbose=1)

In [None]:
# Loss and compiled metrics of the trained model on the held-out split.
model.evaluate(X_test,y_test)

In [None]:
# Per-epoch accuracy on the training data, as recorded during fit().
print(train.history['accuracy'])

In [None]:
# Per-epoch accuracy on the validation data, as recorded during fit().
print(train.history['val_accuracy'])

In [None]:
def plot_scores(train):
    """Plot training and validation accuracy against the epoch index.

    Args:
        train: Object whose ``history`` mapping holds the per-epoch
            ``'accuracy'`` and ``'val_accuracy'`` sequences (in this
            notebook, the value returned by ``model.fit``).
    """
    history = train.history
    # Read both series up front so a missing key fails before any drawing.
    train_curve, val_curve = history['accuracy'], history['val_accuracy']
    x_axis = range(len(train_curve))
    curves = (
        (train_curve, 'b', 'Score apprentissage'),
        (val_curve, 'r', 'Score validation'),
    )
    for values, line_format, caption in curves:
        plt.plot(x_axis, values, line_format, label=caption)
    plt.title('Scores')
    plt.legend()
    plt.show()

In [None]:
# Draw the accuracy curves of the training run stored in `train`.
plot_scores(train)

In [None]:
# Second dataset: load the Fashion-MNIST training CSV from the relative path
# below.
d = pd.read_csv('../input/fashionmnist/fashion-mnist_train.csv')

In [None]:
# Show the (rows, columns) dimensions of the loaded table.
d.shape

In [None]:
# Draw a random 1000-row sample and show its shape. `new_d` is not used by
# any of the later cells shown here, which keep working on the full `d`.
new_d=d.sample(n = 1000)
new_d.shape

In [None]:
# Preview the first rows of the table.
d.head()

In [None]:
# Count how many rows carry each distinct value of the `label` column.
d.label.value_counts()

In [None]:
# Display names for the ten Fashion-MNIST classes, indexed by the integer
# value stored in the `label` column (0 = T-shirt/top ... 9 = Ankle boot).
# The previous 'A'..'Z' table gave every plot a meaningless letter title.
labels = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
print(labels)

In [None]:
# Rebuild one 28x28 image per row from the flat pixel columns
# (everything except `label`).
pixel_table = d.drop(columns=['label'])
n_samples = pixel_table.shape[0]
images = np.array(pixel_table).reshape((n_samples, 28, 28))

In [None]:
# Draw the first 49 samples on a 10x5 grid of subplots, each titled with the
# entry of `labels` selected by the sample's `label` value.
plt.figure(figsize=(10, 20))
for index in range(49):
    plt.subplot(10, 5, index + 1)
    plt.axis('off')
    plt.imshow(images[index], cmap="gray_r")
    class_id = d.label[index]
    plt.title(labels[class_id])

## Réseaux denses (sklearn)

In [None]:
# Target vector: the `label` column.
y = d['label']
# Feature table: every remaining column.
X = d.drop(['label'] , axis=1)

In [None]:
# Rescale the features by 1/255 -- presumably 8-bit pixel intensities being
# mapped to [0, 1]; TODO confirm the value range of the CSV.
X = X/255

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state=1 makes the shuffle
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with two hidden layers (200 then 60 units).
# fit() returns the estimator itself, so construction and training chain.
mlp = MLPClassifier(hidden_layer_sizes=(200, 60)).fit(X_train, y_train)

# Predicted class for every held-out sample.
y_mlp = mlp.predict(X_test)

In [None]:
# Fraction of test samples whose predicted class equals the true class.
mlp_score = accuracy_score(y_test, y_mlp)
print(mlp_score)

In [None]:
# Confusion table: true classes as rows ("Reel"), predicted classes as
# columns ("Prediction"); margins=True appends the row/column totals.
pd.crosstab(y_test, y_mlp, rownames=['Reel'], colnames=['Prediction'], margins=True)

## Réseaux denses (Keras/Tensorflow)

In [None]:
from keras.utils.np_utils import to_categorical

In [None]:
# Show the first label before and after encoding: to_categorical turns the
# integer class ids into one-hot rows (one column per class).
print(y[0])
y_cat = to_categorical(y)
print(y_cat[0])

In [None]:
# Number of classes = width of the one-hot matrix.
num_classess = y_cat.shape[1]
print(num_classess)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
from sklearn.model_selection import train_test_split

# The partitions currently in scope were split from the integer labels `y`
# for the sklearn MLP. The Keras network trained next has 10 outputs and a
# categorical cross-entropy loss, so it needs the one-hot targets `y_cat`;
# without this re-split, fit() receives 1-D integer targets and fails on a
# shape mismatch. Same test_size and random_state as the earlier split.
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=1)

# Convert every partition to a plain NumPy array before handing it to Keras.
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Fully connected classifier: three sigmoid hidden layers (784, 200, 60
# units) followed by a 10-unit output layer, one unit per class.
model = Sequential()
model.add(Dense(784, activation='sigmoid'))
model.add(Dense(200, activation='sigmoid'))
model.add(Dense(60, activation='sigmoid'))
# The output layer uses softmax rather than sigmoid: the categorical
# cross-entropy loss this model is compiled with expects the 10 outputs to
# form a probability distribution over the classes (summing to 1).
model.add(Dense(10, activation='softmax'))

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 100 epochs, keeping the returned history object in `train`.
# NOTE(review): the test split is also passed as validation data, so the
# validation scores are not an independent estimate.
train = model.fit(X_train , y_train , validation_data=(X_test,y_test), epochs=100, verbose=1)

In [None]:
# Loss and compiled metrics of the trained model on the held-out split.
model.evaluate(X_test,y_test)

In [None]:
# Per-epoch accuracy on the training data, as recorded during fit().
print(train.history['accuracy'])

In [None]:
# Per-epoch accuracy on the validation data, as recorded during fit().
print(train.history['val_accuracy'])

In [None]:
def plot_scores(train):
    """Plot training and validation accuracy against the epoch index.

    Args:
        train: Object whose ``history`` mapping holds the per-epoch
            ``'accuracy'`` and ``'val_accuracy'`` sequences (in this
            notebook, the value returned by ``model.fit``).
    """
    history = train.history
    # Read both series up front so a missing key fails before any drawing.
    train_curve, val_curve = history['accuracy'], history['val_accuracy']
    x_axis = range(len(train_curve))
    curves = (
        (train_curve, 'b', 'Score apprentissage'),
        (val_curve, 'r', 'Score validation'),
    )
    for values, line_format, caption in curves:
        plt.plot(x_axis, values, line_format, label=caption)
    plt.title('Scores')
    plt.legend()
    plt.show()

In [None]:
# Draw the accuracy curves of the training run stored in `train`.
plot_scores(train)