In [1]:
import numpy as np
import os
import random
import pandas as pd
import sklearn.datasets, sklearn.decomposition, sklearn.discriminant_analysis
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

def traintestMNIST(labels=None, ntrain=None, ntest=None, path=os.path.join('data_for_python', 'mnist.npz')):
    mnist = np.load(path)
    train_x = mnist['train_x']
    train_y = mnist['train_y']
    train = np.asarray(list(zip(train_x, train_y)))

    test_x = mnist['test_x']
    test_y = mnist['test_y']
    test = np.asarray(list(zip(test_x, test_y)))

    if labels:
        train = list(train[np.hstack([np.where(train_y==l) for l in labels]).squeeze()])
        test = list(test[np.hstack([np.where(test_y==l) for l in labels]).squeeze()])

    random.shuffle(train)
    random.shuffle(test)

    train_x, train_y = zip(*train)
    test_x, test_y = zip(*test)

    if ntrain:
        train_x = train_x[:ntrain]
        train_y = train_y[:ntrain]

    if ntest:
        test_x = test_x[:ntest]
        test_y = test_y[:ntest]

    return np.asarray(train_x), np.asarray(train_y), np.asarray(test_x), np.asarray(test_y)

In [2]:
def f(_):
    #on calcule séparemment pour avoir le meme nombre de 3 et de 7, qui est une hypothèse pour que notre méthode fonctionne.
    [trainImages3, trainLabels3, testImages3, testLabels3] = traintestMNIST([3],2000 )
    [trainImages7, trainLabels7, testImages7, testLabels7] = traintestMNIST([7],2000 )

    #on assemble les données concernant les 3 et les 7.
    trainImages=np.concatenate((trainImages3,trainImages7))
    trainLabels=np.concatenate((trainLabels3,trainLabels7))
    testImages=np.concatenate((testImages3,testImages7))
    testLabels=np.concatenate((testLabels3,testLabels7))
    
    #on prend uniquement les 50 premiers vect propres.
    pca = sklearn.decomposition.PCA(50)
    train_img_fit=pca.fit_transform(trainImages)
    test_img_fit=pca.fit_transform(testImages)
    
    #LDA
    mon_LDA=sklearn.discriminant_analysis.LinearDiscriminantAnalysis()
    mon_LDA.fit(train_img_fit,trainLabels)
    LDA_estimated = mon_LDA.predict(test_img_fit)
    conf_mat_LDA=confusion_matrix(testLabels,LDA_estimated)
    #confusion_matrix de sklearn tq (truth , estimated)

    #1-NN
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(train_img_fit, trainLabels)
    NN_estimated=neigh.predict(test_img_fit)
    conf_mat_1NN=confusion_matrix(testLabels,NN_estimated)
    return (conf_mat_LDA,conf_mat_1NN)

In [3]:
c=[]
for i in range(50):
    c.append(f(i))


In [11]:
print('Pour matrice de confusion:\n\n  LDA\n\n  1-NN')
print('---------\nmean:\n')
print(np.mean(c,0))
print('---------\nmedian:\n')
print(np.median(c,0))
print('---------\nmin:\n')
print(np.amin(c,0))
print('---------\nmax:\n')
print(np.amax(c,0))

Pour matrice de confusion:

  LDA

  1-NN
---------
mean:

[[[949.24  60.76]
  [ 35.74 992.26]]

 [[958.28  51.72]
  [ 34.36 993.64]]]
---------
median:

[[[ 949.    61. ]
  [  35.5  992.5]]

 [[ 972.5   37.5]
  [  25.5 1002.5]]]
---------
min:

[[[914  39]
  [ 13 974]]

 [[882  29]
  [ 10 932]]]
---------
max:

[[[ 971   96]
  [  54 1015]]

 [[ 981  128]
  [  96 1018]]]


In [7]:
a=f(1)
print(a[0])
print(a[1])

[[950  60]
 [ 61 967]]
[[872 138]
 [ 82 946]]
