### Extraction des features avec ResNet50

In [1]:
# Loading all necessary libraries and modules
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.decomposition import PCA
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

#### 1-Chargement des fichiers avec des échantillons de données par catégorie

In [2]:
# Root folder of the training images; it contains one sub-directory per fruit
# category. The trailing backslash is required because file paths are built
# from this value by plain string concatenation.
# NOTE(review): machine-specific absolute Windows path — adjust before running elsewhere.
datasets_path = (
    "C:\\Users\\toure\\Desktop\\OpenClassrooms\\Projet 8\\"
    "donnees_image\\fruits-360_dataset\\fruits-360\\Training\\"
)

In [3]:
# Maximum number of image files taken from each category sub-directory
# during feature extraction (used as a slice bound on the directory listing).
image_num = 10

In [4]:
# Build a ResNet50 feature extractor: ImageNet weights, classification head
# removed (include_top=False), so features come from the last convolutional
# block rather than from any fully-connected layer.
#
# pooling=None is stated explicitly. The earlier `pooling=max` passed Python's
# built-in max() function instead of the string 'max'; Keras matched it to
# neither 'avg' nor 'max', so no global pooling was ever applied. The output
# for a 224x224 image is therefore the 7x7x2048 feature map, which Flatten
# turns into a 100352-value vector per image.
# NOTE(review): pooling='avg' or 'max' would give 2048 features per image;
# switching requires re-running everything downstream of this cell.
base_model = ResNet50(weights='imagenet', pooling=None, include_top=False)

# Named image_input rather than `input` so the built-in input() is not shadowed.
image_input = Input(shape=(224, 224, 3), name='image_input')
x = base_model(image_input)
x = Flatten()(x)
model = Model(inputs=image_input, outputs=x)

In [5]:
# Extract one feature vector per image, for up to `image_num` images of each
# category sub-directory of `datasets_path`.
#   data_label: category (sub-directory) name, one entry per image
#   data_feat:  flattened model output, one 1-D array per image
data_label = []
data_feat = []

# sorted(): os.listdir returns entries in arbitrary order, which would make the
# selected images and the row order differ between runs or machines.
for class_name in sorted(os.listdir(datasets_path)):
    class_dir = os.path.join(datasets_path, class_name)
    if not os.path.isdir(class_dir):
        # Skip stray files sitting next to the category folders.
        continue

    image_names = sorted(os.listdir(class_dir))[:image_num]
    if not image_names:
        continue

    # Load every selected image of this category into a single batch so the
    # network runs once per category instead of once per image.
    batch = np.stack([
        image.img_to_array(
            image.load_img(os.path.join(class_dir, name), target_size=(224, 224))
        )
        for name in image_names
    ])
    batch = preprocess_input(batch)

    # reshape(n, -1) keeps one row per image without hard-coding the feature
    # length, so this cell keeps working if the model's output size changes.
    features = model.predict(batch, verbose=0).reshape(len(image_names), -1)

    data_label.extend([class_name] * len(image_names))
    data_feat.extend(features)


In [6]:
# Assemble the extraction results into one table: a 'label' column followed by
# one integer-named column per feature value, rows aligned by position.
label_frame = pd.DataFrame({'label': data_label})
feature_frame = pd.DataFrame(data_feat)
data_all = pd.concat([label_frame, feature_frame], axis=1)

In [7]:
# Preview the first rows: the 'label' column followed by the feature columns.
data_all.head()

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,100342,100343,100344,100345,100346,100347,100348,100349,100350,100351
0,Apple Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Apple Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Apple Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Apple Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,0.56689,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Apple Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,1.635632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Réduction de dimension par PCA

In [8]:
# Reduce the extracted features with PCA (PCA is imported in the notebook's
# top import cell). A float n_components between 0 and 1 makes scikit-learn
# keep the smallest number of components whose cumulative explained variance
# reaches that fraction — here 90%.
feature_matrix = data_all.drop(columns="label")
pca = PCA(n_components=0.90)
principal_components = pca.fit_transform(feature_matrix)

In [9]:
# Re-attach the category labels to the PCA-projected features (rows are aligned
# by position) and report the resulting table size.
components_frame = pd.DataFrame(data=principal_components)
principal_df = pd.concat([data_all["label"], components_frame], axis=1)
print(principal_df.shape)

(1310, 174)


In [12]:
# Preview the first rows: the 'label' column followed by the principal components.
principal_df.head()

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,163,164,165,166,167,168,169,170,171,172
0,Apple Braeburn,-126.079216,-20.465063,2.109932,-16.983776,20.552832,-55.029251,11.70569,-68.226257,-15.284904,...,-2.00143,5.611289,2.469967,13.452518,4.699523,-6.44544,-2.791751,-1.687784,-9.780011,-4.474288
1,Apple Braeburn,-159.82106,-49.361229,26.784433,1.381939,0.252728,-0.365676,50.401302,-12.385925,-82.419731,...,-0.891619,-5.292984,-4.847217,-1.164312,1.676399,9.762865,-10.650976,-6.978362,-5.724542,-0.335709
2,Apple Braeburn,-163.084946,-50.975998,32.95134,-2.983782,-2.024314,7.104094,55.642479,-7.860163,-88.528183,...,-0.831624,-1.517816,-5.190097,-5.411776,3.861518,8.233482,-6.704696,-2.499938,-2.53061,1.07261
3,Apple Braeburn,-160.648453,-49.477711,42.307613,5.905721,-4.085958,10.723994,53.374607,-8.55477,-100.312035,...,-2.856441,-0.888656,-4.545717,-5.49129,2.400442,7.536231,-6.292813,-6.892319,-2.751713,-4.750124
4,Apple Braeburn,-159.546143,-49.895878,53.068077,10.417496,-6.05759,9.659484,55.258743,-11.540544,-100.69619,...,-1.102456,-0.746603,-2.300404,-6.746674,2.137212,6.150426,-5.815139,-6.398622,0.068932,-3.145939


### Stockage dans un fichier CSV

In [10]:
# Output folder for the exported CSV; the trailing backslash is required because
# the file name is appended by plain string concatenation.
# NOTE(review): machine-specific absolute Windows path — adjust before running elsewhere.
path = "C:\\Users\\toure\\Desktop\\OpenClassrooms\\Projet 8\\"

In [11]:
# Persist the labelled principal components as CSV. The DataFrame index is
# written too (pandas default), as an unnamed first column.
output_file = path + "sortie_pandas.csv"
principal_df.to_csv(output_file)