In [28]:
import pandas as pd              #analyse de donnees
import numpy as np               #manipulation de tableaux ou calcul mathematiques
import seaborn as sns            #visualisation de donnees
import matplotlib.pyplot as plt  #visualisation les données
import statsmodels.api as sm

In [29]:
data = pd.read_csv("C:/Users/nico_/Desktop/IA School M2/Fast Fashion/visuelle2/sales.csv")

# Création d'une matière polluante

In [30]:
# Définission matières polluantes
matieres_polluantes = ['acrylic','technical', 'polyviscous','fluid polyviscous','dark jean','light jean','nylon','paillettes']

# Ajout d'une colonne "matiere_polluante" qui vaut 1 si la matière est polluante, sinon 0 
data['matiere_polluante'] = [1 if matiere in matieres_polluantes else 0 for matiere in data['fabric']]

In [31]:
df = data.drop(['Unnamed: 0','external_code', 'retail', 'season', 'category', 'color','release_date', 'restock','fabric', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], axis=1)

In [32]:
df.head()

Unnamed: 0,image_path,matiere_polluante
0,PE17/00005.png,1
1,PE17/00002.png,1
2,PE17/00005.png,1
3,PE17/00009.png,0
4,PE17/00005.png,1


In [33]:
df['matiere_polluante'] = df['matiere_polluante'].astype(str)

In [34]:
# extraire les noms de fichiers
new_paths = []
for path in df['image_path']:
    #new_path = path.split('/')[-1].split('.')[0]
    new_path = path.split('/')[-1]
    new_paths.append(new_path)

# remplacer la colonne image_path par les nouveaux noms de fichiers extraits
df['image_path'] = new_paths

In [35]:
df.head()

Unnamed: 0,image_path,matiere_polluante
0,00005.png,1
1,00002.png,1
2,00005.png,1
3,00009.png,0
4,00005.png,1


# Je ne prends que les 500 premières photos

In [36]:
import os


# Définition des chemins d'accès aux images
image_dir = r'C:\Users\nico_\Desktop\IA School M2\Fast Fashion\visuelle2\images\images500'

# Liste de tous les fichiers dans le répertoire d'images
all_files = os.listdir(image_dir)

# Liste des fichiers dans le dataframe
df_files = df['image_path'].tolist()

# Vérification des fichiers manquants
missing_files = []
for file in df_files:
    if file not in all_files:
        missing_files.append(file)

# Suppression des lignes correspondantes dans le dataframe
df = df[~df['image_path'].isin(missing_files)]


In [37]:
df.isnull().sum()

image_path           0
matiere_polluante    0
dtype: int64

In [38]:
df.duplicated().sum()

4151

In [39]:
df.drop_duplicates(inplace=True)

In [40]:
df.duplicated().sum()

0

In [41]:
from sklearn.model_selection import train_test_split
#from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator


# Définition des chemins d'accès aux images
image_dir = r'C:\Users\nico_\Desktop\IA School M2\Fast Fashion\visuelle2\images\images500'

# Division des données en ensembles d'entraînement et de test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Configuration du générateur d'images
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# Chargement des images à partir du générateur
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_dir,
    x_col="image_path",
    y_col="matiere_polluante",
    target_size=(224, 224),
    batch_size=32,
    class_mode="binary",
    shuffle=True,
    seed=42
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_dir,
    x_col="image_path",
    y_col="matiere_polluante",
    target_size=(224, 224),
    batch_size=32,
    class_mode="binary",
    shuffle=True,
    seed=42
)

# Chargement du modèle VGG16 sans la dernière couche fully-connected
base_model = VGG16(weights="imagenet", include_top=False)

# Ajout d'une nouvelle couche fully-connected pour la classification binaire
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(64, activation="relu")(x)
predictions = Dense(1, activation="sigmoid")(x)

# Définition du modèle final
model = Model(inputs=base_model.input, outputs=predictions)

# Compilation du modèle
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Entraînement du modèle
model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    epochs=5
)

# Score du modèle
score = model.evaluate(validation_generator)
print("Test Loss:", score[0])
print("Test Accuracy:", score[1])

Found 377 validated image filenames belonging to 2 classes.
Found 95 validated image filenames belonging to 2 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.2832900285720825
Test Accuracy: 0.9368420839309692
