# Classification de propagande


## Import et Install

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

## Pre-traitement

### creation du dataframe

In [2]:
def load_images_into_df(folder_path):
    images_data = []
    labels = []

    for class_folder in os.listdir(folder_path):
        class_path = os.path.join(folder_path, class_folder)
        if os.path.isdir(class_path):
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)
                images_data.append(img_path)
                labels.append(class_folder)

    df = pd.DataFrame({'image_path': images_data, 'label': labels})
    return df

### redimention des images et creation de la colone image_data

In [3]:
def resize_images(df, target_size=(224, 224)):
    def resize_img(img_path):
        img = image.load_img(img_path, target_size=target_size)
        return image.img_to_array(img)

    df['image_data'] = df['image_path'].apply(resize_img)
    return df

### Split dataframe

In [4]:
def split_dataset(df, test_size=0.2):
    train_df, test_df = train_test_split(df, test_size=test_size, random_state=42)
    return train_df, test_df

In [5]:
def build_resnet50_model(num_classes):
    base_model = ResNet50(weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    return model


In [6]:
# Étape 5: Entraîner le modèle et afficher l'évolution des métriques
def train_and_evaluate_model(train_df, test_df, num_classes, epochs=10, batch_size=32):
    train_images = np.array(train_df['image_data'].tolist())
    train_labels = pd.get_dummies(train_df['label']).values

    test_images = np.array(test_df['image_data'].tolist())
    test_labels = pd.get_dummies(test_df['label']).values

    model = build_resnet50_model(num_classes)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_images, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_images, test_labels))


## Execution et entrainement

In [7]:
# Chemin vers le dossier contenant les images
dataset_folder = "/content/drive/MyDrive/DNN/data"

# Étape 1: Charger les images dans un DataFrame
df = load_images_into_df(dataset_folder)

In [8]:
# Étape 2: Redimensionner les images
df = resize_images(df)
df.head(5)

Unnamed: 0,image_path,label,image_data
0,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[54.0, 57.0, 40.0], [54.0, 57.0, 40.0], [54...."
1,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[104.0, 76.0, 54.0], [104.0, 76.0, 54.0], [1..."
2,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[10.0, 10.0, 12.0], [10.0, 10.0, 12.0], [10...."
3,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[11.0, 20.0, 1.0], [11.0, 20.0, 1.0], [11.0,..."
4,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[37.0, 29.0, 26.0], [37.0, 29.0, 26.0], [37...."


In [9]:
# Étape 3: Diviser le DataFrame en ensembles d'entraînement et de test
train_df, test_df = split_dataset(df)

In [10]:
train_df.head(5)

Unnamed: 0,image_path,label,image_data
10,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[23.0, 23.0, 25.0], [13.0, 13.0, 13.0], [13...."
147,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[11.0, 22.0, 28.0], [11.0, 22.0, 28.0], [11...."
342,/content/drive/MyDrive/DNN/data/propagande/ima...,propagande,"[[[27.0, 25.0, 12.0], [27.0, 25.0, 12.0], [191..."
999,/content/drive/MyDrive/DNN/data/paper/paper418...,paper,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]..."
811,/content/drive/MyDrive/DNN/data/paper/paper546...,paper,"[[[202.0, 198.0, 195.0], [201.0, 197.0, 194.0]..."


In [11]:
# Nombre de classes dans votre ensemble de données
num_classes = len(df['label'].unique())
print(num_classes)

2


In [12]:
# Étape 5: Entraîner le modèle et afficher l'évolution des métriques
train_and_evaluate_model(train_df, test_df, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
