# **Train data prep:**

**Generate data with densenet121 into seperated files**

In [None]:
import os
import pandas as pd
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np

# Charger le modèle DenseNet-121 pré-entraîné
base_model = DenseNet121(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)

def extract_features(img_path):
    """Extrait les caractéristiques d'une image en utilisant DenseNet-121."""
    img = image.load_img(img_path, target_size=(224, 224))  # DenseNet-121 uses 224x224 input size
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()

def process_images_for_id(id, folder_path, data_row):
    """Trouve et traite les images pour un ID donné dans le dossier spécifié."""
    results = pd.DataFrame()
    for i in range(0, 200):  # Pour chaque image de l'ID
        img_name = f"{id}-{i}.jpg"
        img_path = os.path.join(folder_path, img_name)
        if os.path.exists(img_path):
            features = extract_features(img_path)
            # Ajouter les caractéristiques et les métadonnées dans le DataFrame
            result = {'id': id, 'image_name': img_name, **{f'feature_{j+1}': feature for j, feature in enumerate(features)}, **data_row}
            results = results._append(result, ignore_index=True)
    return results

# Lire le fichier CSV
prin_df = pd.read_csv('D:/federated learning/archive/train/prin.csv')

# Chemins des dossiers contenant les images
folders = ['D:/federated learning/archive/train/end', 'D:/federated learning/archive/train/lchange', 'D:/federated learning/archive/train/lturn', 'D:/federated learning/archive/train/rchange', 'D:/federated learning/archive/train/rturn']

# Pour chaque dossier, traiter les images et enregistrer les résultats dans des fichiers Excel séparés
for folder_path in folders:
    folder_name = os.path.basename(folder_path)
    results_df = pd.DataFrame()
    for index, row in prin_df.iterrows():
        id = row['id']
        result = process_images_for_id(id, folder_path, row.to_dict())
        results_df = results_df._append(result)  # Change _append to append

    # Enregistrer les résultats dans un fichier Excel séparé pour chaque dossier
    output_filename = f'D:/federated learning/archive/train/{folder_name}_output_train.csv'
    results_df.to_csv(output_filename, index=False)

# Note: N'oubliez pas d'adapter votre code pour correspondre à votre configuration spécifique.




**combine the files**

In [None]:
import pandas as pd

# List of Excel file names
path = "D:/federated learning/archive/train/"
file_names = [path+'end_output_train.csv', path+'lchange_output_train.csv', path+'lturn_output_train.csv', path+'rchange_output_train.csv', path+'rturn_output_train.csv']
# Read all Excel files into separate DataFrames and concatenate them
dfs = [pd.read_csv(file) for file in file_names]
df = pd.concat(dfs)


In [None]:
df

Unnamed: 0,id,image_name,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,lane_right,road_artifact,manoeuvre,v1,v2,v3,v4,v5,v6,v7
0,20141019_091035_1106_1256,20141019_091035_1106_1256-0.jpg,0.000175,0.002667,0.002479,0.000542,0.149877,0.275744,0.000846,0.004425,...,2.0,1.0,end,0.176379,0.188793,0.188793,0.189790,0.195442,0.195442,0.195442
1,20141019_091035_1106_1256,20141019_091035_1106_1256-1.jpg,0.000154,0.002384,0.002411,0.000602,0.165721,0.239411,0.000633,0.004744,...,2.0,1.0,end,0.176379,0.188793,0.188793,0.189790,0.195442,0.195442,0.195442
2,20141019_091035_1106_1256,20141019_091035_1106_1256-3.jpg,0.000130,0.002980,0.002142,0.000760,0.158415,0.187496,0.000708,0.004957,...,2.0,1.0,end,0.176379,0.188793,0.188793,0.189790,0.195442,0.195442,0.195442
3,20141019_091035_1106_1256,20141019_091035_1106_1256-5.jpg,0.000129,0.003139,0.002679,0.000892,0.170225,0.168422,0.000761,0.004735,...,2.0,1.0,end,0.176379,0.188793,0.188793,0.189790,0.195442,0.195442,0.195442
4,20141019_091035_1106_1256,20141019_091035_1106_1256-7.jpg,0.000190,0.003919,0.002485,0.000679,0.223878,0.178166,0.000614,0.005599,...,2.0,1.0,end,0.176379,0.188793,0.188793,0.189790,0.195442,0.195442,0.195442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2655,20141126_142224_4263_4413,20141126_142224_4263_4413-141.jpg,0.000424,0.004490,0.004567,0.001421,0.253137,0.021801,0.000538,0.006349,...,2.0,1.0,rturn,0.646565,0.685154,0.695619,0.700104,0.700104,0.691229,0.687425
2656,20141126_142224_4263_4413,20141126_142224_4263_4413-143.jpg,0.000413,0.004438,0.004488,0.001312,0.238713,0.032836,0.000546,0.006232,...,2.0,1.0,rturn,0.646565,0.685154,0.695619,0.700104,0.700104,0.691229,0.687425
2657,20141126_142224_4263_4413,20141126_142224_4263_4413-145.jpg,0.000406,0.004454,0.004720,0.001485,0.215381,0.035957,0.000525,0.006417,...,2.0,1.0,rturn,0.646565,0.685154,0.695619,0.700104,0.700104,0.691229,0.687425
2658,20141126_142224_4263_4413,20141126_142224_4263_4413-147.jpg,0.000350,0.003842,0.004641,0.001455,0.234168,0.036368,0.000550,0.006482,...,2.0,1.0,rturn,0.646565,0.685154,0.695619,0.700104,0.700104,0.691229,0.687425


In [None]:
# Extracting ID from the 'image_name' column
df['id_from_image'] = df['image_name'].str.split('-').str[-1].str.split('.').str[0].astype(int)

In [None]:
# Sorting the DataFrame by 'id_from_image' column and 'image_name' for consistent ordering
df = df.sort_values(by=['id','id_from_image' ]).reset_index(drop=True)

In [None]:
df['id_from_image']

0          0
1          1
2          3
3          5
4          7
        ... 
30547    141
30548    143
30549    145
30550    147
30551    149
Name: id_from_image, Length: 30552, dtype: int32

In [None]:
df[df["id"]==	'20141019_084523_524_652']

Unnamed: 0,id,image_name,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,road_artifact,manoeuvre,v1,v2,v3,v4,v5,v6,v7,id_from_image
0,20141019_084523_524_652,20141019_084523_524_652-0.jpg,0.000206,0.004332,0.002938,0.001088,0.169714,0.263350,0.000964,0.005413,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,0
1,20141019_084523_524_652,20141019_084523_524_652-1.jpg,0.000208,0.004632,0.002727,0.001010,0.168863,0.241776,0.000967,0.005209,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,1
2,20141019_084523_524_652,20141019_084523_524_652-3.jpg,0.000220,0.004850,0.002802,0.001137,0.161770,0.245964,0.000957,0.005780,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,3
3,20141019_084523_524_652,20141019_084523_524_652-5.jpg,0.000225,0.004715,0.002659,0.001038,0.170318,0.262266,0.001038,0.005606,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,5
4,20141019_084523_524_652,20141019_084523_524_652-7.jpg,0.000217,0.004513,0.002733,0.001141,0.168799,0.241890,0.000954,0.006233,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,20141019_084523_524_652,20141019_084523_524_652-141.jpg,0.000158,0.005389,0.002639,0.001410,0.168819,0.280058,0.000647,0.004972,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,141
72,20141019_084523_524_652,20141019_084523_524_652-143.jpg,0.000158,0.005389,0.002639,0.001410,0.168819,0.280058,0.000647,0.004972,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,143
73,20141019_084523_524_652,20141019_084523_524_652-145.jpg,0.000158,0.005389,0.002639,0.001410,0.168819,0.280058,0.000647,0.004972,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,145
74,20141019_084523_524_652,20141019_084523_524_652-147.jpg,0.000158,0.005389,0.002639,0.001410,0.168819,0.280058,0.000647,0.004972,...,1.0,lturn,0.039024,0.039024,0.023668,0.017087,0.017087,0.017087,0.027338,147


In [None]:
# Initialize 'speed' column with NaN
df['speed'] = ''
df['speed'] = pd.to_numeric(df['speed'], errors='coerce')

# Iterate through each unique video ID
for vid_id in df['id'].unique():
    vid_indices = df[df['id'] == vid_id].index  # Get indices for the current video ID
    speeds = df.loc[vid_indices, 'v1':'v7'].iloc[0].tolist()  # Get speeds for the video ID

    # Assign each speed value to every 11th frame for the current video ID
    for i in range(0, len(vid_indices), 11):
        frames = min(11, len(vid_indices) - i)  # Check remaining frames to avoid index out of range
        df.loc[vid_indices[i:i + frames], 'speed'] = speeds[i // 11]


In [None]:
list_col = ['v1','v2','v3','v4','v5','v6','v7','id_from_image']

In [None]:
df.drop(columns=list_col,inplace=True)

In [None]:
df['image_name'] = df['image_name'].str.replace('.jpg', '')

In [None]:
df

Unnamed: 0,id,image_name,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_1020,feature_1021,feature_1022,feature_1023,feature_1024,lane_lift,lane_right,road_artifact,manoeuvre,speed
0,20141019_084523_524_652,20141019_084523_524_652-0,0.000206,0.004332,0.002938,0.001088,0.169714,0.263350,0.000964,0.005413,...,1.030366,3.294652,0.020261,1.750881,2.958675,2.0,2.0,1.0,lturn,0.039024
1,20141019_084523_524_652,20141019_084523_524_652-1,0.000208,0.004632,0.002727,0.001010,0.168863,0.241776,0.000967,0.005209,...,0.902043,3.060211,0.045156,1.889733,2.958744,2.0,2.0,1.0,lturn,0.039024
2,20141019_084523_524_652,20141019_084523_524_652-3,0.000220,0.004850,0.002802,0.001137,0.161770,0.245964,0.000957,0.005780,...,0.755011,2.755459,0.076186,1.774965,3.145374,2.0,2.0,1.0,lturn,0.039024
3,20141019_084523_524_652,20141019_084523_524_652-5,0.000225,0.004715,0.002659,0.001038,0.170318,0.262266,0.001038,0.005606,...,0.742209,2.824840,0.053867,1.751431,3.315842,2.0,2.0,1.0,lturn,0.039024
4,20141019_084523_524_652,20141019_084523_524_652-7,0.000217,0.004513,0.002733,0.001141,0.168799,0.241890,0.000954,0.006233,...,0.923087,2.550010,0.043626,1.815799,3.522757,2.0,2.0,1.0,lturn,0.039024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30547,20141126_152227_180_330,20141126_152227_180_330-141,0.000297,0.006405,0.003923,0.001825,0.175097,0.131989,0.000319,0.007116,...,1.461047,1.561885,0.135029,2.010328,0.334353,2.0,2.0,0.0,end,0.800103
30548,20141126_152227_180_330,20141126_152227_180_330-143,0.000303,0.006744,0.003853,0.001990,0.168434,0.181973,0.000306,0.007020,...,1.516700,1.155631,0.100127,1.891425,0.294416,2.0,2.0,0.0,end,0.800103
30549,20141126_152227_180_330,20141126_152227_180_330-145,0.000270,0.006450,0.003900,0.002024,0.165567,0.184389,0.000388,0.006671,...,1.707040,1.047461,0.072117,2.276897,0.373202,2.0,2.0,0.0,end,0.800103
30550,20141126_152227_180_330,20141126_152227_180_330-147,0.000270,0.005941,0.003706,0.002001,0.170674,0.144951,0.000371,0.006675,...,1.698399,0.975511,0.046206,2.410213,0.292629,2.0,2.0,0.0,end,0.800103


In [None]:
# Define the output Excel file name
output_file = path+'train.csv'

# Save the sorted DataFrame to a new Excel file
df.to_csv(output_file, index=False)

print(f"Data has been concatenated, processed, and saved to '{output_file}' successfully.")


Data has been concatenated, processed, and saved to 'D:/federated learning/archive/train/train.csv' successfully.


**Test data prep**

In [None]:
import os
import pandas as pd
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np

# Charger le modèle DenseNet-121 pré-entraîné
base_model = DenseNet121(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)

def extract_features(img_path):
    """Extrait les caractéristiques d'une image en utilisant DenseNet-121."""
    img = image.load_img(img_path, target_size=(224, 224))  # DenseNet-121 uses 224x224 input size
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()

def process_images_for_id(id, folder_path, data_row):
    """Trouve et traite les images pour un ID donné dans le dossier spécifié."""
    results = pd.DataFrame()
    for i in range(0, 200):  # Pour chaque image de l'ID
        img_name = f"{id}-{i}.jpg"
        img_path = os.path.join(folder_path, img_name)
        if os.path.exists(img_path):
            features = extract_features(img_path)
            # Ajouter les caractéristiques et les métadonnées dans le DataFrame
            result = {'id': id, 'image_name': img_name, **{f'feature_{j+1}': feature for j, feature in enumerate(features)}, **data_row}
            results = results._append(result, ignore_index=True)  # Change _append to append
    return results

# Lire le fichier CSV
prin_df = pd.read_csv('D:/federated learning/archive/train/prin.csv')

# Chemins des dossiers contenant les images
folders = ['D:/federated learning/archive/test/end', 'D:/federated learning/archive/test/lchange', 'D:/federated learning/archive/test/lturn', 'D:/federated learning/archive/test/rchange', 'D:/federated learning/archive/test/rturn']

# Pour chaque dossier, traiter les images et enregistrer les résultats dans des fichiers CSV séparés
for folder_path in folders:
    folder_name = os.path.basename(folder_path)
    results_df = pd.DataFrame()
    for index, row in prin_df.iterrows():
        id = row['id']
        result = process_images_for_id(id, folder_path, row.to_dict())
        results_df = results_df._append(result)  # Change _append to append

    # Enregistrer les résultats dans un fichier CSV séparé pour chaque dossier
    output_filename = f'D:/federated learning/archive/test/{folder_name}_output_test.csv'
    results_df.to_csv(output_filename, index=False)

# Note: N'oubliez pas d'adapter votre code pour correspondre à votre configuration spécifique.




In [None]:
import pandas as pd

# List of Excel file names
path = "D:/federated learning/archive/test/"
file_names = [path+'end_output_test.csv', path+'lchange_output_test.csv', path+'lturn_output_test.csv', path+'rchange_output_test.csv', path+'rturn_output_test.csv']
# Read all Excel files into separate DataFrames and concatenate them
dfs = [pd.read_csv(file) for file in file_names]
test_data = pd.concat(dfs)

In [None]:
# Extracting ID from the 'image_name' column
test_data['id_from_image'] = test_data['image_name'].str.split('-').str[-1].str.split('.').str[0].astype(int)
# Sorting the DataFrame by 'id_from_image' column and 'image_name' for consistent ordering
test_data = test_data.sort_values(by=['id','id_from_image' ]).reset_index(drop=True)
# Initialize 'speed' column with NaN
test_data['speed'] = ''
test_data['speed'] = pd.to_numeric(test_data['speed'], errors='coerce')

# Iterate through each unique video ID
for vid_id in test_data['id'].unique():
    vid_indices = test_data[test_data['id'] == vid_id].index  # Get indices for the current video ID
    speeds = test_data.loc[vid_indices, 'v1':'v7'].iloc[0].tolist()  # Get speeds for the video ID

    # Assign each speed value to every 11th frame for the current video ID
    for i in range(0, len(vid_indices), 11):
        frames = min(11, len(vid_indices) - i)  # Check remaining frames to avoid index out of range
        test_data.loc[vid_indices[i:i + frames], 'speed'] = speeds[i // 11]
list_col = ['v1','v2','v3','v4','v5','v6','v7','id_from_image']
test_data.drop(columns=list_col,inplace=True)
test_data['image_name'] = test_data['image_name'].str.replace('.jpg', '')

output_file = path+'test.csv'
test_data.to_csv(output_file, index=False)

print(f"Data has been concatenated, processed, and saved to '{output_file}' successfully.")


Data has been concatenated, processed, and saved to 'D:/federated learning/archive/test/test.csv' successfully.


In [None]:
test_data

Unnamed: 0,id,image_name,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_1020,feature_1021,feature_1022,feature_1023,feature_1024,lane_lift,lane_right,road_artifact,manoeuvre,speed
0,20141220_130914_1352_1502,20141220_130914_1352_1502-0,0.000107,0.004486,0.001802,0.001531,0.192848,0.298973,0.000407,0.006171,...,1.096406,0.406703,0.307821,1.739031,1.044053,1.0,3.0,0.0,ichange,0.187500
1,20141220_130914_1352_1502,20141220_130914_1352_1502-1,0.000113,0.004368,0.001740,0.001443,0.183267,0.246802,0.000391,0.006351,...,1.198308,0.307568,0.292584,1.694856,1.001825,1.0,3.0,0.0,ichange,0.187500
2,20141220_130914_1352_1502,20141220_130914_1352_1502-3,0.000090,0.004036,0.001674,0.001359,0.194337,0.276246,0.000474,0.006494,...,0.737248,0.293347,0.365769,1.686346,1.058568,1.0,3.0,0.0,ichange,0.187500
3,20141220_130914_1352_1502,20141220_130914_1352_1502-5,0.000104,0.003550,0.001735,0.001314,0.189920,0.283442,0.000459,0.006478,...,0.798873,0.386024,0.230171,1.624868,1.493420,1.0,3.0,0.0,ichange,0.187500
4,20141220_130914_1352_1502,20141220_130914_1352_1502-7,0.000120,0.004293,0.001771,0.001200,0.183815,0.285626,0.000467,0.006494,...,0.767990,0.308440,0.156578,1.922390,1.342377,1.0,3.0,0.0,ichange,0.187500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14587,20141220_170004_1099_1249,20141220_170004_1099_1249-141,0.000266,0.006493,0.003387,0.002593,0.249397,0.269814,0.000280,0.006776,...,0.113975,0.341451,0.229917,1.623238,0.710991,1.0,1.0,0.0,end,0.784835
14588,20141220_170004_1099_1249,20141220_170004_1099_1249-143,0.000264,0.006563,0.003850,0.002699,0.270976,0.226608,0.000375,0.005960,...,0.179260,0.404094,0.098251,0.920940,0.927088,1.0,1.0,0.0,end,0.784835
14589,20141220_170004_1099_1249,20141220_170004_1099_1249-145,0.000264,0.005642,0.003558,0.002927,0.257725,0.281586,0.000307,0.006566,...,0.133651,0.397281,0.075375,1.230746,0.990607,1.0,1.0,0.0,end,0.784835
14590,20141220_170004_1099_1249,20141220_170004_1099_1249-147,0.000280,0.006881,0.003527,0.002480,0.272290,0.241146,0.000344,0.006274,...,0.532675,0.305839,0.228749,1.035737,0.836689,1.0,1.0,0.0,end,0.784835


# **LSTM**

In [81]:
#the training set from train folder and the testing set from test folder
train_data =  pd.read_csv("/content/drive/MyDrive/FL_LSTM/Dossier sans titre/train.csv")
test_data = pd.read_csv("/content/drive/MyDrive/FL_LSTM/Dossier sans titre/test.csv")

Training the LSTM model

In [90]:
import pandas as pd
import numpy as np
from keras.models import Model
from keras.layers import LSTM, Dense, Input
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split



label_encoder = LabelEncoder()
train_data['manoeuvre_encoded'] = label_encoder.fit_transform(train_data['manoeuvre'])

# Selecting relevant columns (features and target) in the training dataset
numeric_data = train_data.select_dtypes(include=[np.number])
train_features = numeric_data.drop(columns=['manoeuvre_encoded'])
train_target = train_data['manoeuvre_encoded']

# Splitting the training data into train and validation sets
train_features, val_features, train_target, val_target = train_test_split(train_features, train_target, test_size=0.2, random_state=42)

# Reshape data for LSTM input (sequence length = 1)
train_features = np.array(train_features)
train_features_reshaped = train_features.reshape((train_features.shape[0], 1, train_features.shape[1]))

val_features = np.array(val_features)
val_features_reshaped = val_features.reshape((val_features.shape[0], 1, val_features.shape[1]))

# Define the model architecture using functional API
inputs = Input(shape=(1, train_features.shape[1]))
lstm = LSTM(77)(inputs)
outputs = Dense(len(np.unique(train_target)), activation='softmax')(lstm)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define ModelCheckpoint callback to save the best model during training based on validation accuracy
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

# Train the model with ModelCheckpoint callback
history = model.fit(train_features_reshaped, train_target, epochs=15, batch_size=76, validation_data=(val_features_reshaped, val_target), callbacks=[checkpoint])

#NOTE: we left the Batch size as 76 and the LSTM's hidden units since they were the only values
#that gave us the best accuracy for our case.


Epoch 1/15
Epoch 1: val_accuracy improved from -inf to 0.76534, saving model to best_model.h5
Epoch 2/15
 12/322 [>.............................] - ETA: 3s - loss: 0.5640 - accuracy: 0.8125

  saving_api.save_model(


Epoch 2: val_accuracy improved from 0.76534 to 0.91033, saving model to best_model.h5
Epoch 3/15
Epoch 3: val_accuracy improved from 0.91033 to 0.94551, saving model to best_model.h5
Epoch 4/15
Epoch 4: val_accuracy improved from 0.94551 to 0.96220, saving model to best_model.h5
Epoch 5/15
Epoch 5: val_accuracy improved from 0.96220 to 0.97349, saving model to best_model.h5
Epoch 6/15
Epoch 6: val_accuracy improved from 0.97349 to 0.97987, saving model to best_model.h5
Epoch 7/15
Epoch 7: val_accuracy improved from 0.97987 to 0.98216, saving model to best_model.h5
Epoch 8/15
Epoch 8: val_accuracy improved from 0.98216 to 0.98249, saving model to best_model.h5
Epoch 9/15
Epoch 9: val_accuracy improved from 0.98249 to 0.98593, saving model to best_model.h5
Epoch 10/15
Epoch 10: val_accuracy improved from 0.98593 to 0.98642, saving model to best_model.h5
Epoch 11/15
Epoch 11: val_accuracy improved from 0.98642 to 0.98805, saving model to best_model.h5
Epoch 12/15
Epoch 12: val_accuracy im

In [91]:
# Load the best saved model based on validation accuracy
best_model = Model(inputs=inputs, outputs=outputs)
best_model.load_weights('best_model.h5')

# Compile the best model
best_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Test

In [92]:

test_data['manoeuvre_encoded'] = label_encoder.transform(test_data['manoeuvre'])

# Selecting relevant columns (features and target) in the testing dataset
test_numeric_data = test_data.select_dtypes(include=[np.number])
test_features = test_numeric_data.drop(columns=['manoeuvre_encoded'])
test_target = test_data['manoeuvre_encoded']

# Reshape data for LSTM input (sequence length = 1)
test_features = np.array(test_features)
test_features_reshaped = test_features.reshape((test_features.shape[0], 1, test_features.shape[1]))

# Evaluate the best model on the testing data
test_loss, test_accuracy = best_model.evaluate(test_features_reshaped, test_target)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 45.18%


# **Federated Learning**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Reading the training data

In [93]:
import pandas as pd
train_data = pd.read_csv("/content/drive/MyDrive/FL_LSTM/Dossier sans titre/train.csv")

Unfortunately when relying on the "tensorflow_federated" library we encounter problems in the envirenment itself such as in this case "build_weighted_fed_avg" is not defined in any compatible version of tff with tensorflow and we can't downgrade tf's version since it'll create another problem with the python version of collab and so on and that's why we'll try a manual approach

In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
from sklearn.preprocessing import LabelEncoder



# Encode the 'manoeuvre' labels
label_encoder = LabelEncoder()
df['manoeuvre'] = label_encoder.fit_transform(df['manoeuvre'])

# Shuffle the DataFrame and split into three equal parts
num_clients = 3
split_data = np.array_split(df.sample(frac=1, random_state=42), num_clients)

def preprocess(client_df):
    features = client_df.iloc[:, 2:1026].values  # Assuming first two columns are 'id' and 'image_name'
    labels = client_df['manoeuvre'].values
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.batch(10)  # Batch size can be adjusted

client_datasets = [preprocess(client_df) for client_df in split_data]

def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(1024,)),
        tf.keras.layers.Dense(128, activation='relu'),

        # Correctly reshape the tensor; adjust this based on your network's needs
        # Example: Reshape from [128] to [16, 8]
        # Ensure that the product of the dimensions equals the size of the Dense layer output
        tf.keras.layers.Reshape((16, 8)),  # Adjust this reshape as needed

        tf.keras.layers.LSTM(16),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Or 'softmax' for multi-class
    ])
    return model

import tensorflow_federated as tff
import tensorflow as tf

def model_fn():
    # Create a Keras model
    keras_model = create_model()

    # Wrap the Keras model to be compatible with TFF
    return tff.learning.models.model_from_keras_model(
        keras_model=keras_model,
        input_spec=client_datasets[0].element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
)

# Building the Federated Averaging process
# Building the Federated Averaging process
iterative_process = tff.learning.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.1),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
)


# Initialize the process
state = iterative_process.initialize()

# Perform training
NUM_ROUNDS = 10
for round_num in range(1, NUM_ROUNDS + 1):
    state, metrics = iterative_process.next(state, client_datasets)
    print(f'Round {round_num}, metrics={metrics}')

# Extract the trained Keras model
trained_model = create_model()
tff.learning.assign_weights_to_keras_model(trained_model, state.model)

# Save the trained model
trained_model.save('trained_federated_model.h5')

AttributeError: ignored

Down bellow is the manual approach based on the logic behind the Federated Learning approach (deviding the data for 3 "clients" then training 3 models independently with same model architecture and also with the checkpoint callback technique to ensure we get the best of the best models then combining them by the average of the weights of those models into one model)

In [94]:
from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()
train_data['manoeuvre_encoded'] = label_encoder.fit_transform(train_data['manoeuvre'])

# Selecting relevant columns (features and target) in the training dataset
numeric_data = train_data.select_dtypes(include=[np.number])


In [95]:
data_splits = np.array_split(numeric_data, 3)  # Split the data into three parts


The function to train the 3 models

In [117]:
from keras.layers import Input, LSTM, Dense
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.models import load_model


def train_model(data_split,model_name):
    """Trains a model on the given data split and returns the best trained model."""

    # Separate features and target
    features = data_split.drop(columns=['manoeuvre_encoded'])
    target = data_split['manoeuvre_encoded']

    # Reshape features for LSTM input (sequence length = 1)
    features = np.array(features)
    features_reshaped = features.reshape((features.shape[0], 1, features.shape[1]))

    # Define model architecture
    inputs = Input(shape=(1, features.shape[1]))
    lstm = LSTM(77)(inputs)
    outputs = Dense(len(np.unique(target)), activation='softmax')(lstm)
    model = Model(inputs=inputs, outputs=outputs)

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Define ModelCheckpoint callback
    checkpoint_filepath = str(model_name)+'_best_model.h5'  # Customize the filename as needed
    checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

    # Train the model with ModelCheckpoint
    model.fit(features_reshaped, target, epochs=15, batch_size=76, validation_split=0.2, callbacks=[checkpoint])

    # Load the best model from the checkpoint
    best_model = load_model(checkpoint_filepath)

    return best_model  # Return the best model

In [118]:
# Train three models
model1 = train_model(data_splits[0],'Model1')
model2 = train_model(data_splits[1],'Model2')
model3 = train_model(data_splits[2],'Model3')

Epoch 1/15
Epoch 1: val_accuracy improved from -inf to 0.27541, saving model to Model1_best_model.h5
Epoch 2/15
 10/108 [=>............................] - ETA: 1s - loss: 0.7621 - accuracy: 0.7000

  saving_api.save_model(


Epoch 2: val_accuracy did not improve from 0.27541
Epoch 3/15
Epoch 3: val_accuracy improved from 0.27541 to 0.32351, saving model to Model1_best_model.h5
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.32351
Epoch 5/15
Epoch 5: val_accuracy improved from 0.32351 to 0.34070, saving model to Model1_best_model.h5
Epoch 6/15
Epoch 6: val_accuracy did not improve from 0.34070
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.34070
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.34070
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.34070
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.34070
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.34070
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.34070
Epoch 13/15
Epoch 13: val_accuracy did not improve from 0.34070
Epoch 14/15
Epoch 14: val_accuracy did not improve from 0.34070
Epoch 15/15
Epoch 15: val_accuracy did not improve from 0.34070
Epoch 1/15
Epoch 1: val_accuracy improved from 

  saving_api.save_model(


Epoch 2: val_accuracy improved from 0.14728 to 0.17722, saving model to Model2_best_model.h5
Epoch 3/15
Epoch 3: val_accuracy improved from 0.17722 to 0.18262, saving model to Model2_best_model.h5
Epoch 4/15
Epoch 4: val_accuracy improved from 0.18262 to 0.19686, saving model to Model2_best_model.h5
Epoch 5/15
Epoch 5: val_accuracy did not improve from 0.19686
Epoch 6/15
Epoch 6: val_accuracy did not improve from 0.19686
Epoch 7/15
Epoch 7: val_accuracy improved from 0.19686 to 0.20619, saving model to Model2_best_model.h5
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.20619
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.20619
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.20619
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.20619
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.20619
Epoch 13/15
Epoch 13: val_accuracy did not improve from 0.20619
Epoch 14/15
Epoch 14: val_accuracy improved from 0.20619 to 0.21060, saving model to Model2

  saving_api.save_model(


Epoch 2: val_accuracy did not improve from 0.39077
Epoch 3/15
Epoch 3: val_accuracy did not improve from 0.39077
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.39077
Epoch 5/15
Epoch 5: val_accuracy did not improve from 0.39077
Epoch 6/15
Epoch 6: val_accuracy did not improve from 0.39077
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.39077
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.39077
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.39077
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.39077
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.39077
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.39077
Epoch 13/15
Epoch 13: val_accuracy did not improve from 0.39077
Epoch 14/15
Epoch 14: val_accuracy did not improve from 0.39077
Epoch 15/15
Epoch 15: val_accuracy did not improve from 0.39077


Initializing the averaged_model and getting the weights from the pretrained models

In [119]:
features = numeric_data.drop(columns=['manoeuvre_encoded'])
target = numeric_data['manoeuvre_encoded']

features = np.array(features)

inputs = Input(shape=(1, features.shape[1]))
lstm = LSTM(77)(inputs)
outputs = Dense(len(np.unique(target)), activation='softmax')(lstm)

# Create a new model with the same architecture
averaged_model = Model(inputs=inputs, outputs=outputs)

# Extract weights from each model
weights1 = model1.get_weights()
weights2 = model2.get_weights()
weights3 = model3.get_weights()

Calculating the average of the weights and affecting them into the averaged_model and compiling it

In [120]:
averaged_weights = []
for i in range(len(weights1)):
    layer_weights = [weights1[i], weights2[i], weights3[i]]
    averaged_weights.append(np.mean(layer_weights, axis=0))

# Set the averaged weights in the new model
averaged_model.set_weights(averaged_weights)

In [121]:
averaged_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [122]:
test_data = pd.read_csv("/content/drive/MyDrive/FL_LSTM/Dossier sans titre/test.csv")

Testing the averaged_model

In [123]:
from sklearn.preprocessing import LabelEncoder

test_data['manoeuvre_encoded'] = label_encoder.transform(test_data['manoeuvre'])

# Selecting relevant columns (features and target) in the testing dataset
test_numeric_data = test_data.select_dtypes(include=[np.number])
test_features = test_numeric_data.drop(columns=['manoeuvre_encoded'])
test_target = test_data['manoeuvre_encoded']

# Reshape data for LSTM input (sequence length = 1)
test_features = np.array(test_features)
test_features_reshaped = test_features.reshape((test_features.shape[0], 1, test_features.shape[1]))

# Evaluate the best model on the testing data
test_loss, test_accuracy = averaged_model.evaluate(test_features_reshaped, test_target)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 34.76%


As a conclusion, in this case our Federated Learning didn't performe as well as the normal approach