In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.metrics import mean_absolute_error
import os
import statistics


Random Forest

In [2]:
# Load data
acc_list = []
mae_list = []
f1_list = []

directory_path = "dataset/contexual/train/Fedmem/apriori"
# List all files in the directory
files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
# print(files)

for file in files:
    print(file)
    train_file_path = os.path.join(directory_path, file)
    test_file_path = "dataset/contexual/test/Fedmem/dynamic/" + file 
    df_train = pd.read_csv(train_file_path)
    df_test = pd.read_csv(test_file_path)

    # column_names = df_train.columns
    # print(column_names)


    df_train = df_train.drop('ImgID', axis=1)
    df_test = df_test.drop('ImgID', axis=1)

    X_train = df_train.drop('Mem_s', axis=1)
    y_train = df_train['Mem_s']


    X_test = df_test.drop('Mem_s', axis=1)
    y_test = df_test['Mem_s']

    # Train model
    rf = RandomForestClassifier(n_estimators=500, random_state=42)
    rf.fit(X_train, y_train)

    # Evaluate model
    y_pred = rf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # print("Accuracy:", acc)
    mae = mean_absolute_error(y_test, y_pred)
    # print("Mean Absolute Error:", mae)
    # print("Classification Report:\n", classification_report(y_test, y_pred))
    acc_list.append(acc)
    mae_list.append(mae)
    
mean_acc = statistics.mean(acc_list)
acc_std_dev = statistics.stdev(acc_list) 
mean_mae = statistics.mean(mae_list)

print(f"accuracy {mean_acc}/(+-{acc_std_dev}) and mae {mean_mae} ")

Client_id_42.csv
Client_id_37.csv
Client_id_57.csv
Client_id_19.csv
Client_id_53.csv
Client_id_31.csv
Client_id_17.csv
Client_id_47.csv
Client_id_28.csv
Client_id_62.csv
Client_id_41.csv
Client_id_32.csv
Client_id_46.csv
Client_id_39.csv
Client_id_51.csv
Client_id_23.csv
Client_id_48.csv
Client_id_49.csv
Client_id_25.csv
Client_id_38.csv
Client_id_61.csv
Client_id_35.csv
Client_id_60.csv
Client_id_44.csv
Client_id_26.csv
Client_id_43.csv
Client_id_29.csv
Client_id_34.csv
Client_id_54.csv
Client_id_18.csv
Client_id_36.csv
Client_id_27.csv
Client_id_55.csv
Client_id_16.csv
Client_id_30.csv
Client_id_45.csv
Client_id_52.csv
Client_id_33.csv
Client_id_22.csv
Client_id_56.csv
accuracy 0.32189447328647186/(+-0.14982822259883127) and mae 2.323098011676806 


Logistic regression

In [4]:
# Load data
acc_list = []
mae_list = []
f1_list = []

df = pd.DataFrame()

save_path = "./results/contexual/dynamic"
if not os.path.exists(save_path):
    os.makedirs(save_path)
directory_path = "dataset/contexual/train/Fedmem/dynamic"
# List all files in the directory
files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
# print(files)

for file in files:
    # print(file)
    train_file_path = os.path.join(directory_path, file)
    test_file_path = "dataset/contexual/test/Fedmem/dynamic/" + file 
    df_train = pd.read_csv(train_file_path)
    df_test = pd.read_csv(test_file_path)

    # column_names = df_train.columns
    # print(column_names)


    df_train = df_train.drop('ImgID', axis=1)
    df_test = df_test.drop('ImgID', axis=1)

    X_train = df_train.drop('Mem_s', axis=1)
    y_train = df_train['Mem_s']


    X_test = df_test.drop('Mem_s', axis=1)
    y_test = df_test['Mem_s']

    # Train model
    model = LogisticRegression(C=0.1, solver='liblinear', max_iter=1000)

    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # print("Accuracy:", acc)
    mae = mean_absolute_error(y_test, y_pred)
    # print("Mean Absolute Error:", mae)
    # print("Classification Report:\n", classification_report(y_test, y_pred))
    f1 = f1_score(y_test, y_pred, average='micro')
    # print("F1 Score:", f1)
    acc_list.append(acc)
    mae_list.append(mae)
    f1_list.append(f1)
mean_acc = statistics.mean(acc_list)
acc_std_dev = statistics.stdev(acc_list) 
mean_f1 = statistics.mean(f1_list)
f1_std_dev = statistics.stdev(f1_list) 

mean_mae = statistics.mean(mae_list)
mae_std_dev = statistics.stdev(mae_list) 
#print(f" {len(files)}")
df = pd.DataFrame({
    'lifelogger': files,
    'acc': acc_list,
    'mae': mae_list,
    'f1': f1_list
})
df.to_csv(save_path+ "/LR_acc_mae_f1.csv",index=False)
print(f"accuracy {mean_acc*100}/(+-{acc_std_dev}) and mae {mean_mae}/ (+- {mae_std_dev}) F1 {mean_f1*100}/(+-{f1_std_dev})  ")

accuracy 37.43276223062757/(+-0.12706801369607776) and mae 2.1665973879817897/ (+- 0.5981495109099446) F1 37.43276223062757/(+-0.12706801369607776)  
