In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the datasets
gd1_path = '/mnt/data/GD1.csv'
sd1_path = '/mnt/data/SD1.csv'

gd1 = pd.read_csv(gd1_path)
sd1 = pd.read_csv(sd1_path)

# Preprocess GD1: choose the feature columns and encode the condition labels.
gd1_subcol = ['pNN25', 'MEAN_RR', 'HR', 'MEDIAN_RR', 'LF_PCT', 'LF']
# Note that "baseline" and "meditation" both map to class 1.
gd1_label_mapping = {"baseline": 1, "meditation": 1, "amusement": 2, "stress": 3}
# Translate labels with an element-wise lookup instead of Series.replace():
# swapping strings for ints via replace() depends on pandas silently
# downcasting the object column, which pandas 2.2 deprecates (FutureWarning).
# Labels absent from the mapping are passed through unchanged, exactly as
# replace() left them.
gd1['condition'] = gd1['condition'].map(
    lambda label: gd1_label_mapping.get(label, label)
)
y_gd1 = gd1['condition']
X_gd1 = gd1[gd1_subcol]

# Preprocess SD1
def categorize_rating(rating):
    """Bucket a rating into one of four ordinal classes.

    Returns 0 for ratings up to 32, 1 for ratings up to 64, 2 for ratings
    up to 96, and 3 for everything else. Each upper bound is inclusive.
    """
    inclusive_upper_bounds = (32, 64, 96)
    for category, upper_bound in enumerate(inclusive_upper_bounds):
        if rating <= upper_bound:
            return category
    # Above the last bound, or a value for which every "<=" is False (NaN).
    return len(inclusive_upper_bounds)

# Feature columns used as model inputs for SD1.
sd1_subcol = ['ECG', 'SCR', 'Temp', 'HR', 'LF']

# Derive the target: pass every video rating through categorize_rating and
# store the resulting class on the frame as a new 'stress' column.
video_ratings = sd1['Rating_Videorating']
sd1['stress'] = video_ratings.apply(categorize_rating)

X_sd1 = sd1[sd1_subcol]
y_sd1 = sd1['stress']

# Hold out 30% of each dataset as a test set. Both splits share the same
# options, and the fixed seed makes the partitioning reproducible.
split_options = dict(test_size=0.3, random_state=42)

X_train_gd1, X_test_gd1, y_train_gd1, y_test_gd1 = train_test_split(
    X_gd1, y_gd1, **split_options
)
X_train_sd1, X_test_sd1, y_train_sd1, y_test_sd1 = train_test_split(
    X_sd1, y_sd1, **split_options
)

# Fit one independently seeded Random Forest per dataset. fit() returns the
# estimator itself, so construction and training are chained.
rf_gd1 = RandomForestClassifier(random_state=42).fit(X_train_gd1, y_train_gd1)
rf_sd1 = RandomForestClassifier(random_state=42).fit(X_train_sd1, y_train_sd1)

# Evaluate each model on its respective held-out test set
gd1_pred = rf_gd1.predict(X_test_gd1)
sd1_pred = rf_sd1.predict(X_test_sd1)

# classification_report returns a single multi-line, column-aligned string.
gd1_report = classification_report(y_test_gd1, gd1_pred)
sd1_report = classification_report(y_test_sd1, sd1_pred)

# Print the reports instead of ending on a bare tuple expression: the tuple's
# repr shows escaped "\n" sequences rather than a readable table, and a bare
# expression displays nothing when this code runs as a plain script.
print("GD1 classification report:")
print(gd1_report)
print("SD1 classification report:")
print(sd1_report)
