In [None]:
import pandas as pd
import numpy as np
import sklearn as skl
import matplotlib.pyplot as plt

In [None]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Number of random train/test splits whose accuracies are averaged for each
# fusion weight.
EPOCHS: int = 1

# Increment between consecutive visual-model fusion weights in the 0..1 sweep.
STEP_SIZE: float = 0.1

In [None]:
# Load the feature table, shuffle the rows, fill missing numeric values with
# the column median, then drop exact duplicate rows.
train_df = (
    pd.read_csv('features_.csv')
    # Fixed seed so a Restart & Run All reproduces the same row order.
    .sample(frac=1, random_state=42)
    # numeric_only=True: since pandas 2.0 median() raises on non-numeric
    # columns instead of silently skipping them; non-numeric columns are
    # therefore left unfilled here.
    .pipe(lambda df: df.fillna(df.median(numeric_only=True)))
    .drop_duplicates()
)

In [None]:
# Sweep the late-fusion weight between the visual model (AdaBoost) and the
# physiological model (random forest). For every weight a logistic-regression
# meta-classifier is fitted on the weighted class probabilities of both base
# models, and its test accuracy is averaged over EPOCHS random splits.
# Result: `acci`, a list with one mean accuracy per weight, in sweep order.
VISUAL_COLS = ['eye_category', 'eye_position', 'gaze_direction']
PHYSIO_COLS = ['heart_rates', 'p2p_intervals', 'sys_peaks', 'dys_peaks']
LABEL_COL = 'engagement_labels'

if EPOCHS < 1:
    raise ValueError("EPOCHS must be at least 1")
if not 0 < STEP_SIZE <= 1:
    raise ValueError("STEP_SIZE must be in the interval (0, 1]")

# linspace instead of a float-step arange: arange(0, 1 + step, step) can
# overshoot 1.0 (e.g. step=0.3 yields 1.2), which would make physio_weight
# negative. linspace always ends exactly at 1.0; if 1/STEP_SIZE is not an
# integer the spacing is adjusted to the nearest even division of [0, 1].
n_weights = int(round(1.0 / STEP_SIZE)) + 1
visual_weights = np.linspace(0.0, 1.0, n_weights)

# accuracies[epoch, i] is the test accuracy for visual_weights[i] on that
# epoch's split.
accuracies = np.empty((EPOCHS, n_weights))

for epoch in range(EPOCHS):
    # Seeding with the epoch index keeps re-runs reproducible while still
    # giving every epoch a different split. All weights are compared on the
    # same split, so differences between weights are not split noise.
    train_set, test_set = train_test_split(train_df, test_size=0.2, random_state=epoch)
    y_train = train_set[LABEL_COL]
    y_test = test_set[LABEL_COL]

    # One imputer per modality, fitted on the training rows only. The test
    # rows are filled with the *training* medians so no test-set statistics
    # leak into preprocessing.
    visual_imputer = SimpleImputer(strategy='median')
    X_batch_A_train = visual_imputer.fit_transform(train_set[VISUAL_COLS])
    X_batch_A_test = visual_imputer.transform(test_set[VISUAL_COLS])

    physio_imputer = SimpleImputer(strategy='median')
    X_batch_N_train = physio_imputer.fit_transform(train_set[PHYSIO_COLS])
    X_batch_N_test = physio_imputer.transform(test_set[PHYSIO_COLS])

    # Standardise the physiological features using training statistics.
    scaler_physio = StandardScaler().fit(X_batch_N_train)
    X_batch_N_train = scaler_physio.transform(X_batch_N_train)
    X_batch_N_test = scaler_physio.transform(X_batch_N_test)

    # The base models do not depend on the fusion weight, so they are trained
    # once per split rather than once per (weight, split) pair.
    visual_model = AdaBoostClassifier(n_estimators=100, random_state=42)
    visual_model.fit(X_batch_A_train, y_train)

    physio_model = RandomForestClassifier(n_estimators=100, random_state=42)
    physio_model.fit(X_batch_N_train, y_train)

    # NOTE(review): the meta-classifier below is fitted on probabilities the
    # base models produce for their own training rows; out-of-fold
    # predictions would give a less optimistic meta-fit - consider.
    visual_preds_train = visual_model.predict_proba(X_batch_A_train)
    visual_preds_test = visual_model.predict_proba(X_batch_A_test)

    physio_preds_train = physio_model.predict_proba(X_batch_N_train)
    physio_preds_test = physio_model.predict_proba(X_batch_N_test)

    for weight_idx, visual_weight in enumerate(visual_weights):
        physio_weight = 1 - visual_weight

        # Scale each model's probabilities by its weight and stack them
        # side by side as the meta-classifier's input features.
        fused_train = np.concatenate(
            [visual_preds_train * visual_weight, physio_preds_train * physio_weight], axis=1
        )
        fused_test = np.concatenate(
            [visual_preds_test * visual_weight, physio_preds_test * physio_weight], axis=1
        )

        # NOTE(review): `multi_class` appears to be deprecated in recent
        # scikit-learn releases - confirm against the installed version
        # before upgrading.
        meta_classifier = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42)
        meta_classifier.fit(fused_train, y_train)

        final_predictions = meta_classifier.predict(fused_test)
        accuracies[epoch, weight_idx] = accuracy_score(y_test, final_predictions)

# Mean accuracy over epochs for each weight, as a plain list of floats.
acci = accuracies.mean(axis=0).tolist()

In [None]:
# Plot mean test accuracy against the visual-model fusion weight.
fig, ax = plt.subplots()
# `acci` holds one value per step of the weight sweep, which nominally runs
# from 0 to 1 in equal increments, so the x positions are rebuilt from its
# length instead of showing the bare list index.
ax.plot(np.linspace(0.0, 1.0, len(acci)), acci, marker='o')
ax.set(
    xlabel='Visual model weight (physio weight = 1 - visual weight)',
    ylabel='Mean test accuracy',
    title='Fusion accuracy vs. visual/physiological weighting',
)
plt.show()