In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from adapt.feature_based import TCA

In [19]:
# loading WESAD data
subject_ids = [f"S{i}" for i in range(2, 18) if i not in [1, 12]]  
dfs = []

for sid in subject_ids:
    df = pd.read_csv(f"Processed WESAD Data/Processed_{sid}_data_unnormalised_new.csv")
    dfs.append(df)

wesad_data = pd.concat(dfs, ignore_index=True)
wesad_data

Unnamed: 0,Participant ID,Filtered EDA Mean,Filtered EDA Standard Deviation,SCL Mean,SCL Standard Deviation,SCR Mean,SCR Standard Deviation,HRV (RMSSD),HRV (SDNN),HR (bpm),TEMP Mean,TEMP Standard Deviation,TEMP Slope,TEMP Range,Label
0,2,1.319926,0.158933,1.311394,0.097091,0.048811,0.055282,227.981315,172.109971,85.243129,35.810000,0.022730,-0.000183,0.12,0
1,2,1.148486,0.123709,1.154106,0.116166,0.023503,0.031501,212.441958,170.307538,79.455212,35.768000,0.049322,-0.000651,0.21,0
2,2,0.969361,0.161578,0.967142,0.149690,0.011604,0.015669,184.233470,152.013695,78.617389,35.719333,0.036600,-0.000422,0.15,0
3,2,0.780818,0.099444,0.784546,0.106375,0.004581,0.006427,159.404043,153.345582,78.572928,35.708333,0.023464,0.000215,0.09,0
4,2,0.656155,0.073471,0.654764,0.072738,0.004088,0.006014,125.189743,136.537417,74.573025,35.744167,0.038828,0.000520,0.17,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,17,1.085934,0.014432,1.085905,0.004904,0.003564,0.010046,107.596426,98.159150,125.852124,32.333167,0.070699,0.000994,0.27,1
1100,17,1.083978,0.007285,1.084305,0.005482,0.001510,0.003488,118.036298,116.497528,119.685699,32.427667,0.063715,0.000900,0.24,1
1101,17,1.075400,0.008162,1.075364,0.006820,0.001835,0.004210,152.322806,126.263518,112.021025,32.530750,0.076981,0.001091,0.26,1
1102,17,1.065623,0.011943,1.064553,0.008547,0.003008,0.004770,160.252394,131.740600,108.012618,32.616167,0.055230,0.000719,0.22,1


In [20]:
# loading affective_data
subject_ids = [f"Drv_{i}" for i in range(1, 14) if i != 2]  
affective_data = {}

for sid in subject_ids:
    left = pd.read_csv(f"Processed AffectiveROAD Data/{sid}_left_data_unnormalised_.csv")
    right = pd.read_csv(f"Processed AffectiveROAD Data/{sid}_right_data_unnormalised_.csv")
    combined = pd.concat([left, right], ignore_index=True)
    affective_data[sid] = combined

affective_data['Drv_1']

Unnamed: 0,Participant ID,Wrist,Filtered EDA Mean,Filtered EDA Standard Deviation,SCL Mean,SCL Standard Deviation,SCR Mean,SCR Standard Deviation,HRV (RMSSD),HRV (SDNN),HR (bpm),TEMP Mean,TEMP Standard Deviation,TEMP Slope,TEMP Range,Stress Label
0,1,Left,5.219569,0.231495,5.218068,0.152462,0.044282,0.095538,186.782481,170.410122,71.521511,34.825917,0.017078,0.000161,0.08,0.0
1,1,Left,5.388524,0.236291,5.375866,0.160373,0.051729,0.095987,130.869533,93.075210,67.794466,34.830583,0.015906,0.000006,0.06,0.0
2,1,Left,5.279265,0.386605,5.255102,0.309213,0.058357,0.100687,181.555839,127.773168,71.092304,34.817583,0.025674,-0.000321,0.10,0.0
3,1,Left,4.801925,0.395216,4.806559,0.367729,0.027595,0.051054,141.964733,108.598740,70.477189,34.805083,0.019386,-0.000005,0.07,0.0
4,1,Left,4.394909,0.180730,4.397087,0.199027,0.010146,0.014696,84.467369,61.161047,68.788198,34.811667,0.021422,0.000250,0.07,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,1,Right,2.808709,0.166746,2.809205,0.160343,0.008685,0.013871,158.567781,105.902209,90.995261,34.356333,0.139546,-0.001966,0.40,1.0
76,1,Right,3.631054,0.132800,3.600742,0.070711,0.049462,0.086135,21.949279,31.387660,77.002639,36.735333,0.072272,0.001008,0.24,1.0
77,1,Right,3.740914,0.307837,3.833648,0.423779,0.030531,0.076302,56.221057,58.151319,76.699080,36.795583,0.028731,0.000258,0.14,1.0
78,1,Right,4.914828,0.379760,4.912760,0.323356,0.031633,0.102330,24.838477,32.150903,75.729687,36.727250,0.030454,-0.000014,0.11,1.0


In [21]:
# defining features
feature_col = ['HR (bpm)', 'HRV (RMSSD)', 'HRV (SDNN)', 'Filtered EDA Mean', 'Filtered EDA Standard Deviation', 'SCL Mean', 'SCL Standard Deviation', 'SCR Mean', 'SCR Standard Deviation', 'TEMP Mean', 'TEMP Standard Deviation', 'TEMP Slope', 'TEMP Range']

# defining X and y
X_wesad = wesad_data[feature_col]
y_train = wesad_data['Label']
groups = wesad_data['Participant ID']

In [None]:
score = []

# normalising WESAD training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_wesad)

# training model on WESAD
model = SVC(kernel='linear', C=1.0, probability=False)
model.fit(X_train_scaled, y_train)

# looping through each participant in the Affective dataset
for sid, df in affective_data.items():

    X_test = df[feature_col]
    y_test = df['Stress Label']
    
    # applying same scaling from training data
    X_test_scaled = scaler.transform(X_test)
    
    # predicting and evaluating
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    score.append({
        "Participant": sid,
        "Accuracy": acc,
        "F1 Score": f1
    })

results_df = pd.DataFrame(score)
results_df

Unnamed: 0,Participant,Accuracy,F1 Score
0,Drv_1,0.6125,0.586667
1,Drv_3,0.7375,0.666667
2,Drv_4,0.571429,0.608696
3,Drv_5,0.652174,0.737705
4,Drv_6,0.630952,0.617284
5,Drv_7,0.678571,0.742857
6,Drv_8,0.5,0.576923
7,Drv_9,0.460526,0.0
8,Drv_10,0.579545,0.430769
9,Drv_11,0.826087,0.813953


In [23]:
print(f" Average Accuracy = {results_df["Accuracy"].mean()}")

 Average Accuracy = 0.6050991485631073


In [24]:
print(results_df["F1 Score"].mean())

0.5785005929389425


In [25]:
def coral(source, target):
    # computing covariance matrix of source and target
    d = source.shape[1]
    cov_source = np.cov(source, rowvar=False) + np.eye(d) * 1e-5
    cov_target = np.cov(target, rowvar=False) + np.eye(d) * 1e-5

    # computing whitening and coloring transforms
    U_s, S_s, _ = np.linalg.svd(cov_source)
    U_t, S_t, _ = np.linalg.svd(cov_target)

    # whitening source
    sqrt_inv_S = np.diag(1.0 / np.sqrt(S_s))
    whitening = U_s @ sqrt_inv_S @ U_s.T

    # coloring to match target
    sqrt_S = np.diag(np.sqrt(S_t))
    coloring = U_t @ sqrt_S @ U_t.T

    # aligning 
    source_coral = (source - source.mean(axis=0)) @ whitening @ coloring + target.mean(axis=0)

    return source_coral

In [None]:
coral_scores = []

# fitting scaler on WESAD train once
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_wesad)

# looping through each participant
for sid, df in affective_data.items():
    # scaling test data
    X_test = df[feature_col]
    y_test = df['Stress Label']
    X_test_scaled = scaler.transform(X_test)
    
    # applying CORAL
    X_train_coral = coral(X_train_scaled, X_test_scaled)
    
    # training model on adapted data
    model = SVC(kernel='linear', C=1.0, probability=False)
    model.fit(X_train_coral, y_train)
    
    # evaluating on the (unadapted) test set
    y_pred = model.predict(X_test_scaled)  
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    coral_scores.append({
        "Participant": sid,
        "Accuracy": acc,
        "F1 Score": f1
    })

results_coral_df = pd.DataFrame(coral_scores)
results_coral_df

Unnamed: 0,Participant,Accuracy,F1 Score
0,Drv_1,0.7625,0.753247
1,Drv_3,0.8,0.764706
2,Drv_4,0.72619,0.684932
3,Drv_5,0.771739,0.727273
4,Drv_6,0.559524,0.493151
5,Drv_7,0.52381,0.428571
6,Drv_8,0.545455,0.52381
7,Drv_9,0.486842,0.360656
8,Drv_10,0.772727,0.756098
9,Drv_11,0.826087,0.8


In [27]:
print(f"Average Accuracy = {results_coral_df['Accuracy'].mean()}")

Average Accuracy = 0.6598109135652043


In [28]:
results_coral_df['F1 Score'].mean()

0.6065297748893725

In [None]:
importance_scores = []

# fitting scaler on source data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_wesad)

# looping through each target participant
for sid, df in affective_data.items():
    X_test = df[feature_col]
    y_test = df['Stress Label']
    X_test_scaled = scaler.transform(X_test)

    # combining source and target for domain classification
    X_combined = np.vstack([X_train_scaled, X_test_scaled])
    # domain labels 0=source, 1=target
    y_domain = np.hstack([np.zeros(len(X_train_scaled)), np.ones(len(X_test_scaled))])

    # training domain classifier to discriminate source vs target
    domain_clf = LogisticRegression(max_iter=1000)
    domain_clf.fit(X_combined, y_domain)

    # predicting probability of sample being from target domain
    p_target = domain_clf.predict_proba(X_train_scaled)[:, 1]

    # calculating weights: w_i = p_i / (1 - p_i)
    epsilon = 1e-6  # to avoid division by 0
    sample_weights = p_target / (1 - p_target + epsilon)

    max_weight = 10
    sample_weights = np.clip(sample_weights, 0, max_weight)

    # training weighted classifier on source data
    clf = SVC(kernel='linear', C=1.0, probability=False)
    clf.fit(X_train_scaled, y_train, sample_weight=sample_weights)

    # evaluating on target participant
    y_pred = clf.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    importance_scores.append({
        "Participant": sid,
        "Accuracy": acc,
        "F1 Score": f1
    })

results_importance_df = pd.DataFrame(importance_scores)
results_importance_df

Unnamed: 0,Participant,Accuracy,F1 Score
0,Drv_1,0.5625,0.313725
1,Drv_3,0.675,0.675
2,Drv_4,0.571429,0.419355
3,Drv_5,0.619565,0.653465
4,Drv_6,0.535714,0.315789
5,Drv_7,0.547619,0.5
6,Drv_8,0.454545,0.414634
7,Drv_9,0.526316,0.217391
8,Drv_10,0.556818,0.204082
9,Drv_11,0.75,0.656716


In [30]:
results_importance_df["Accuracy"].mean()

0.5666255456658774

In [31]:
results_importance_df["F1 Score"].mean()

0.45883009329276847

In [32]:
tca_scores = []

# fitting scaler on WESAD training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_wesad)

for sid, df in affective_data.items():
    # scaling test data using same scaler
    X_test = df[feature_col]
    y_test = df['Stress Label']
    X_test_scaled = scaler.transform(X_test)

    base_estimator = SVC(kernel='linear', C=1.0, probability=False)
    tca = TCA(estimator=base_estimator, mu=10, n_components=16, kernel="rbf")  

    # unsupervised fit 
    tca.fit(X_train_scaled, y_train, Xt=X_test_scaled)

    # predicting on target data
    y_pred = tca.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    tca_scores.append({
        "Participant": sid,
        "Accuracy": acc,
        "F1 Score": f1
    })

results_tca_df = pd.DataFrame(tca_scores)
results_tca_df

Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...
Fit transform...
Fit Estimator...


Unnamed: 0,Participant,Accuracy,F1 Score
0,Drv_1,0.5125,0.235294
1,Drv_3,0.675,0.551724
2,Drv_4,0.5,0.3
3,Drv_5,0.673913,0.741379
4,Drv_6,0.547619,0.344828
5,Drv_7,0.583333,0.705882
6,Drv_8,0.556818,0.580645
7,Drv_9,0.578947,0.272727
8,Drv_10,0.556818,0.315789
9,Drv_11,0.75,0.716049


In [33]:
results_tca_df["Accuracy"].mean()

0.5779124297073381

In [34]:
results_tca_df["F1 Score"].mean()

0.49607077178655584