In [128]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [129]:
# Read the feature table from disk; the CSV's first column is used as the row index.
df = pd.read_csv(
    "./bio_features.csv",
    index_col=0,
)

In [130]:
# Sanity check: (n_rows, n_columns) of the freshly loaded frame.
df.shape

(3108, 23)

In [131]:
# Preview the first rows to eyeball column names and value ranges.
df.head()

Unnamed: 0,comfort,surprise,anxiety,calmness,boredom,ECG_Rate,HRV_RMSSD,HRV_MeanNN,HRV_SDNN,HRV_SDSD,...,EDA_Tonic_std,SCR_Onsets,SCR_Magnitude,SCR_Amplitude_Mean,SCR_RiseTime_Mean,SCR_RecoveryTime_Mean,Pupil_Mean,Pupil_Std,s_id,t_id
1,1.535117,0.51505,0.498328,0.973244,1.458194,93.672939,16.349132,642.222222,16.647047,16.845317,...,0.157719,0.083333,0.319789,0.000213,0.000715,0.000181,50.107703,4.686173,P05,2
2,1.824415,0.503344,1.025084,0.493311,1.401338,82.927241,37.975431,724.5,48.703183,39.1685,...,0.06854,0.083333,0.495892,0.000331,0.002347,0.000149,51.535008,3.995601,P05,2
3,1.866221,0.453177,1.020067,1.483278,1.526756,80.265577,45.153706,750.4,56.097874,46.79814,...,0.067606,0.166667,0.713099,0.000665,0.000491,0.000128,50.089017,4.202678,P05,2
4,1.842809,0.249164,1.483278,1.545151,1.535117,80.506441,38.199476,745.5,52.028197,39.292796,...,0.054674,0.083333,0.080493,5.4e-05,0.000176,6.9e-05,47.986345,5.933136,P05,2
5,1.857859,0.521739,0.682274,1.406355,1.655518,76.498652,43.761592,785.142857,77.426747,43.10036,...,0.347407,0.25,2.94786,0.002855,0.002752,0.000432,49.469538,4.686228,P05,2


In [132]:
# Remove the s_id / t_id columns so they are not used as model inputs
# (they look like subject / trial identifiers -- TODO confirm).
# errors="ignore" keeps this cell idempotent: it rebinds `df`, so running it a
# second time would otherwise raise KeyError for the already-removed columns.
df = df.drop(columns=["s_id", "t_id"], errors="ignore")

df.shape

(3108, 21)

In [133]:
# Targets are the five emotion columns; every remaining column is a feature.
y = df[["surprise", "anxiety", "boredom", "calmness", "comfort"]]
X = df.drop(columns=y.columns)

In [134]:
# Feature frame dimensions: (n_rows, n_feature_columns).
X.shape

(3108, 16)

In [135]:
# Target frame dimensions: (n_rows, n_target_columns); row count should match X.
y.shape

(3108, 5)

In [136]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the shuffle repeatable.
# NOTE(review): the raw file carried an s_id column (presumably a subject id) that
# was dropped before this point. If rows from one subject are correlated, a plain
# random split can put the same subject in both sets -- consider a group-aware split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [137]:
# Learn the per-feature mean and standard deviation from the training split only,
# so no statistics from the test split influence the transform.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transform to both splits.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [138]:
# Confirm that features and targets stayed aligned in each split (one line per array).
print(
    f"length of X_train_scaled: {len(X_train_scaled)}",
    f"length of y_train: {len(y_train)}",
    f"length of X_test_scaled: {len(X_test_scaled)}",
    f"length of y_test: {len(y_test)}",
    sep="\n",
)

length of X_train_scaled: 2486
length of y_train: 2486
length of X_test_scaled: 622
length of y_test: 622


In [139]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a single ordinary-least-squares model that predicts all target columns at once.
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predict on the held-out split (scaled with the scaler fitted on the training split).
y_pred = model.predict(X_test_scaled)

# Aggregate scores. With a multi-column y, both metrics default to a uniform
# average across targets, so one number can hide large differences between targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean squared error:", mse)
print("R-squared:", r2)

# Per-target breakdown: multioutput="raw_values" returns one score per column of
# y_test, in column order, which shows which targets the model explains at all.
per_target_scores = pd.DataFrame(
    {
        "mse": mean_squared_error(y_test, y_pred, multioutput="raw_values"),
        "r2": r2_score(y_test, y_pred, multioutput="raw_values"),
    },
    index=y_test.columns,
)
per_target_scores

Mean squared error: 0.22494036422180633
R-squared: 0.06696588657739463
