In [9]:
import pandas as pd
import numpy as np

np.random.seed(42)

#Parameters
n_apprenants = 200
n_essais = 20  # par apprenant
n_total = n_apprenants * n_essais

# Creating columns
ID_s = np.repeat(np.arange(1, n_apprenants+1), n_essais)
response_time = np.random.uniform(low=0, high=60, size=n_total)
errors = np.random.poisson(lam=1.5, size=n_total)
Mean_HR = np.random.normal(loc=50, scale=10, size=n_total)
HRV_SDNN = np.random.normal(loc=50, scale=10, size=n_total)
HRV_RMSSD = np.random.normal(loc=35, scale=8, size=n_total)
EEG_alpha = np.random.normal(loc=8, scale=2, size=n_total)
EEG_beta = np.random.normal(loc=15, scale=3, size=n_total)
subjective_values = np.random.randint(1, 8, size=n_total)

#Let's simulate a simple label : weak/medium/high cognitive load
#Just a simple logic for now : the higher the response time and error rate, the higher the cognitive load. It will be more complex in reality, though.
cognitive_load_simple = 0.4*temps_reponse + 0.4*erreurs + 0.2*(subjective_values/7)
labels = pd.qcut(cognitive_load_simple, q=3, labels=["weak", "medium", "high"])

# =============================================
# Here i try to generate a more complex cognitive load label
#    Directions:
#      ↑ response_time, ↑ errors, ↑ Mean_HR, ↑ EEG_beta, ↑ self_report  => ↑ load
#      ↑ HRV_SDNN, ↑ HRV_RMSSD, ↑ EEG_alpha                              => ↓ load
# =============================================

def scale_01(x):
    x = np.asarray(x, dtype=float)
    xmin, xmax = np.nanmin(x), np.nanmax(x)
    if xmax == xmin:
        return np.zeros_like(x)
    return (x - xmin) / (xmax - xmin)

# Scale to comparable ranges
rt_s     = scale_01(response_time)
err_s    = scale_01(errors)
hr_s     = scale_01(Mean_HR)
sdnn_s   = scale_01(HRV_SDNN)
rmssd_s  = scale_01(HRV_RMSSD)
alpha_s  = scale_01(EEG_alpha)
beta_s   = scale_01(EEG_beta)
subj_s = scale_01(self_report)

# Negative contributors become (1 - scaled)
neg_sdnn  = 1.0 - sdnn_s
neg_rmssd = 1.0 - rmssd_s
neg_alpha = 1.0 - alpha_s

# Weights (sum to 1.0); adjustable
weights = {
    "rt":      0.20,  # response time
    "err":     0.20,  # errors
    "hr":      0.10,  # Mean_HR
    "beta":    0.15,  # EEG_beta
    "subj":  0.10,  # self-report measures
    "sdnn":    0.10,  # HRV_SDNN (inverse)
    "rmssd":   0.075, # HRV_RMSSD (inverse)
    "alpha":   0.075, # EEG_alpha (inverse)
}

score_complex = (
    weights["rt"]     * rt_s      +
    weights["err"]    * err_s     +
    weights["hr"]     * hr_s      +
    weights["beta"]   * beta_s    +
    weights["subj"] * subj_s  +
    weights["sdnn"]   * neg_sdnn  +
    weights["rmssd"]  * neg_rmssd +
    weights["alpha"]  * neg_alpha
)

cognitive_load = pd.qcut(score_complex, q=3, labels=["weak", "medium", "high"])

# Creating the dataframe
df = pd.DataFrame({
    "learner_id": ids,
    "response_time": response_time,
    "errors": errors,
    "Mean_HR": Mean_HR,
    "HRV_SDNN": HRV_SDNN,
    "HRV_RMSSD": HRV_RMSSD,
    "EEG_alpha": EEG_alpha,
    "EEG_beta": EEG_beta,
    "self_report": subjective_values,
    "cognitive_load_simple": cognitive_load_simple,
    "cognitive_load": labels
})

NameError: name 'cognitive_load_simple' is not defined

In [5]:
df.to_csv("cog_data.csv", index=False)

In [6]:
df.head()

Unnamed: 0,learner_id,response_time,errors,MeanHR,HRV_MeanNN,HRV_SDNN,HRV_RMSSD,HRV_SDANN5,EEG_alpha,EEG_beta,EEG_theta,self_report,cognitive_load
0,1,22.472407,3,71.743873,921.799256,42.96263,36.374737,28.02564,7.337915,18.925649,7.352275,4,medium
1,1,57.042858,0,89.806249,733.249188,50.138357,40.50452,32.309995,4.967289,12.623541,4.861092,1,high
2,1,43.919637,1,65.672207,795.729058,42.798371,47.800352,27.377601,3.879961,7.472444,3.988194,3,medium
3,1,35.919509,2,75.821524,897.087306,50.70697,35.170071,37.343521,5.804971,18.235991,5.878897,1,high
4,1,9.361118,0,76.98277,855.208758,46.195315,30.620707,45.311041,8.091369,11.251515,3.764296,5,low


In [7]:
df

Unnamed: 0,learner_id,response_time,errors,MeanHR,HRV_MeanNN,HRV_SDNN,HRV_RMSSD,HRV_SDANN5,EEG_alpha,EEG_beta,EEG_theta,self_report,cognitive_load
0,1,22.472407,3,71.743873,921.799256,42.962630,36.374737,28.025640,7.337915,18.925649,7.352275,4,medium
1,1,57.042858,0,89.806249,733.249188,50.138357,40.504520,32.309995,4.967289,12.623541,4.861092,1,high
2,1,43.919637,1,65.672207,795.729058,42.798371,47.800352,27.377601,3.879961,7.472444,3.988194,3,medium
3,1,35.919509,2,75.821524,897.087306,50.706970,35.170071,37.343521,5.804971,18.235991,5.878897,1,high
4,1,9.361118,0,76.982770,855.208758,46.195315,30.620707,45.311041,8.091369,11.251515,3.764296,5,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,200,26.526422,3,72.726884,729.816635,41.308708,29.900395,37.317007,3.278917,16.425938,8.246637,7,high
3996,200,20.064071,4,79.220545,716.568529,57.597709,29.419697,22.185977,10.776644,13.109997,7.841383,6,medium
3997,200,23.674339,1,79.832199,777.742974,53.706140,33.512474,37.284862,5.522957,14.030647,4.321099,6,medium
3998,200,31.796435,0,61.720755,775.577695,44.607705,40.017004,18.759596,7.628082,20.508856,7.727779,4,medium
