In [None]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from google.colab import files

In [None]:
# Sampling rate of the EDA recording in samples per second (the original
# code hard-coded "4 samples per second" in two places).
SAMPLING_RATE_HZ = 4

# Segment size: 1.5 minutes of samples (1.5 * 60 s * 4 Hz = 360 samples).
segment_size = int(1.5 * 60 * SAMPLING_RATE_HZ)

# Column order of the feature table written for every participant.
FEATURE_COLUMNS = ['mean', 'std_dev', 'min', 'max', 'range', 'num_peaks',
                   'mean_peak_amplitude', 'mean_peak_duration']

# For each participant 1..10: read the baseline EDA recording, cut it into
# consecutive non-overlapping windows of `segment_size` samples, compute
# time-domain and peak features per window, print the table and save it as CSV.
for x in range(1, 11):
  x = str(x)

  # Load EDA data. The first line of the file is consumed by read_csv as the
  # header, so the single column is renamed to 'EDA' afterwards.
  eda_df = pd.read_csv('/content/drive/MyDrive/Data/E4/S'+x+'_N_M/Baseline/EDA.csv')
  eda_df.columns = ['EDA']

  # Drop the first data row (presumably the sample-rate line of the device
  # export -- TODO confirm) and renumber from 0 so that row positions and
  # index labels agree for the windowing below.
  eda_df = eda_df.drop([0]).reset_index(drop=True)

  # Convert the 'EDA' column to float
  eda_df['EDA'] = eda_df['EDA'].astype(float)

  # One dict per window; the DataFrame is built once after the loop
  # (DataFrame.append was removed in pandas 2.0 and is quadratic anyway).
  feature_rows = []

  for i in range(0, len(eda_df), segment_size):
    # Positional, end-exclusive slice: every window holds at most
    # `segment_size` samples and neighbouring windows never share a sample.
    # (The previous label-based .loc slice was end-inclusive, which made
    # windows overlap by one sample.)
    segment = eda_df['EDA'].iloc[i:i + segment_size].reset_index(drop=True)

    # Time-domain features (np.std uses ddof=0, i.e. population std).
    mean = np.mean(segment)
    std_dev = np.std(segment)
    min_val = np.min(segment)
    max_val = np.max(segment)
    range_val = max_val - min_val

    # Local maxima of the raw signal; no height/prominence filter is applied.
    peaks, _ = find_peaks(segment)
    num_peaks = len(peaks)

    if num_peaks > 0:
      mean_peak_amplitude = np.mean(segment.iloc[peaks])
      if num_peaks > 1:
        # Mean spacing between consecutive peaks, converted from samples
        # to seconds. NOTE: despite the column name this is an inter-peak
        # interval, not the width of a peak.
        mean_peak_duration = np.mean(np.diff(peaks) / SAMPLING_RATE_HZ)
      else:
        # A single peak has no interval. Keep the NaN the original code
        # produced, but without np.mean's "mean of empty slice" warning.
        mean_peak_duration = np.nan
    else:
      mean_peak_amplitude = 0
      mean_peak_duration = 0

    feature_rows.append({
        'mean': mean,
        'std_dev': std_dev,
        'min': min_val,
        'max': max_val,
        'range': range_val,
        'num_peaks': num_peaks,
        'mean_peak_amplitude': mean_peak_amplitude,
        'mean_peak_duration': mean_peak_duration
    })

  # Passing `columns` keeps the header stable even if a recording is empty.
  features_df = pd.DataFrame(feature_rows, columns=FEATURE_COLUMNS)

  print(features_df)
  # Save the features to a new CSV file
  features_df.to_csv('/content/drive/MyDrive/Data/E4/S'+x+'_N_M/EDA_Baseline_1_5.csv', index=False)


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  0.077706  0.010463  0.000000  0.101152  0.101152      123.0   
1  0.110076  0.009608  0.092189  0.139564  0.047375      123.0   
2  0.133630  0.005582  0.122919  0.156210  0.033291      127.0   
3  0.149871  0.004560  0.139564  0.169014  0.029450      127.0   
4  0.162711  0.004615  0.152368  0.172855  0.020487      127.0   
5  0.170986  0.001943  0.167733  0.174135  0.006402       13.0   

   mean_peak_amplitude  mean_peak_duration  
0             0.079573            0.727459  
1             0.111978            0.727459  
2             0.135501            0.700397  
3             0.151743            0.704365  
4             0.164305            0.706349  
5             0.172855            0.666667  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  1.615037  0.264286  0.000000  2.440248  2.440248       37.0   
1  1.746009  0.335187  1.206795  3.013095  1.806300       32.0   
2  1.816535  0.123274  1.612956  2.264800  0.651844       41.0   
3  1.661107  0.077307  1.537398  1.898538  0.361140       18.0   

   mean_peak_amplitude  mean_peak_duration  
0             1.588016            2.416667  
1             1.766934            2.862903  
2             1.828166            2.181250  
3             1.679336            2.514706  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  0.599294  0.036147  0.000000  0.628679  0.628679      104.0   
1  0.561893  0.086709  0.458652  0.683737  0.225085       98.0   
2  0.492311  0.008701  0.471456  0.512429  0.040973      104.0   
3  0.507142  0.004338  0.495784  0.517551  0.021767       42.0   

   mean_peak_amplitude  mean_peak_duration  
0             0.604302            0.861650  
1             0.565059            0.914948  
2             0.495365            0.861650  
3             0.509838            0.823171  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  0.447857  0.038887  0.000000  0.544271  0.544271       60.0   
1  0.284170  0.088875  0.121661  0.463591  0.341930       77.0   
2  0.153212  0.011815  0.137028  0.181850  0.044822       87.0   
3  0.137067  0.002056  0.133186  0.149835  0.016649       84.0   
4  0.134283  0.001846  0.130625  0.144712  0.014087       89.0   
5  0.133635  0.001696  0.129344  0.137028  0.007684       25.0   

   mean_peak_amplitude  mean_peak_duration  
0             0.451361            1.487288  
1             0.282921            1.148026  
2             0.155060            1.034884  
3             0.139147            1.072289  
4             0.136050            1.014205  
5             0.135543            0.947917  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  0.670733  0.143642  0.000000  1.124666  1.124666       68.0   
1  0.435701  0.102569  0.315303  0.700775  0.385472       70.0   
2  0.912749  0.165069  0.636743  1.711366  1.074623       52.0   
3  1.005108  0.129938  0.826347  1.570496  0.744149       61.0   
4  0.660162  0.078979  0.535642  0.828908  0.293266       97.0   
5  0.584767  0.025974  0.539484  0.620164  0.080680        3.0   

   mean_peak_amplitude  mean_peak_duration  
0             0.666843            1.324627  
1             0.414224            1.278986  
2             0.895520            1.725490  
3             0.975905            1.408333  
4             0.662372            0.916667  
5             0.604370            1.750000  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  1.759548  0.227982  0.000000  3.111704  3.111704       55.0   
1  1.653654  0.174989  1.460560  2.190523  0.729963       55.0   
2  1.758214  0.258360  1.396329  2.344200  0.947871       49.0   
3  1.537927  0.074123  1.365594  1.786924  0.421330       43.0   
4  1.604472  0.083234  1.402732  1.945722  0.542990       55.0   
5  1.633992  0.074914  1.532276  1.908783  0.376507       31.0   

   mean_peak_amplitude  mean_peak_duration  
0             1.844962            1.620370  
1             1.638592            1.564815  
2             1.738788            1.791667  
3             1.542471            2.011905  
4             1.608204            1.629630  
5             1.629067            1.350000  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  3.322963  0.391244  0.000000  3.888129  3.888129       58.0   
1  2.927568  0.141847  2.669043  3.249171  0.580128       46.0   
2  2.985012  0.160982  2.752284  3.390041  0.637757       48.0   
3  2.806454  0.167173  2.521769  3.256855  0.735086       58.0   
4  2.644083  0.213393  2.334357  3.599986  1.265629       40.0   

   mean_peak_amplitude  mean_peak_duration  
0             3.355303            1.513158  
1             2.898249            1.938889  
2             2.983065            1.882979  
3             2.784388            1.561404  
4             2.581016            1.910256  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  1.469576  0.153519  0.000000  1.894497  1.894497       30.0   
1  1.689463  0.188619  1.316930  2.173875  0.856945       29.0   
2  1.901866  0.115511  1.709004  2.239187  0.530183       31.0   
3  1.808143  0.170073  1.619359  2.391583  0.772224       15.0   

   mean_peak_amplitude  mean_peak_duration  
0             1.496426            2.948276  
1             1.771296            2.991071  
2             1.947409            2.891667  
3             1.743240            2.089286  


  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


       mean   std_dev       min       max     range  num_peaks  \
0  0.388801  0.056513  0.000000  0.550674  0.550674       59.0   
1  0.273542  0.043927  0.198499  0.352175  0.153676       77.0   
2  0.163787  0.015440  0.145993  0.206183  0.060190       88.0   
3  0.156663  0.011815  0.137028  0.185692  0.048664       77.0   
4  0.184681  0.010835  0.166483  0.208744  0.042261       30.0   

   mean_peak_amplitude  mean_peak_duration  
0             0.391224            1.500000  
1             0.280110            1.157895  
2             0.166861            1.022989  
3             0.158383            1.151316  
4             0.186247            1.275862  
       mean   std_dev       min       max     range  num_peaks  \
0  0.530100  0.208353  0.000000  1.024776  1.024776       75.0   
1  0.120035  0.052726  0.039966  0.221817  0.181851      116.0   
2  0.183851  0.007836  0.167763  0.204902  0.037139      115.0   
3  0.165250  0.003275  0.158799  0.175447  0.016648       61.0   

  

  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({
  features_df = features_df.append({


Baseline Normalization

In [None]:
# For each participant 1..10: subtract the participant's mean baseline EDA
# features from every row of the study-session EDA features and save the result.
for x in map(str, range(1, 11)):
  # Per-segment baseline features for this participant
  baseline_df = pd.read_csv('/content/drive/MyDrive/Data/E4/S'+x+'_M/EDA_Baseline_1_5.csv')

  # Collapse the baseline to a single row holding the mean of each numeric
  # column (the same operation whether the file has one row or many).
  baseline_df = baseline_df.mean(numeric_only=True).to_frame().transpose()
  print(baseline_df)

  # Per-segment features of the study session
  study_df = pd.read_csv('/content/drive/MyDrive/Data/E4/S'+x+'_M/EDA_Study_1_5.csv')
  print(study_df)

  # Baseline-corrected features: each EDA metric minus its baseline mean.
  # Only the listed metrics are carried over, in this order.
  normalized_df = pd.DataFrame({
      metric: study_df[metric] - baseline_df[metric].values[0]
      for metric in ('mean', 'std_dev', 'min', 'max', 'range',
                     'num_peaks', 'mean_peak_amplitude', 'mean_peak_duration')
  })
  print(normalized_df)

  # Write the normalized table next to the inputs
  normalized_df.to_csv('/content/drive/MyDrive/Data/E4/S'+x+'_M/EDA_Normalized_1_5.csv', index=False)

       mean   std_dev       min      max     range  num_peaks  \
0  1.140309  0.090508  0.845537  1.35859  0.513053  60.428571   

   mean_peak_amplitude  mean_peak_duration  
0             1.149562            1.476452  
        mean   std_dev       min       max     range  num_peaks  \
0   1.155193  0.110220  0.000000  1.405110  1.405110       70.0   
1   1.369112  0.051048  1.286018  1.556315  0.270297       51.0   
2   1.516774  0.063227  1.394965  1.688213  0.293248       64.0   
3   1.397114  0.074071  1.288678  1.551193  0.262515       79.0   
4   1.597290  0.055145  1.488446  1.713824  0.225378       74.0   
5   1.429423  0.098115  1.259225  1.629307  0.370082       95.0   
6   1.328833  0.064793  1.224650  1.489726  0.265076       83.0   
7   1.146398  0.060254  1.067141  1.298923  0.231782      110.0   
8   1.045932  0.025507  0.969819  1.124767  0.154948      106.0   
9   0.970889  0.026969  0.892886  1.022322  0.129436      109.0   
10  0.882411  0.038704  0.759708  0.964598

In [None]:
import pandas as pd
import numpy as np

# Thresholds applied to the baseline-normalized EDA features
MEAN_THRESHOLD = 1
STD_THRESHOLD = 0.05
RANGE_THRESHOLD = 1

# For each participant 1..10: label every segment with the number of
# thresholds it reaches (capped at 2), print the labels and save the table.
#
# NOTE: an earlier revision also derived a per-segment label from the
# eye-tracker file ('/content/drive/MyDrive/Data/Eye_Tracker/<x>_N_M_LabelsU.csv',
# windows of 18000 rows) and used it when the thresholds gave 0. That path
# was disabled (commented out) and is not executed here.
for x in map(str, range(1, 11)):
    # Load the normalized EDA features
    eda_df = pd.read_csv('/content/drive/MyDrive/Data/E4/S'+x+'_M/EDA_Normalized_1_5.csv')

    # Count, per row, how many of the three features reach their threshold.
    # Comparisons against NaN are False, so missing values never count.
    thresholds_reached = (
        (eda_df['mean'] >= MEAN_THRESHOLD).astype('int64')
        + (eda_df['std_dev'] >= STD_THRESHOLD).astype('int64')
        + (eda_df['range'] >= RANGE_THRESHOLD).astype('int64')
    )

    # Only levels 0, 1 and 2 exist, so three hits are reported as 2.
    eda_df['Label'] = thresholds_reached.clip(upper=2)

    print(eda_df['Label'])

    # Save the features together with the new 'Label' column
    eda_df.to_csv('/content/drive/MyDrive/Data/EDA/1_5/S'+x+'_M_EDA_Label_1_5.csv', index=False)

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    1
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
Name: Label, dtype: int64
0     2
1     0
2     0
3     0
4     0
5     1
6     2
7     2
8     0
9     0
10    0
11    0
12    0
13    1
14    2
15    1
16    0
17    0
18    1
19    0
20    0
21    1
22    0
23    2
24    2
25    1
26    2
27    0
28    0
29    0
30    2
31    2
Name: Label, dtype: int64
0     0
1     0
2     0
3     1
4     0
5     1
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
Name: Label, dtype: int64
0     2
1     1
2     1
3     1
4     0
5     0
6     1
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    1
21    1
22    1
23    0
24    0
25    0
26    0
27