# VR CO2 Study - Data processing

This notebook does the following:

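1. Loads the participant file index (`temp/file_index.json`) and selects the participants to process.
2. For each participant, loads the cached synced CSV if one exists; otherwise loads the mask, eye-tracking and Biopac recordings for the air and CO2 conditions, syncs them, and downsamples and combines them.
3. Filters and normalises the synced data per participant, plots an overview, and extracts features.

Input: `temp/file_index.json` and the recording files it references.
Output: per-participant synced data (cached as `temp/synced_participant_data/<participant id>.csv`), overview plots and extracted features.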

In [None]:
# TODO
# 1. Participant 1: the CO2 mask data timestamps look wrong. The first row of the CO2 mask dataframe is '2022-04-27 17:50:59 UTC', while the file suggests the recording started at 2022-04-27T19-55-22.
# 2. Participant 10 seems to have the same issue as participant 1. A fixed-time CSV was generated for it, but the underlying issue still needs solving.

In [None]:
# Imports
import os
if('notebooks' in os.getcwd()):
    os.chdir('..')
import json
import pandas as pd
from utils.constants import AirFiles, CO2Files, DATA_COLUMNS, FREQUENCIES
from utils.load_data import load_data_with_event_matching
from utils.timestamps import read_unix, read_j2000, j2000_to_unix, generate_biopac_unix_timestamps
from classes.Participant import Participant
from classes.DataHandler import DataHandler
import numpy as np
import matplotlib.pyplot as plt
from utils.plots import plot_eyetracking_filter, plot_participant_overview, plot_assess_filter
from utils.normalisation import eye_tracking as normalise_pupil_size
import math


In [None]:
# Read the participant file index from temp/file_index.json (relative to the
# current working directory) and order it by its index labels.
file_index_path = os.path.join(os.getcwd(), 'temp/file_index.json')
file_index = pd.read_json(file_index_path).sort_index()

# The index labels of the file index serve as the participant ids.
participant_ids = file_index.index
participant_ids

In [None]:
# Directory in which the per-participant synced CSV files are looked up.
synced_path_parts = ('temp', 'synced_participant_data')
synced_data_directory = os.path.join(os.getcwd(), *synced_path_parts)

# Restrict the run to an explicit list of participants; this replaces the ids
# taken from the file index.
participant_ids = [27]
# Notes and alternative id lists kept from earlier runs:
#check p28 atul c02 eye data
#participant_ids = [1,2,6,8,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,40,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62]
#working pcs
#participant_ids = [28,30,31,32,33,34,35,36,37,40,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62]

In [None]:
# Loop that generates downsampled, synced files for every participant and then
# filters, normalises, plots and extracts features from each participant's data.
#
# Names read from earlier cells: participant_ids, file_index, synced_data_directory.
# Names left behind for later cells: participant, filtered_data, normalised_data and
# features (all of them belong to the LAST participant processed).
for participant_id in participant_ids:
    participant_to_retrieve = participant_id
    # Column 0 of the file index holds one record per participant, keyed by participant id.
    participant_file_index = file_index[0][participant_to_retrieve]
    participant = Participant(participant_file_index['id'])

    synced_participant_file = os.path.join(synced_data_directory, str(participant.id) + '.csv')
    if os.path.exists(synced_participant_file):
        # Reuse the cached synced file instead of re-reading the raw recordings.
        print('Synced file for participant: ' + str(participant.id) + ' found. Loading existing file.')
        participant.set_synced_data(pd.read_csv(synced_participant_file))
    else:
        print('Generating new synced participant file for participant: ' + str(participant.id))

        # AIR
        if participant_file_index[AirFiles.MASK.value] is None:
            # NOTE(review): nothing is loaded for the air condition in this case, yet
            # participant.air_synced_data is still read by the downsampling step below.
            # Confirm that Participant / DataHandler cope with a missing air condition.
            print('Air mask file missing')
        else:
            print('Loading Air condition Data')
            air_mask_file = participant_file_index[AirFiles.MASK.value]
            air_event_file = participant_file_index[AirFiles.EVENT.value]
            air_eyetracking_file = participant_file_index[AirFiles.EYE.value]
            air_biopac_file = participant_file_index[AirFiles.BIOPAC.value]
            air_biopac_start_unix = participant_file_index[AirFiles.BIOPAC_UNIX_START_TIME.value]
            print(read_unix(air_biopac_start_unix))

            # Load mask data
            participant.set_air_mask_data(DataHandler.load_mask_data(air_mask_file, air_event_file, participant.id))
            # Load eye tracking data
            participant.set_air_eye_data(DataHandler.load_eyetracking_data(air_eyetracking_file, participant.id, 'air'))
            # Load biopac data
            participant.set_air_biopac_data(DataHandler.load_biopac_data(air_biopac_file, air_biopac_start_unix, participant.id))
            # Sync mask, eye tracking and biopac data
            participant.set_air_synced_data(DataHandler.sync_signal_data(participant.air_mask_data, participant.air_eye_data, participant.air_biopac_data, air_biopac_start_unix))

        # CO2 (loaded unconditionally; there is no missing-file check for this condition)
        print('Loading CO2 condition Data')
        co2_mask_file = participant_file_index[CO2Files.MASK.value]
        co2_event_file = participant_file_index[CO2Files.EVENT.value]
        co2_eyetracking_file = participant_file_index[CO2Files.EYE.value]
        co2_biopac_file = participant_file_index[CO2Files.BIOPAC.value]
        co2_biopac_start_unix = participant_file_index[CO2Files.BIOPAC_UNIX_START_TIME.value]

        # Load mask data
        participant.set_co2_mask_data(DataHandler.load_mask_data(co2_mask_file, co2_event_file, participant.id))
        # Load eye tracking data
        participant.set_co2_eye_data(DataHandler.load_eyetracking_data(co2_eyetracking_file, participant.id, 'co2'))
        # Load biopac data
        participant.set_co2_biopac_data(DataHandler.load_biopac_data(co2_biopac_file, co2_biopac_start_unix, participant.id))
        # Sync mask, eye tracking and biopac data
        participant.set_co2_synced_data(DataHandler.sync_signal_data(participant.co2_mask_data, participant.co2_eye_data, participant.co2_biopac_data, co2_biopac_start_unix))

        print('Downsampling and combining data')
        # Downsample and combine data. This data is also saved
        participant.set_synced_data(DataHandler.downsample_participant_data(participant.id, DataHandler.label_data(participant.air_synced_data), DataHandler.label_data(participant.co2_synced_data)))

    # Filter data
    filtered_data = DataHandler.filter_data(participant.synced_data)
    # Normalise data per participant
    normalised_data = DataHandler.normalise_data(filtered_data)

    plot_participant_overview(normalised_data, True)

    # Feature extraction
    features = DataHandler.extract_features(normalised_data)
    # ML
    # str() is required here as in the messages above: concatenating a non-string
    # participant id to a str raises TypeError.
    print('Finished data processing for participant: ' + str(participant.id))



In [None]:
# Apply the filter to the normalised data left over from the main loop and plot the result.
# NOTE(review): this overwrites `filtered_data` from the loop (there it was derived from
# `participant.synced_data`, here from the already filtered and normalised data) —
# confirm that filtering a second time is intended.
filtered_data = DataHandler.filter_data(normalised_data)
plot_participant_overview(filtered_data, False)

In [None]:
# Plot the 'VerboseData.Left.PupilDiameterMm' column of the normalised data
# (all rows of the last processed participant) against its row index.
plt.plot(normalised_data.loc[:, 'VerboseData.Left.PupilDiameterMm'])

In [None]:
# Plot the 'Biopac_GSR' column of the normalised data
# (all rows of the last processed participant) against its row index.
plt.plot(normalised_data.loc[:, 'Biopac_GSR'])

In [None]:
# Plot the 'Emg/Amplitude[CenterCorrugator]' column of the normalised data
# (all rows of the last processed participant) against its row index.
plt.plot(normalised_data.loc[:, 'Emg/Amplitude[CenterCorrugator]'])

In [None]:
# Plot the 'Emg/Amplitude[CenterCorrugator]' column for the rows whose Condition
# is 'CO2' and whose Segment is 'gas_inhalation'.
co2_inhalation_rows = (normalised_data['Segment'] == 'gas_inhalation') & (normalised_data['Condition'] == 'CO2')
plt.plot(normalised_data.loc[co2_inhalation_rows, 'Emg/Amplitude[CenterCorrugator]'])

In [None]:
#TODO: update to filter both pupils
# NOTE(review): `Filters` is not imported in the notebook's import cell, so this cell
# raises NameError on a fresh kernel unless the name is defined elsewhere — confirm.
# NOTE(review): the main loop only calls set_air_synced_data when no cached synced CSV
# exists and the air mask file is present — confirm `participant.air_synced_data`
# is available when this cell runs.
test = Filters.filter_pupil_size(participant.air_synced_data)
# Pass the original and the filtered left-pupil column to the comparison plot for the 'air' condition.
plot_eyetracking_filter(participant.air_synced_data['VerboseData.Left.PupilDiameterMm'], test['VerboseData.Left.PupilDiameterMm'], participant.id, 'air')

In [None]:
# Examples of how to retrieve data from a participant

In [None]:
# Example: request the expression calibration data for the 'air' condition from
# `participant`, i.e. the participant left over from the last iteration of the main loop.
participant.get_expression_calibration_data('air')

In [None]:
# Example: request the brightness calibration data for the 'air' condition from
# `participant`, i.e. the participant left over from the last iteration of the main loop.
participant.get_brightness_calibration_data('air')

In [None]:
# Example: request the condition data for the 'air' condition from
# `participant`, i.e. the participant left over from the last iteration of the main loop.
participant.get_condition_data('air')

In [None]:
# Downsample the expression calibration, brightness calibration and condition data
# of both conditions. Arguments are passed in the order: air (expression, brightness,
# condition), then co2 (expression, brightness, condition).
# NOTE(review): the main loop calls downsample_participant_data with three arguments
# (id, air data, co2 data) whereas seven are passed here — confirm which signature is current.
test = DataHandler.downsample_participant_data(
    participant.id,
    *[
        retrieve(condition)
        for condition in ('air', 'co2')
        for retrieve in (
            participant.get_expression_calibration_data,
            participant.get_brightness_calibration_data,
            participant.get_condition_data,
        )
    ]
)

In [None]:
# Filter the 'air' expression calibration data with the fit-state threshold.
# POTENTIAL ISSUE: rows that carry event data may be removed by this filter,
# so it should probably be applied at a later stage.
# NOTE(review): `Filters` is not imported in the notebook's import cell, so this cell
# raises NameError on a fresh kernel unless the name is defined elsewhere — confirm.
Filters.filter_fit_state_threshold(participant.get_expression_calibration_data('air'))

In [None]:
# Pass the participant's synced data (`test`, assigned in an earlier cell) to
# DataHandler.normalise_data. Different normalisation methods are to be used per modality.
# To be implemented later, see the cells below.
# This does not work yet.
test2 = DataHandler.normalise_data(test)




In [None]:
# Apply the eye-tracking normaliser (utils.normalisation.eye_tracking, imported as
# normalise_pupil_size) to `test`, which is assigned in an earlier cell.
test3 = normalise_pupil_size(test)

In [None]:
from sklearn.preprocessing import MinMaxScaler

#THIS WORKS BUT MEAN FILLING IS BAD
# Min-max scale the left pupil size of the air synced data. The scaler is fitted on
# the left pupil size of the 'air' brightness calibration data only and then applied
# to the whole air recording.
eye_tracking_df = pd.DataFrame(participant.get_brightness_calibration_data('air')[DATA_COLUMNS.EYE_LEFT_PUPIL_SIZE.value])
synced_data = pd.DataFrame(participant.air_synced_data[DATA_COLUMNS.EYE_LEFT_PUPIL_SIZE.value])

# fit() returns the fitted scaler itself, so construction and fitting are chained.
left_eye_min_max_scaler = MinMaxScaler().fit(eye_tracking_df)

# transform() yields a plain array; wrap it so the result keeps the index and
# column labels of the synced data.
scaled = pd.DataFrame(
    left_eye_min_max_scaler.transform(synced_data),
    index=synced_data.index,
    columns=synced_data.columns,
)

# Earlier attempt, kept for reference: fit / fit_transform directly on the synced
# left pupil series wrapped in a list.
#left_eye_min_max_scaler = MinMaxScaler()
#left_eye_min_max_scaler.fit([participant.air_synced_data[DATA_COLUMNS.EYE_LEFT_PUPIL_SIZE.value]])
#test = left_eye_min_max_scaler.fit_transform([participant.air_synced_data[DATA_COLUMNS.EYE_LEFT_PUPIL_SIZE.value]])

In [None]:
# Print the read_unix conversion of two hard-coded Unix timestamps;
# the second value is the mask timestamp.
for unix_seconds in (1651321906.96, 1651320319.88089):
    print(read_unix(unix_seconds))

In [None]:
# Convert J2000
print(read_j2000(704397173515))

In [None]:
# Plot the 'VerboseData.Left.PupilDiameterMm' column for the rows whose Condition
# is 'AIR' and whose Segment is 'gas_inhalation'.
air_inhalation_rows = (normalised_data['Segment'] == 'gas_inhalation') & (normalised_data['Condition'] == 'AIR')
plt.plot(normalised_data.loc[air_inhalation_rows, 'VerboseData.Left.PupilDiameterMm'])