In [3]:
# @title Install dependencies
# @markdown Install `boto3`
!pip install boto3 --quiet

In [4]:
# @title Import packages
# @markdown `boto3`, `pandas`
import boto3       # s3 aws
import time
import pandas as pd

In [5]:
# @title Helper functions
# @markdown `data = load_data_from_s3()` : Load data from s3
def load_data_from_s3(s3_client = None, bucket_name = "cerealtimekillers"):
    """
    Get EEG and emotional data from s3 reading corresponding csv files.
    
    Parameters:
    s3_client (boto3 S3 client / None): Client used for every `get_object` call.
        When None (Default), a client for region 'eu-west-1' is created and boto3
        resolves the credentials on its own (e.g. the AWS_ACCESS_KEY_ID /
        AWS_SECRET_ACCESS_KEY environment variables or the shared credentials file).
        Any object exposing `get_object(Bucket=..., Key=...)` that returns a mapping
        with a readable 'Body' also works, which allows offline testing.
    bucket_name (str): Bucket holding GameLabels.csv and the GAMEEMO tree
        (Default is "cerealtimekillers")
    
    Returns:
    data (dict): containing the EEG data and emotional information of subjects and games.
        data[i][j] is a dict for subject index i (0-26) and game index j (0-3).
    
    Examples:
    - data[0][0]['EEG'] (pd.DataFrame) contains the first 14 columns of the EEG csv of subj 1 and game 1
    - and data[0][0]['EmoInf'] (pd.Series) contains the matching row of GameLabels.csv
    """
    
    # SECURITY: credentials must never be written into the notebook. The key pair
    # that used to be embedded here has been shared with the source and should be
    # treated as compromised (revoke / rotate it in IAM).
    if s3_client is None:
        s3_client = boto3.client('s3', region_name = 'eu-west-1')

    # Load emotional information
    obj = s3_client.get_object(Bucket = bucket_name, Key = "GameLabels.csv")
    Emodf = pd.read_csv(obj['Body'])
    
    # Load EEG data
    ## define labels and directory
    root = "GAMEEMO"
    fileType = "Preprocessed EEG Data/.csv format"
    subDirectory = [f"(S{i:02d})" for i in range(1, 28)]
    gameLabels = ["G1", "G2", "G3", "G4"]
    filenames = [game + "AllChannels.csv" for game in gameLabels]
    
    ## loading csv files 
    data = dict()
    for i, subdir in enumerate(subDirectory):
        data[i] = dict()
        for j, filename in enumerate(filenames):
            # e.g. "GAMEEMO/(S01)/Preprocessed EEG Data/.csv format/S01G1AllChannels.csv"
            objFileDir = f"{root}/{subdir}/{fileType}/{subdir[1:4]}{filename}"
            obj = s3_client.get_object(Bucket = bucket_name, Key = objFileDir)
            # Keep only the first 14 columns (presumably the EEG channels - TODO confirm)
            dt = pd.read_csv(obj['Body']).iloc[:, 0:14]
            data[i][j] = {
                "EEG": dt,
                # Assumes GameLabels.csv is ordered subject by subject with one row
                # per game, so row (i * number_of_games + j) matches this recording.
                "EmoInf": Emodf.iloc[i * len(filenames) + j],
            }
    
    return data


def CerealTimeKillersDataLoader(label_class, label_range, 
                                dataset_mix = True, 
                                winlen = None, stride = 1, nperseg = 256, fs = 129,
                                transform = None):
    """
    Cereal Time Killers Data Loader
    
    Loads every subject/game recording through `load_data_from_s3()`, converts the
    EEG of each recording into spectrograms with `get_specgram` and pairs them with
    the normalised fixed labels.
    
    Inputs:
    label_class (CerealTimeKillersLabels): Labels used for model prediction
        (`label_class.fixed` is the list of label columns, `label_class.electrode` is passed to `get_specgram`)
    label_range (1*2 list): The [min, max] of emotional states for transformation
    dataset_mix (bool): Whether to allow between-subject and between-game dataset mixture (Default is True)
    winlen (None/int): Time window for input sampling (for the whole timepoints, Default is None)
    stride (int): Temporal leap for input sampling (Default is 1)
    nperseg (int): N per seg of spectrogram (Default is 256)
    fs (int): Framerate of spectrogram (Default is 129)
        NOTE(review): this docstring previously stated 128 while the signature uses 129 - confirm which is intended.
    transform (torchvision.transforms.transforms.Compose): Torch transformation (Default is None)
    
    Returns:
    FullDataset (CerealTimeKillersDataset): full data with EEG spectrogram and fixed labels (information and/or emotional states) in CerealTimeKillersLabels
        FullDataset[i]: ith datapoint of [spectrogram, labels]
    DataSize (Tuple): Data size for single point as (Input shape as tuple, Output shape as tuple)
    ExpIndex (pandas.DataFrame): Corresponding subject and game (as two columns) with shared row indices from FullDataset
    
    Raises:
    ValueError: if label_range[0] == label_range[1] (labels cannot be normalised)
    """
    
    specgram_name = 'full_specgram_1'
    spec_columns = label_class.fixed + [specgram_name]
    index_columns = ['subject', 'game']

    label_min, label_max = label_range[0], label_range[1]
    if label_max == label_min:
        raise ValueError("label_range must span a non-zero interval, got %r" % (label_range,))

    # Load labels & EEG data from S3 (the labels travel with each recording as 'EmoInf')
    data = load_data_from_s3()

    # Collect all rows first and build the dataframes once at the end:
    # DataFrame.append returned a new frame (and no longer exists in pandas 2.x),
    # so appending inside the loop without re-assignment left the frames empty.
    spec_rows, index_rows = list(), list()
    for i in range(len(data)):
        for j in range(len(data[i])):
            EmoInf = data[i][j]['EmoInf']
            EEG_data = data[i][j]['EEG']

            # Load info and fixed labels, rescaled with label_range
            EmoInf_data = list((EmoInf[label_class.fixed].values - label_min) / (label_max - label_min))
            exp_index = EmoInf[index_columns].values.tolist()

            # Get EEG spectrogram
            spec_EEG = get_specgram(EEG_data, label_class.electrode, 
                                    winlen = winlen, stride = stride, nperseg = nperseg, fs = fs)

            # Add new data: one row per sampled window when mixing is allowed,
            # otherwise a single row holding the whole spectrogram stack
            if dataset_mix:
                for k in range(spec_EEG.shape[0]):
                    spec_rows.append(EmoInf_data + [spec_EEG[k]])
                    index_rows.append(list(exp_index))
            else:
                spec_rows.append(EmoInf_data + [spec_EEG])
                index_rows.append(list(exp_index))

    # Build dataframes. Only the label columns are cast to float: the spectrogram
    # column stores array-like objects and has to stay an object column.
    spec_df = pd.DataFrame(spec_rows, columns = spec_columns)
    spec_df = spec_df.astype({name: float for name in label_class.fixed})
    index_df = pd.DataFrame(index_rows, columns = index_columns, dtype = int)
    
    # Output
    final_df = CerealTimeKillersDataset(df = spec_df, transform = transform)
    data_size = (tuple(final_df[0][0].shape), tuple(final_df[0][1].shape))

    return final_df, data_size, index_df


In [6]:
# Measure the elapsed time (in seconds) of loading the whole dataset from S3.
# perf_counter is a monotonic, high-resolution clock meant for timing code;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
data = load_data_from_s3()
end = time.perf_counter()
print(end - start)

37.471978187561035
