In [174]:
import pandas as pd
import os
import pickle

In [150]:
# Preprocess a single experiment file; this is applied to every CSV in the data folder
def preprocess_data(filename, data_dir='data'):
    """Load one experiment CSV and split it into audio, visual and combined trials.

    Rows are assigned to a condition by which response column is non-null:
    'name' for audio, 'VisualDecision.keys' for visual and
    'CombComparisonResponse.keys' for combined trials.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside ``data_dir``.
    data_dir : str, optional
        Folder that contains the CSV file (defaults to 'data').

    Returns
    -------
    tuple of pandas.DataFrame
        ``(audio, video, combined)``. Responses are recoded with
        ``rewrite_to_binary`` and locations are numeric.

    Raises
    ------
    KeyError
        If an audio trial has a 'name' that is not in the location lookup.
    """
    experiment = pd.read_csv(os.path.join(data_dir, filename))

    # Each subset is copied so that the column rewrites below act on an
    # independent frame instead of a slice of `experiment`
    # (this is what triggers pandas' SettingWithCopyWarning otherwise).
    audio = experiment.loc[experiment['name'].notnull(),
                           ['name', 'LeftorRight.keys']].copy()
    # map l/r to 0/1
    audio['LeftorRight.keys'] = audio['LeftorRight.keys'].map(rewrite_to_binary)
    # map the stimulus name to a signed numeric location
    location_lookup = {'left30': -2.25, 'left60': -4.5, 'left100': -6.75,
                       'equal': 0.0,
                       'right30': 2.25, 'right60': 4.5, 'right100': 6.75}
    # A lambda (rather than passing the dict to map) keeps unknown names a
    # hard KeyError instead of silently turning them into NaN.
    audio['name'] = audio['name'].map(lambda x: location_lookup[x])

    video = experiment.loc[experiment['VisualDecision.keys'].notnull(),
                           ['noise', 'location', 'VisualDecision.keys']].copy()
    # map l/r to 0/1
    video['VisualDecision.keys'] = video['VisualDecision.keys'].map(rewrite_to_binary)
    # map location string to its first numeric component
    video['location'] = video['location'].map(extract_location)

    combined = experiment.loc[experiment['CombComparisonResponse.keys'].notnull(),
                              ['noise', 'location', 'CombComparisonResponse.keys']].copy()
    # map l/r to 0/1
    combined['CombComparisonResponse.keys'] = combined['CombComparisonResponse.keys'].map(rewrite_to_binary)
    # map location string to its first numeric component
    combined['location'] = combined['location'].map(extract_location)

    return audio, video, combined

In [171]:
# AUDIO: make a dictionary of the form 
# {'loc1': val1, ..., 'loc7': val7}
def analyse_audio(audio):
    """Return the mean 'LeftorRight.keys' value for each value of 'name'.

    The frame is grouped by the 'name' column, every group is averaged, and
    the averaged 'LeftorRight.keys' column is returned as a plain dict
    mapping each 'name' value to its mean.
    """
    per_location_means = audio.groupby(by='name').mean()
    response_means = per_location_means.loc[:, 'LeftorRight.keys']
    return response_means.to_dict()

In [177]:
# VISUAL: make a dictionary of the form 
# {'noise_level1': {'loc1': val1, ...,'loc7': val7}, ...}
def analyse_visual(video, noise_levels=(0.10, 0.23, 0.36, 0.49, 0.62)):
    """Mean visual response per location, separately for each noise level.

    Parameters
    ----------
    video : pandas.DataFrame
        Must contain the columns 'noise', 'location' and
        'VisualDecision.keys'.
    noise_levels : iterable of float, optional
        Noise levels to report. Defaults to the five levels this notebook
        has always used.

    Returns
    -------
    dict
        ``{noise_level: {location: mean response, ...}, ...}``. If the
        response column holds the 0/1 coding from ``rewrite_to_binary``,
        each mean is the fraction of responses coded 1.

    Raises
    ------
    KeyError
        If a requested noise level does not occur in ``video['noise']``.
    """
    # Average only the response column so that any extra (possibly
    # non-numeric) columns in `video` cannot break the aggregation.
    mean_response = video.groupby(['noise', 'location'])['VisualDecision.keys'].mean()
    # Indexing the first level with a scalar leaves a Series keyed by location.
    return {noise: mean_response.loc[noise].to_dict() for noise in noise_levels}

In [159]:
# COMBINED: make a dictionary of the form 
# {'noise_level1': {'loc1': val1, ...,'loc7': val7}, ...}
def analyse_combined(combined, noise_levels=(0.10, 0.23, 0.36, 0.49, 0.62)):
    """Mean combined-condition response per location, for each noise level.

    Parameters
    ----------
    combined : pandas.DataFrame
        Must contain the columns 'noise', 'location' and
        'CombComparisonResponse.keys'.
    noise_levels : iterable of float, optional
        Noise levels to report. Defaults to the five levels this notebook
        has always used.

    Returns
    -------
    dict
        ``{noise_level: {location: mean response, ...}, ...}``. If the
        response column holds the 0/1 coding from ``rewrite_to_binary``,
        each mean is the fraction of responses coded 1.

    Raises
    ------
    KeyError
        If a requested noise level does not occur in ``combined['noise']``.
    """
    # Average only the response column so that any extra (possibly
    # non-numeric) columns in `combined` cannot break the aggregation.
    mean_response = combined.groupby(['noise', 'location'])['CombComparisonResponse.keys'].mean()
    # Indexing the first level with a scalar leaves a Series keyed by location.
    return {noise: mean_response.loc[noise].to_dict() for noise in noise_levels}

In [153]:
# Run the preprocessing on a single participant file.
# NOTE: preprocess_data calls rewrite_to_binary and extract_location, which are
# defined in the "Helper functions" cells further down. On a fresh kernel those
# cells have to be executed before this one, otherwise this raises NameError.
audio, video, combined = preprocess_data('participant1_trial_2017_May_24_1609.csv')

## Helper functions

In [34]:
def rewrite_to_binary(lr):
    """Recode a 'left'/'right' response as 0/1.

    'left' becomes 0 and 'right' becomes 1. Any other value prints a warning
    and yields None.
    """
    for label, code in (('left', 0), ('right', 1)):
        if lr == label:
            return code
    # Unrecognised response: warn on stdout and fall back to None.
    print('Something went wrong!')
    return None

In [57]:
def extract_location(loc_list):
    """Parse the first number out of a location string.

    Takes the text before the first comma, drops its first character
    (presumably an opening bracket such as '[' or '(' — confirm against the
    raw CSV) and converts the remainder to float.
    """
    first_field, _separator, _remainder = loc_list.partition(',')
    without_leading_char = first_field[1:]
    return float(without_leading_char)

## Apply to all files and save result

In [186]:
# Preprocess every CSV in the data folder and pickle the three result
# dictionaries per participant.
# sorted() makes the processing order deterministic; os.listdir returns
# entries in arbitrary order.
for filename in sorted(os.listdir('data')):
    if filename.endswith(".csv"):
        # The participant id is the part of the file name before the first '_'.
        # Two files sharing that prefix write to the same pickle paths, so the
        # later one overwrites the earlier one.
        participant = filename.split('_')[0]
        print(participant)
        audio, video, combined = preprocess_data(filename)

        # Each file is opened in a `with` block so the handle is flushed and
        # closed right after the dump instead of being left to the garbage
        # collector.
        audio_res = analyse_audio(audio)
        with open("data/preprocessed/audio/%s.p" % participant, "wb") as out_file:
            pickle.dump(audio_res, out_file)

        video_res = analyse_visual(video)
        with open("data/preprocessed/video/%s.p" % participant, "wb") as out_file:
            pickle.dump(video_res, out_file)

        comb_res = analyse_combined(combined)
        with open("data/preprocessed/combined/%s.p" % participant, "wb") as out_file:
            pickle.dump(comb_res, out_file)

participant1
participant2
participant3
participant4
