In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import json

### What we want:
 - A function that takes the path to a folder of raw data files and produces a folder of processed data.
 - Think about the parameter space, i.e. the stimulus columns that may vary within a file:
     - STIM_type
     - SIZE_dot	
     - DIST_dot	
     - AMPL_rot	
     - SPEED_rot	
     - LUM_dot
     - BGLUM

In [2]:
# Load one raw file and work out which stimulus parameters actually vary in it.
data = pd.read_csv('../raw_data/Test_Folder_2Par/zfdata0.csv')
parameter_space = ['STIM_type','SIZE_dot','DIST_dot','AMPL_rot','SPEED_rot','LUM_dot','BGLUM']
# A parameter is "needed" when its column holds more than one distinct value
# (NaN counts as a value, exactly as it does with .unique()).
needed_params = [param for param in parameter_space if data[param].nunique(dropna=False) > 1]

if not needed_params:
    # Without at least one varying parameter there is nothing to key the results on.
    raise ValueError(
        'None of the stimulus parameters varies in this file; '
        f'expected at least one of {parameter_space} to take more than one value.')


def _nested_trial_dict(frame, params):
    """Build the container that will hold the processed trials.

    One dictionary level is created per entry of `params`, keyed
    '<param>_<value>' for every distinct value of that column in `frame`;
    the innermost level holds a fresh empty list per key. Works for any
    number of parameters (an empty `params` yields an empty list).
    """
    if not params:
        return []
    head, *rest = params
    return {f'{head}_{value}': _nested_trial_dict(frame, rest)
            for value in frame[head].unique()}


final_dict = _nested_trial_dict(data, needed_params)

### Counter to separate repeating stimuli
# Keep the old index as an 'index' column; the per-trial processing further down removes it again.
data = data.reset_index()
# A new run starts on every row whose STIM_type differs from the row before it
# (the first row always starts run 1). The cumulative sum of those starts numbers
# the runs 1, 2, 3, ... and, unlike a row-by-row loop that only writes when it
# sees a change, also labels the final run.
# NOTE(review): only STIM_type is used to detect a new run; back-to-back stimuli that
# differ in another parameter but share STIM_type end up in the same run - confirm
# this matches the recording protocol.
stim_changed = data['STIM_type'].ne(data['STIM_type'].shift())
data['counter'] = stim_changed.cumsum()


        
### Settings for the per-trial processing
STIM_DURATION_S = 4            # every stimulus run is treated as lasting 4 seconds
RESAMPLE_RULE = '10ms'         # resample to 100 frames per second
ARTIFACT_JUMP = np.pi / 2      # frame-to-frame CUM_angle change treated as an artifact


def _remove_cum_angle_jumps(cum_angle, threshold=ARTIFACT_JUMP):
    """Return `cum_angle` with every frame-to-frame jump of at least `threshold` removed.

    Each offending step is subtracted from that frame and all later frames, so
    the trace continues from the value it had just before the jump. Steps below
    the threshold (and NaN steps) are left untouched.
    """
    step = cum_angle.diff()
    # Keep only the steps that count as artifacts; everything else contributes 0.
    jump = step.where(step.abs() >= threshold, 0.0)
    return cum_angle - jump.cumsum()


def _process_trial(trial):
    """Clean and resample the rows of one stimulus run.

    Steps: remove CUM_angle artifacts, drop the stimulus columns listed in
    `parameter_space`, spread the frames evenly over STIM_DURATION_S seconds,
    resample to RESAMPLE_RULE with linear interpolation, zero CUM_angle at the
    first frame, add the frame-to-frame distance 'Distance_pts', and replace
    'Timestamp' by frame_number / 100 (0.01, 0.02, ...).
    Returns a new DataFrame with a plain integer index.
    """
    # Helper columns left behind by earlier reset_index calls are not data.
    trial = trial.drop(columns=['index', 'level_0'], errors='ignore').reset_index(drop=True)

    ### FIRST sort out cumulative artifacts
    trial['CUM_angle'] = _remove_cum_angle_jumps(trial['CUM_angle'])

    ### Get rid of stimulus columns (the raw timestamps are replaced further down)
    trial = trial.drop(columns=['Timestamp'] + parameter_space)

    ### Resampling 4 second stimulus into 100 frames per second (10ms) and Interpolating
    n_frames = len(trial)
    # Exactly one evenly spaced time per frame, starting at 0.
    seconds = np.arange(n_frames) * (STIM_DURATION_S / n_frames)
    trial.index = pd.to_datetime(seconds, unit='s')
    interp = trial.resample(RESAMPLE_RULE).first().interpolate(method='linear')

    ### Set first cumulative angle to zero and adjust others
    interp['CUM_angle'] -= interp['CUM_angle'].iloc[0]

    ### Add distance column
    # distance = sqrt((x2-x1)**2 + (y2-y1)**2); the first frame has no predecessor -> 0
    distance = np.hypot(interp['X'].diff(), interp['Y'].diff())
    distance.iloc[0] = 0
    interp['Distance_pts'] = distance

    ### Clean timestamps: frame number / 100, i.e. seconds at the 10 ms resampling step
    interp.insert(0, 'Timestamp', np.arange(1, len(interp) + 1) / 100)

    ### Resetting index to integers
    return interp.reset_index(drop=True)


# One group per combination of the varying stimulus parameters that occurs in the data.
for combo, combo_rows in data.groupby(needed_params, sort=False):
    # Depending on the pandas version a single grouping column yields a scalar key.
    combo = combo if isinstance(combo, tuple) else (combo,)

    # Walk down final_dict to the list that collects this combination's trials.
    trials = final_dict
    for name, value in zip(needed_params, combo):
        trials = trials[f'{name}_{value}']

    # Renumber the runs inside this combination 0, 1, 2, ... in order of appearance.
    run_ids = combo_rows['counter']
    relabel = {run: k for k, run in enumerate(run_ids.unique())}
    combo_rows = combo_rows.assign(counter=run_ids.map(relabel))

    for _, trial in combo_rows.groupby('counter', sort=False):
        trials.append(_process_trial(trial))

In [3]:
# Take the third processed trial stored under STIM_type 4 / AMPL_rot 0.5235989 and show it.
condition_trials = final_dict['STIM_type_4']['AMPL_rot_0.5235989']
test_df = condition_trials[2]
test_df

Unnamed: 0,Timestamp,X,Y,ANGLE,CUM_angle,TAIL_P1,TAIL_P2,TAIL_P3,TAIL_P4,TAIL_P5,TAIL_P6,MCURVE_tail,L_EYE,R_EYE,counter,Distance_pts
0,0.01,567.36820,629.34840,4.593124,0.000000,0.0,0.018597,0.032094,0.018596,0.045578,0.045577,0.025577,-0.305475,0.064806,2.0,0.000000
1,0.02,567.32010,629.20500,4.567013,0.026110,0.0,0.018423,0.018423,0.018423,0.058867,0.045404,0.023493,-0.299486,0.067605,2.0,0.151252
2,0.03,567.29605,629.19905,4.579082,0.014041,0.0,0.026613,0.026613,0.013100,0.060338,0.046858,0.030518,-0.299043,0.069783,2.0,0.024775
3,0.04,567.27200,629.19310,4.591152,0.001971,0.0,0.034804,0.034804,0.007778,0.061810,0.048311,0.037543,-0.298600,0.071962,2.0,0.024775
4,0.05,567.26625,629.25920,4.578106,0.015017,0.0,0.019303,0.039547,0.005790,0.053050,0.046301,0.031006,-0.299759,0.067663,2.0,0.066350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,3.95,520.06270,618.47860,3.516169,1.076954,0.0,0.079944,0.066479,0.025992,0.079944,0.052997,0.060789,-0.316873,0.081489,2.0,0.028007
395,3.96,519.96835,618.46115,3.520396,1.072727,0.0,0.057369,0.043888,0.023643,0.070842,0.043895,0.044338,-0.323512,0.073812,2.0,0.095950
396,3.97,519.87400,618.44370,3.524623,1.068500,0.0,0.034793,0.021296,0.021295,0.061740,0.034793,0.027888,-0.330151,0.066135,2.0,0.095950
397,3.98,519.84545,618.42695,3.524748,1.068375,0.0,0.034397,0.020899,0.027647,0.054612,0.047870,0.028510,-0.331482,0.062669,2.0,0.033101


In [4]:
# Plot L_EYE, R_EYE, CUM_angle and Distance_pts of the selected trial against Timestamp.
traces = ['L_EYE','R_EYE','CUM_angle','Distance_pts']
fig = px.line(test_df, x='Timestamp', y=traces)
fig

In [16]:
# Read the preprocessed JSON export into a DataFrame.
data = pd.read_json(path_or_buf='../preprocessed_data/test.json')

In [17]:
# Inspect the entry in row 0 of the 'STIM_type_0' column.
data.loc[0, 'STIM_type_0']

['{"Timestamp":{"0":0.01,"1":0.02,"2":0.03,"3":0.04,"4":0.05,"5":0.06,"6":0.07,"7":0.08,"8":0.09,"9":0.1,"10":0.11,"11":0.12,"12":0.13,"13":0.14,"14":0.15,"15":0.16,"16":0.17,"17":0.18,"18":0.19,"19":0.2,"20":0.21,"21":0.22,"22":0.23,"23":0.24,"24":0.25,"25":0.26,"26":0.27,"27":0.28,"28":0.29,"29":0.3,"30":0.31,"31":0.32,"32":0.33,"33":0.34,"34":0.35,"35":0.36,"36":0.37,"37":0.38,"38":0.39,"39":0.4,"40":0.41,"41":0.42,"42":0.43,"43":0.44,"44":0.45,"45":0.46,"46":0.47,"47":0.48,"48":0.49,"49":0.5,"50":0.51,"51":0.52,"52":0.53,"53":0.54,"54":0.55,"55":0.56,"56":0.57,"57":0.58,"58":0.59,"59":0.6,"60":0.61,"61":0.62,"62":0.63,"63":0.64,"64":0.65,"65":0.66,"66":0.67,"67":0.68,"68":0.69,"69":0.7,"70":0.71,"71":0.72,"72":0.73,"73":0.74,"74":0.75,"75":0.76,"76":0.77,"77":0.78,"78":0.79,"79":0.8,"80":0.81,"81":0.82,"82":0.83,"83":0.84,"84":0.85,"85":0.86,"86":0.87,"87":0.88,"88":0.89,"89":0.9,"90":0.91,"91":0.92,"92":0.93,"93":0.94,"94":0.95,"95":0.96,"96":0.97,"97":0.98,"98":0.99,"99":1.0,"100

In [19]:
# Open the JSON file and parse it into plain Python objects.
# The `with` block closes the file handle as soon as parsing is done.
with open('../preprocessed_data/test.json', encoding='utf-8') as f:
    data = json.load(f)

In [25]:
# Flatten the 'STIM_type_0' entry of the first record into a DataFrame.
# NOTE(review): the output of the earlier lookup suggests these list elements are
# JSON-encoded strings rather than dicts - confirm json_normalize is the right tool here.
first_record = data[0]
df = pd.json_normalize(first_record['STIM_type_0'])

In [26]:
# Display the frame produced by json_normalize in the previous cell.
df

0
1
2
3
4
5
6
7
8
9
10
