# Consolidating data

This file consists of functions that consolidate our disparate datasets into one large dataset that is useful in training our model. 

The goal is to generate a file with 30 columns (this number should be variable), such that each column is a state in time. 

Ideally, this will be done with heirachical data, ie `p1` is the first point in time, and within `p1` you have an x component, y component, etc.

https://pandas.pydata.org/docs/user_guide/advanced.html

## Input data format

It is assumed that the input data with have the columns: `[timestamp,tx,ty,tz,qx,qy,qz,qw]`

## Extracting the data we want

This function will create velocity and acceleration columns.

In [146]:
import numpy as np
import pandas as pd

def extract_features(raw: pd.DataFrame, dropna: bool = False) -> None:
    raw['vx'] = raw['tx'].diff() / raw['timestamp'].diff()
    raw['vy'] = raw['ty'].diff() / raw['timestamp'].diff()
    raw['vz'] = raw['tz'].diff() / raw['timestamp'].diff()

    raw['ax'] = raw['vx'].diff() / raw['timestamp'].diff()
    raw['ay'] = raw['vy'].diff() / raw['timestamp'].diff()
    raw['az'] = raw['vz'].diff() / raw['timestamp'].diff()

    if dropna: raw.dropna(inplace=True)

In [148]:
# test the above functions

df = pd.read_csv("../data/fpv_uzh/indoor_forward_3_davis_with_gt.txt")

extract_features(df, dropna=True)

print(df.head())
print(df['timestamp'])

      timestamp        tx        ty        tz        qx        qy        qz  \
2  1.540820e+09  7.603792  0.242395 -0.753890 -0.269081 -0.661483  0.642058   
3  1.540820e+09  7.603930  0.243608 -0.753434 -0.269001 -0.661389  0.642086   
4  1.540820e+09  7.604787  0.244973 -0.752511 -0.268968 -0.661503  0.641952   
5  1.540820e+09  7.605305  0.246114 -0.751615 -0.269007 -0.661649  0.641821   
6  1.540820e+09  7.605622  0.246968 -0.751001 -0.268870 -0.661571  0.641922   

         qw        vx        vy        vz        ax        ay        az  
2  0.278923 -0.006013  0.009916  0.003732  0.048684 -0.009624  0.023827  
3  0.279157  0.001377  0.012131  0.004557  0.073898  0.022147  0.008241  
4  0.279228  0.008571  0.013648  0.009230  0.071941  0.015168  0.046731  
5  0.279146  0.005177  0.011415  0.008960 -0.033934 -0.022326 -0.002695  
6  0.279229  0.003167  0.008536  0.006144 -0.020107 -0.028794 -0.028158  
2      1.540820e+09
3      1.540820e+09
4      1.540820e+09
5      1.540820e+09
6

In [96]:
temp = pd.DataFrame(df[0:15])
temp

Unnamed: 0,timestamp,tx,ty,tz,vx,vy,vz,ax,ay,az,formatted_time
2,1540820000.0,7.603792,0.242395,-0.75389,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827,2018-10-29 13:37:45
3,1540820000.0,7.60393,0.243608,-0.753434,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241,2018-10-29 13:37:45
4,1540820000.0,7.604787,0.244973,-0.752511,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731,2018-10-29 13:37:45
5,1540820000.0,7.605305,0.246114,-0.751615,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695,2018-10-29 13:37:46
6,1540820000.0,7.605622,0.246968,-0.751001,0.003167,0.008536,0.006144,-0.020107,-0.028794,-0.028158,2018-10-29 13:37:46
7,1540820000.0,7.606477,0.247624,-0.751746,0.008559,0.006565,-0.007456,0.053924,-0.019712,-0.136007,2018-10-29 13:37:46
8,1540820000.0,7.606414,0.245921,-0.752536,-0.000637,-0.01704,-0.0079,-0.091958,-0.236045,-0.004439,2018-10-29 13:37:46
9,1540820000.0,7.606267,0.244575,-0.752779,-0.001469,-0.013451,-0.002426,-0.008326,0.035883,0.054737,2018-10-29 13:37:46
10,1540820000.0,7.606619,0.244098,-0.752546,0.003526,-0.004773,0.002327,0.049956,0.086786,0.047538,2018-10-29 13:37:46
11,1540820000.0,7.606064,0.242268,-0.752454,-0.005554,-0.018298,0.000923,-0.090802,-0.135255,-0.014048,2018-10-29 13:37:46


In [98]:
temp['trial'] = temp.apply(lambda row: [row['tx'], row['ty'], row['tz'], row['vx'], row['vy'], row['vz'], row['ax'], row['ay'], row['az']], axis=1)

In [100]:
temp

Unnamed: 0,timestamp,tx,ty,tz,vx,vy,vz,ax,ay,az,formatted_time,trial
2,1540820000.0,7.603792,0.242395,-0.75389,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827,2018-10-29 13:37:45,"[7.603792324796803, 0.2423950051700759, -0.753..."
3,1540820000.0,7.60393,0.243608,-0.753434,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241,2018-10-29 13:37:45,"[7.603930000953036, 0.2436081092466435, -0.753..."
4,1540820000.0,7.604787,0.244973,-0.752511,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731,2018-10-29 13:37:45,"[7.604787087753476, 0.2449728981985283, -0.752..."
5,1540820000.0,7.605305,0.246114,-0.751615,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695,2018-10-29 13:37:46,"[7.605304834001341, 0.2461144271965232, -0.751..."
6,1540820000.0,7.605622,0.246968,-0.751001,0.003167,0.008536,0.006144,-0.020107,-0.028794,-0.028158,2018-10-29 13:37:46,"[7.605621511806223, 0.2469680180752978, -0.751..."
7,1540820000.0,7.606477,0.247624,-0.751746,0.008559,0.006565,-0.007456,0.053924,-0.019712,-0.136007,2018-10-29 13:37:46,"[7.606477432621101, 0.2476244930624005, -0.751..."
8,1540820000.0,7.606414,0.245921,-0.752536,-0.000637,-0.01704,-0.0079,-0.091958,-0.236045,-0.004439,2018-10-29 13:37:46,"[7.606413773087618, 0.2459205259888919, -0.752..."
9,1540820000.0,7.606267,0.244575,-0.752779,-0.001469,-0.013451,-0.002426,-0.008326,0.035883,0.054737,2018-10-29 13:37:46,"[7.606266852506543, 0.2445753812881742, -0.752..."
10,1540820000.0,7.606619,0.244098,-0.752546,0.003526,-0.004773,0.002327,0.049956,0.086786,0.047538,2018-10-29 13:37:46,"[7.606619489126424, 0.2440981030252994, -0.752..."
11,1540820000.0,7.606064,0.242268,-0.752454,-0.005554,-0.018298,0.000923,-0.090802,-0.135255,-0.014048,2018-10-29 13:37:46,"[7.6060641020981565, 0.2422682743141283, -0.75..."


In [16]:
Row_list =[]
# Iterate over each row
for rows in temp.itertuples():
    # Create list for the current row
    my_list =[rows.tx, rows.ty, rows.tz, rows.vx, rows.vy, rows.vz, rows.ax, rows.ay, rows.az]
    
    # append the list to the final list
    Row_list.append(my_list)

temp['array'] = Row_list
temp.head()

Unnamed: 0,timestamp,tx,ty,tz,vx,vy,vz,ax,ay,az,array
2,1540820000.0,7.603792,0.242395,-0.75389,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827,"[7.603792324796803, 0.2423950051700759, -0.753..."
3,1540820000.0,7.60393,0.243608,-0.753434,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241,"[7.603930000953036, 0.2436081092466435, -0.753..."
4,1540820000.0,7.604787,0.244973,-0.752511,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731,"[7.604787087753476, 0.2449728981985283, -0.752..."
5,1540820000.0,7.605305,0.246114,-0.751615,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695,"[7.605304834001341, 0.2461144271965232, -0.751..."
6,1540820000.0,7.605622,0.246968,-0.751001,0.003167,0.008536,0.006144,-0.020107,-0.028794,-0.028158,"[7.605621511806223, 0.2469680180752978, -0.751..."


In [20]:
temp.iloc[0,1]

7.603792324796803

In [46]:
cols = []
for i in range(10):
    cols.append('traj' + str(i))

print(cols)

['traj0', 'traj1', 'traj2', 'traj3', 'traj4', 'traj5', 'traj6', 'traj7', 'traj8', 'traj9']


In [48]:
flattened = pd.DataFrame([temp.iloc[0:10]['array'].to_numpy().flatten()])
flattened.columns = cols
flattened.head()

Unnamed: 0,traj0,traj1,traj2,traj3,traj4,traj5,traj6,traj7,traj8,traj9
0,"[7.603792324796803, 0.2423950051700759, -0.753...","[7.603930000953036, 0.2436081092466435, -0.753...","[7.604787087753476, 0.2449728981985283, -0.752...","[7.605304834001341, 0.2461144271965232, -0.751...","[7.605621511806223, 0.2469680180752978, -0.751...","[7.606477432621101, 0.2476244930624005, -0.751...","[7.606413773087618, 0.2459205259888919, -0.752...","[7.606266852506543, 0.2445753812881742, -0.752...","[7.606619489126424, 0.2440981030252994, -0.752...","[7.6060641020981565, 0.2422682743141283, -0.75..."


In [54]:
end = pd.DataFrame([temp.iloc[10:15]['array'].to_numpy().flatten()])
end

Unnamed: 0,0,1,2,3,4
0,"[7.604174528485285, 0.2375006440468789, -0.750...","[7.601460630113297, 0.2285115037743149, -0.740...","[7.598659048126195, 0.2141485608194633, -0.715...","[7.597422786178243, 0.195863112719945, -0.6791...","[7.596577235481551, 0.1749967154667119, -0.634..."


In [76]:
output = temp.iloc[10:15]['array'].to_numpy().flatten()
output

array([list([7.604174528485285, 0.2375006440468789, -0.7503911722433537, -0.018895754149107737, -0.04767634814020284, 0.020626838828625302, -0.13341891293663122, -0.2937807159498109, 0.1970425847799735]),
       list([7.601460630113297, 0.2285115037743149, -0.740217505543095, -0.027139009601659644, -0.08989148845284385, 0.10173676402632655, -0.08243263313940413, -0.4221518057217448, 0.8111000255022748]),
       list([7.598659048126195, 0.2141485608194633, -0.7152603745277165, -0.02801577979412451, -0.1436292240852629, 0.24957095313967223, -0.008767689382368393, -0.5373765876008159, 1.4783397763514432]),
       list([7.597422786178243, 0.195863112719945, -0.6791311601970138, -0.012362601794674166, -0.18285421942012386, 0.36129162647520974, 0.15653155607431635, -0.3922493922313531, 1.1172051351806096]),
       list([7.596577235481551, 0.1749967154667119, -0.6345363146482136, -0.00845551503073428, -0.20866417152999225, 0.44594888077799627, 0.03907090490031738, -0.258099767241803, 0.846573

In [84]:
flattened['output'] = [list(output)]


0    [[7.604174528485285, 0.2375006440468789, -0.75...
Name: output, dtype: object


In [None]:
for i in range((len(data) - (m+n))/10):
    j = i*10
    temp = pd.DataFrame(df[j:j+n+m])
    
    

In [114]:
temp.head()

Unnamed: 0,timestamp,tx,ty,tz,vx,vy,vz,ax,ay,az,formatted_time,trial
2,1540820000.0,7.603792,0.242395,-0.75389,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827,2018-10-29 13:37:45,"[7.603792324796803, 0.2423950051700759, -0.753..."
3,1540820000.0,7.60393,0.243608,-0.753434,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241,2018-10-29 13:37:45,"[7.603930000953036, 0.2436081092466435, -0.753..."
4,1540820000.0,7.604787,0.244973,-0.752511,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731,2018-10-29 13:37:45,"[7.604787087753476, 0.2449728981985283, -0.752..."
5,1540820000.0,7.605305,0.246114,-0.751615,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695,2018-10-29 13:37:46,"[7.605304834001341, 0.2461144271965232, -0.751..."
6,1540820000.0,7.605622,0.246968,-0.751001,0.003167,0.008536,0.006144,-0.020107,-0.028794,-0.028158,2018-10-29 13:37:46,"[7.605621511806223, 0.2469680180752978, -0.751..."


In [118]:
temp.iloc[0,len(temp.columns)-2]

'2018-10-29 13:37:45'

In [None]:
temp.insert(0, 'date', data[j,len(data.columns)-1])

In [158]:
def informerStructure(data: pd.DataFrame, m: int, n: int) -> pd.DataFrame:
    # m is going to be the number of input trajectories
    # n is going to be the number of output trajectories
    # the format for the trajectories will be [tx, ty, tz, vx, vy, vz, ax, ay, az]
    # The time stamp will be recorded for the first trajectory
    # There will be m columns each with their own trajectory with the last column being an output of the combined n trajectory vectors
    df['date'] = pd.to_datetime(data['unix_time'], unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')
    data['array'] = data.apply(lambda row: [row['tx'], row['ty'], row['tz'], row['vx'], row['vy'], row['vz'], row['ax'], row['ay'], row['az']], axis=1)
    data.head()

    cols = []
    for i in range(10):
        cols.append('traj' + str(i))
    # print(cols)

    slices = []
    for i in range(len(data) - n):
        j = i*10
        flattened = pd.DataFrame([data.iloc[j:j+m]['array'].to_numpy().flatten()])
        flattened.columns = cols
        
        output = data.iloc[j+m:j+m+n]['array'].to_numpy().flatten()

        flattened['output'] = [list(output)]

        flattened.insert(0, 'date', data[j,len(data.columns)-1])
        # flattened['date'
        
        slices.append(flattened)
    return pd.concat(slices, ignore_index=False)

In [160]:
#TEST ABOVEEE
slices = informerStructure(df, 10, 5)
slice.head()

KeyError: (0, 14)

In [None]:
def informerStructure(data: pd.DataFrame) -> pd.DataFrame:
    # This format will give more of a time-series structure with one input, one output, and a date
    # The columns will be 'date', 'tx', 'ty', tz', 'vx', 'vy', 'vz', 'ax', 'ay', 'az', and the concatenated output in a 3D vector: ['tx', 'ty', 'tz']
    cols = [f"{col}_{i}" for i in range(n) for col in data.columns]
    slices = []
    for i in range(len(data) - n):
        flattened = pd.DataFrame([data[i:i+n].to_numpy().flatten()])
        flattened.columns = cols
        slices.append(flattened)
    return pd.concat(slices, ignore_index=False)

In [61]:
trial = df[0:4]
trial.head()

Unnamed: 0,timestamp,tx,ty,tz,qx,qy,qz,qw,vx,vy,vz,ax,ay,az
2,1540820000.0,7.603792,0.242395,-0.75389,-0.269081,-0.661483,0.642058,0.278923,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827
3,1540820000.0,7.60393,0.243608,-0.753434,-0.269001,-0.661389,0.642086,0.279157,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241
4,1540820000.0,7.604787,0.244973,-0.752511,-0.268968,-0.661503,0.641952,0.279228,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731
5,1540820000.0,7.605305,0.246114,-0.751615,-0.269007,-0.661649,0.641821,0.279146,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695


In [65]:
# temp = np.DataFrame.arange(4)
# print(temp)
trial.loc[:, "trajNum"] = np.arange(len(trial))
trial.loc[:, "slice"] = 1
trial.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trial.loc[:, "slice"] = 1


Unnamed: 0,timestamp,tx,ty,tz,qx,qy,qz,qw,vx,vy,vz,ax,ay,az,trajNum,slice
2,1540820000.0,7.603792,0.242395,-0.75389,-0.269081,-0.661483,0.642058,0.278923,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827,0,1
3,1540820000.0,7.60393,0.243608,-0.753434,-0.269001,-0.661389,0.642086,0.279157,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241,1,1
4,1540820000.0,7.604787,0.244973,-0.752511,-0.268968,-0.661503,0.641952,0.279228,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731,2,1
5,1540820000.0,7.605305,0.246114,-0.751615,-0.269007,-0.661649,0.641821,0.279146,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695,3,1


In [69]:
trial.set_index(['slice', 'trajNum'], inplace=True)
trial.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,timestamp,tx,ty,tz,qx,qy,qz,qw,vx,vy,vz,ax,ay,az
slice,trajNum,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,0,1540820000.0,7.603792,0.242395,-0.75389,-0.269081,-0.661483,0.642058,0.278923,-0.006013,0.009916,0.003732,0.048684,-0.009624,0.023827
1,1,1540820000.0,7.60393,0.243608,-0.753434,-0.269001,-0.661389,0.642086,0.279157,0.001377,0.012131,0.004557,0.073898,0.022147,0.008241
1,2,1540820000.0,7.604787,0.244973,-0.752511,-0.268968,-0.661503,0.641952,0.279228,0.008571,0.013648,0.00923,0.071941,0.015168,0.046731
1,3,1540820000.0,7.605305,0.246114,-0.751615,-0.269007,-0.661649,0.641821,0.279146,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695


## Slicing the data

Now, we want rows of data that represent a specific range of time. In this case, we want 30 points for each new row.

In [18]:
def generate_slices(data: pd.DataFrame, n: int) -> pd.DataFrame:
    # each row in the original data is a "point". Each row in the output 
    # is a list of points of size n. 
    cols = [f"{col}_{i}" for i in range(n) for col in data.columns]
    slices = []
    for i in range(len(data) - n):
        flattened = pd.DataFrame([data[i:i+n].to_numpy().flatten()])
        flattened.columns = cols
        slices.append(flattened)
    return pd.concat(slices, ignore_index=False)

In [20]:
# test the above function for 4 pints in each row

slices = generate_slices(df, 4)
# print(slices.head())
slices.head()

Unnamed: 0,timestamp_0,tx_0,ty_0,tz_0,qx_0,qy_0,qz_0,qw_0,vx_0,vy_0,...,qx_3,qy_3,qz_3,qw_3,vx_3,vy_3,vz_3,ax_3,ay_3,az_3
0,1540820000.0,7.603792,0.242395,-0.75389,-0.269081,-0.661483,0.642058,0.278923,-0.006013,0.009916,...,-0.269007,-0.661649,0.641821,0.279146,0.005177,0.011415,0.00896,-0.033934,-0.022326,-0.002695
0,1540820000.0,7.60393,0.243608,-0.753434,-0.269001,-0.661389,0.642086,0.279157,0.001377,0.012131,...,-0.26887,-0.661571,0.641922,0.279229,0.003167,0.008536,0.006144,-0.020107,-0.028794,-0.028158
0,1540820000.0,7.604787,0.244973,-0.752511,-0.268968,-0.661503,0.641952,0.279228,0.008571,0.013648,...,-0.269057,-0.662028,0.641452,0.279046,0.008559,0.006565,-0.007456,0.053924,-0.019712,-0.136007
0,1540820000.0,7.605305,0.246114,-0.751615,-0.269007,-0.661649,0.641821,0.279146,0.005177,0.011415,...,-0.269372,-0.662669,0.640559,0.279272,-0.000637,-0.01704,-0.0079,-0.091958,-0.236045,-0.004439
0,1540820000.0,7.605622,0.246968,-0.751001,-0.26887,-0.661571,0.641922,0.279229,0.003167,0.008536,...,-0.269935,-0.662853,0.640029,0.279507,-0.001469,-0.013451,-0.002426,-0.008326,0.035883,0.054737


In [22]:
def multiIndex(data: pd.DataFrame, n: int, j: int) -> pd.DataFrame:
    # each row in the original data is a "point". Each row in the output 
    # is a list of points of size n. 
    cols = [f"{col}_{i}" for i in range(n) for col in data.columns]
    slices = []
    # i is the number of slices we want
    for i in range(len(data) - n):
        trial = data.copy()[i:i+n]
        trial.loc[:, "trajNum"] = np.arange(len(trial))
        trial.loc[:, "slice"] = str(j) + ":" + str(i)
        trial.set_index(['slice', 'trajNum'], inplace=True)
        slices.append(trial)
    return pd.concat(slices, ignore_index=False)

In [125]:
slices = multiIndex(df, 10, 0)
# print(slices.head())
print(slices.head(20))
type(slices)

               timestamp       tx       ty       tz      vx      vy      vz  \
slice trajNum                                                                 
0:0   0              0.0  0.08312  0.46529 -0.56110     NaN     NaN     NaN   
      1              0.1  0.10778  0.49733 -0.52509  0.2466  0.3204  0.3601   
      2              0.2  0.14227  0.51815 -0.47645  0.3449  0.2082  0.4864   
      3              0.3  0.15770  0.52431 -0.42075  0.1543  0.0616  0.5570   
      4              0.4  0.18045  0.52961 -0.36707  0.2275  0.0530  0.5368   
      5              0.5  0.19404  0.50781 -0.31943  0.1359 -0.2180  0.4764   
      6              0.6  0.20461  0.47123 -0.24595  0.1057 -0.3658  0.7348   
      7              0.7  0.21739  0.44461 -0.22270  0.1278 -0.2662  0.2325   
      8              0.8  0.20570  0.41468 -0.16866 -0.1169 -0.2993  0.5404   
      9              0.9  0.17935  0.37803 -0.14353 -0.2635 -0.3665  0.2513   
0:1   0              0.1  0.10778  0.49733 -0.52509 

pandas.core.frame.DataFrame

## Consolidate all our original data

Now, we want to consolidate our data from all the other sources.

In [22]:
import os

fpv_data = "../data/fpv_uzh"
random_traj_data = "../data/random_trajectory_100ms"
output_path = "../data/output"
if not os.path.exists(output_path):
    os.makedirs(output_path)

n = 30 # we want 30 points per row
slices = []

# consolidate the fpv data
for filename in filter(lambda p: p.endswith("txt"), os.listdir(fpv_data)):
    filepath = os.path.join(fpv_data, filename)
    df = pd.read_csv(filepath)
    
    extract_features(df)
    # the columns should be handled in the generate slices funciton
    slices.append(generate_slices(df, n))
    
# consolidate the synthetic data
for filename in filter(lambda p: p.endswith("txt"), os.listdir(random_traj_data)):
    filepath = os.path.join(random_traj_data, filename)
    df = pd.read_csv(filepath)
    
    extract_features(df)
    # the columns should be handled in the generate slices funciton
    slices.append(generate_slices(df, n))

consolidated = pd.concat(slices, ignore_index=False)
# consolidated.to_csv(os.path.join(output_path, "consolidated.csv"))

In [127]:
import os

# THIS IS FOR THE MULTI-INDEXING
fpv_data = "../data/fpv_uzh"
random_traj_data = "../data/random_trajectory_100ms"
output_path = "../data/output"
if not os.path.exists(output_path):
    os.makedirs(output_path)

n = 30 # we want 30 points per row
slices = []
j = 0 # hold the slice number for the entire consolidation for the multi-indexing

# consolidate the fpv data
for filename in filter(lambda p: p.endswith("txt"), os.listdir(fpv_data)):
    filepath = os.path.join(fpv_data, filename)
    df = pd.read_csv(filepath)
    
    extract_features(df)
    # the columns should be handled in the generate slices funciton
    slices.append(multiIndex(df, n, j))
    j += 1
    
# consolidate the synthetic data
for filename in filter(lambda p: p.endswith("txt"), os.listdir(random_traj_data)):
    filepath = os.path.join(random_traj_data, filename)
    df = pd.read_csv(filepath)
    
    extract_features(df)
    # the columns should be handled in the generate slices funciton
    slices.append(multiIndex(df, n, j))
    j += 1

consolidated = pd.concat(slices, ignore_index=False)
consolidated.to_csv(os.path.join(output_path, "consolidated.csv"))