# Setup and libraries

## Load the needed libraries

These are the libraries I will be using for this notebook

In [6]:
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import json

%matplotlib inline

# Write files

In [9]:
# This section produces the data for a generative model of the Lunar Lander
# Create a single dataframe with all the data
# each row is a single run of a single model
# each column is a single timestep of a single variable

# NOTE:  There needs to be some thinking here.  While the position,
# velocity, and angle are all continuous, the thrust is not.  So, we need to
# think about how to interpolate the thrust. I think the data in this case
# needs to be "ragged" in the sense that each row has a different number of
# entries.  However, perhaps we can also just look at the "shortest" run and
# truncate all the other runs to that length.


# TODO:  This is getting close, but is not there yet.  I want things like
# 'x,x,x' to be something like 'x1,x2,x3' so that I can use the autoencoder
# more easily.  Is that a matter of combining the columns?  I think so.  
# How about keeping a dict to map times to indices?  That would work I think.

def uniform_data_for_autoencoder(info, entries_per_run=100):
    """Resample every lander trajectory onto a uniform time grid, in long format.

    For each model name in ``info['models']`` and each run index below
    ``info['number_of_trajectories']``, the file
    ``data/lander/{model_name}_{run_idx}_trajectory.parquet`` is read. Its index
    is interpreted as elapsed time in seconds. The six state columns
    (x, y, vx, vy, theta, vtheta) are linearly interpolated onto exactly
    ``entries_per_run`` evenly spaced instants spanning the run, using every
    original sample, and then melted to one row per (timestamp, variable).

    Parameters
    ----------
    info : mapping
        Must provide ``'models'`` (iterable of model names) and
        ``'number_of_trajectories'`` (int).
    entries_per_run : int, default 100
        Number of sample points produced per state variable for each run.

    Returns
    -------
    list of pandas.DataFrame
        One frame per (model, run), in iteration order. Each frame is indexed
        by ``timestamp`` and has the columns ``variable`` and ``value``:
        ``entries_per_run`` rows for each state variable, followed by one
        ``'model_name'`` row and one ``'total_time'`` row (a ``Timedelta``).

    Raises
    ------
    ValueError
        If ``entries_per_run`` is smaller than 1.
    """
    if entries_per_run < 1:
        raise ValueError(f'entries_per_run must be at least 1, got {entries_per_run}')

    state_vars = ['x', 'y', 'vx', 'vy', 'theta', 'vtheta']
    all_data = []
    for model_name in info['models']:
        for run_idx in range(info['number_of_trajectories']):
            raw = pd.read_parquet(f'data/lander/{model_name}_{run_idx}_trajectory.parquet')

            # index plays the role of timestep (seconds)
            # NOTE(review): assumes the index is numeric and sorted ascending,
            # which np.interp requires -- confirm against the data writer.
            seconds = raw.index.to_numpy(dtype=float)
            stamps = pd.to_datetime(raw.index, unit='s')
            total_time = stamps[-1] - stamps[0]

            # Build a grid with exactly `entries_per_run` points (both ends
            # included) and interpolate against *all* original samples.
            # resample(...).interpolate() would first reindex to the new grid
            # and throw away every sample that is not exactly on it.
            grid = np.linspace(seconds[0], seconds[-1], entries_per_run)
            grid_index = pd.DatetimeIndex(pd.to_datetime(grid, unit='s'), name='timestamp')
            resampled = pd.DataFrame(
                {
                    name: np.interp(grid, seconds, raw[name].to_numpy(dtype=float))
                    for name in state_vars
                },
                index=grid_index,
            )

            long_df = pd.melt(resampled,
                              value_vars=state_vars,
                              var_name='variable',
                              ignore_index=False,
                              value_name='value')

            # Metadata is appended as extra rows. After the melt every
            # timestamp label occurs once per variable, so assigning through
            # .loc[label] would overwrite real samples instead of adding rows.
            meta = pd.DataFrame(
                {
                    'variable': ['model_name', 'total_time'],
                    'value': [model_name, total_time],
                },
                index=grid_index[[0, -1]],
            )
            all_data.append(pd.concat([long_df, meta]))

    return all_data
# Read the run description and build the per-run frames. The context manager
# closes the JSON file handle, which the bare open() call left open.
with open('data/lander/info.json', 'r') as info_file:
    info = json.load(info_file)
all_data = uniform_data_for_autoencoder(info)

In [25]:
# Exploratory, single-run version of the resampling step, kept flat so the
# intermediate frame can be inspected.
entries_per_run = 100
df = pd.read_parquet('data/lander/better_0_trajectory.parquet')

# The index is treated as seconds and promoted to a DatetimeIndex named
# 'timestamp'.
df = df.assign(timestamp=pd.to_datetime(df.index, unit='s')).set_index('timestamp')

# The run's duration is split into `entries_per_run` equal steps and the frame
# is put on that grid.
# NOTE(review): Resampler.interpolate() reindexes onto the new grid before
# filling, so original samples that do not fall exactly on a grid timestamp
# are not used -- confirm this is intended. The grid also includes both end
# points, which looks like entries_per_run + 1 rows -- verify.
total_time = df.index[-1] - df.index[0]
delta_t = total_time / entries_per_run
df = df.resample(rule=delta_t).interpolate()

# Pair each timestamp with its positional counter in a two-level index.
multi_index = pd.MultiIndex.from_arrays(
    [np.arange(len(df)), df.index],
    names=('idx', 'timestamp'),
)
df.index = multi_index
df

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,vx,vy,theta,vtheta,leg1,leg2,action,reward
idx,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1970-01-01 00:00:00.000,0.000082,1.408276,8.304475e-03,-1.174839e-01,-0.000088,-1.881086e-03,0.00,0.00,0.00,-2.248130
1,1970-01-01 00:00:02.020,0.000679,1.386497,7.077576e-03,-1.216244e-01,0.000965,-2.978492e-03,0.00,0.00,0.06,-2.228006
2,1970-01-01 00:00:04.040,0.001276,1.364719,5.850677e-03,-1.257648e-01,0.002018,-4.075898e-03,0.00,0.00,0.12,-2.207882
3,1970-01-01 00:00:06.060,0.001872,1.342940,4.623778e-03,-1.299053e-01,0.003071,-5.173305e-03,0.00,0.00,0.18,-2.187757
4,1970-01-01 00:00:08.080,0.002469,1.321161,3.396879e-03,-1.340457e-01,0.004124,-6.270711e-03,0.00,0.00,0.24,-2.167633
...,...,...,...,...,...,...,...,...,...,...,...
96,1970-01-01 00:03:13.920,0.012658,0.024291,-4.243259e-03,-2.596049e-02,0.004889,-4.540054e-03,0.92,0.92,0.24,91.900647
97,1970-01-01 00:03:15.940,0.012283,0.017877,-3.182451e-03,-1.947037e-02,0.003852,-3.405025e-03,0.94,0.94,0.18,93.925485
98,1970-01-01 00:03:17.960,0.011908,0.011464,-2.121642e-03,-1.298025e-02,0.002816,-2.269995e-03,0.96,0.96,0.12,95.950323
99,1970-01-01 00:03:19.980,0.011533,0.005050,-1.060833e-03,-6.490123e-03,0.001779,-1.134966e-03,0.98,0.98,0.06,97.975162


In [26]:
# Reshape to long format: one row per (idx, timestamp, variable), keeping the
# existing two-level index. Only the six state columns are carried over.
# Note: this rebinds `df`, so the cell cannot be re-run without first
# re-running the cell that builds the wide frame.
state_columns = ['x', 'y', 'vx', 'vy', 'theta', 'vtheta']
df = df.melt(
    value_vars=state_columns,
    var_name='variable',
    value_name='value',
    col_level=0,
    ignore_index=False,
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,variable,value
idx,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1970-01-01 00:00:00.000,x,8.220672e-05
1,1970-01-01 00:00:02.020,x,6.788788e-04
2,1970-01-01 00:00:04.040,x,1.275551e-03
3,1970-01-01 00:00:06.060,x,1.872223e-03
4,1970-01-01 00:00:08.080,x,2.468895e-03
...,...,...,...
96,1970-01-01 00:03:13.920,vtheta,-4.540054e-03
97,1970-01-01 00:03:15.940,vtheta,-3.405025e-03
98,1970-01-01 00:03:17.960,vtheta,-2.269995e-03
99,1970-01-01 00:03:19.980,vtheta,-1.134966e-03


In [30]:
# Show the long-format frame transposed: one column per (idx, timestamp) row.
df.transpose()

idx,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
timestamp,1970-01-01 00:00:00.000,1970-01-01 00:00:02.020,1970-01-01 00:00:04.040,1970-01-01 00:00:06.060,1970-01-01 00:00:08.080,1970-01-01 00:00:10.100,1970-01-01 00:00:12.120,1970-01-01 00:00:14.140,1970-01-01 00:00:16.160,1970-01-01 00:00:18.180,...,1970-01-01 00:03:03.820,1970-01-01 00:03:05.840,1970-01-01 00:03:07.860,1970-01-01 00:03:09.880,1970-01-01 00:03:11.900,1970-01-01 00:03:13.920,1970-01-01 00:03:15.940,1970-01-01 00:03:17.960,1970-01-01 00:03:19.980,1970-01-01 00:03:22.000
variable,x,x,x,x,x,x,x,x,x,x,...,vtheta,vtheta,vtheta,vtheta,vtheta,vtheta,vtheta,vtheta,vtheta,vtheta
value,0.000082,0.000679,0.001276,0.001872,0.002469,0.003066,0.003662,0.004259,0.004856,0.005452,...,-0.010215,-0.00908,-0.007945,-0.00681,-0.005675,-0.00454,-0.003405,-0.00227,-0.001135,0.0
