# Rebuild agent trajectories 

## Import libraries

In [1]:
## Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## Read in data

In [2]:
## Read in data
df = pd.read_csv('../input/output_siouxFalls_trajectory_raw.gz')

## Replace "pt interaction" by NaN.
## The result is assigned back to the column on purpose: calling
## replace(..., inplace=True) on df['act_type'] operates on a temporary Series,
## which emits a chained-assignment warning on recent pandas and leaves df
## unchanged once Copy-on-Write is enabled.
df['act_type'] = df['act_type'].replace({"pt interaction": np.nan})

df.head(20)

Unnamed: 0,person_id,age,time,link_id,x,y,act_type,event_type,length,leg_mode
0,35868_1,28,19563.0,71_1,680946.624547,4821377.0,home,actend,423.466451,
1,35868_1,28,19563.0,71_1,680946.624547,4821377.0,,departure,423.466451,walk
2,14534_2,28,19696.0,37_7,679400.69545,4820574.0,home,actend,467.959651,
3,14534_2,28,19696.0,37_7,679400.69545,4820574.0,,departure,467.959651,car
4,14534_2,28,19696.0,,,,,PersonEntersVehicle,,
5,14534_2,28,19697.0,37_7,679400.69545,4820574.0,,left link,467.959651,
6,14534_2,28,19697.0,38_1,679400.69545,4820574.0,,entered link,467.959651,
7,14534_2,28,19716.0,38_1,679400.69545,4820574.0,,left link,467.959651,
8,14534_2,28,19716.0,38_2,679388.593414,4821042.0,,entered link,467.959651,
9,14534_2,28,19735.0,38_2,679388.593414,4821042.0,,left link,467.959651,


## Fill in NA/NaN values

In [3]:
## Create functions to fill in NA/NaN values

def forward_fill(cols, df, group):
    """
    Forward fill NA/NaN values for given columns within each group.

    A missing value is replaced by the closest non-missing value that appears
    earlier (in row order) in the same group. Values are never carried from
    one group into another, so NA/NaN values at the start of a group remain.

    Parameters:
        -cols (list or str): Column name(s) to forward fill NA/NaN values.
        -df (DataFrame): DataFrame with NA/NaN values. It is left unmodified.
        -group (str or list): Name(s) of the column(s) to groupby

    Returns:
        -filled (DataFrame): Copy of df with the requested columns filled

    Raises:
        -KeyError: If a name in cols or group is not a column of df.
    """

    # A bare string would otherwise be iterated character by character
    columns = [cols] if isinstance(cols, str) else list(cols)

    # Work on a copy so the caller's frame is untouched and re-running a
    # notebook cell always starts from the same input
    filled = df.copy()

    # Build the grouping once; df itself is never written to below, so the
    # same grouping is valid for every column
    grouped = df.groupby(group)

    # Loop over list of columns
    for col in columns:

        # Forward fill
        filled[col] = grouped[col].ffill()

    return filled

def back_fill(cols, df, group):
    """
    Backward fill NA/NaN values for given columns within each group.

    A missing value is replaced by the closest non-missing value that appears
    later (in row order) in the same group. Values are never carried from one
    group into another, so NA/NaN values at the end of a group remain.

    Parameters:
        -cols (list or str): Column name(s) to backward fill NA/NaN values.
        -df (DataFrame): DataFrame with NA/NaN values. It is left unmodified.
        -group (str or list): Name(s) of the column(s) to groupby

    Returns:
        -filled (DataFrame): Copy of df with the requested columns filled

    Raises:
        -KeyError: If a name in cols or group is not a column of df.
    """

    # A bare string would otherwise be iterated character by character
    columns = [cols] if isinstance(cols, str) else list(cols)

    # Work on a copy so the caller's frame is untouched and re-running a
    # notebook cell always starts from the same input
    filled = df.copy()

    # Build the grouping once; df itself is never written to below, so the
    # same grouping is valid for every column
    grouped = df.groupby(group)

    # Loop over list of columns
    for col in columns:

        # Backward fill
        filled[col] = grouped[col].bfill()

    return filled

In [4]:
# Fill in NA/NaN values
# All fills are done per person, so values never leak between agents.

group = 'person_id'

## Forward fill NA/NaN values
## (rows without a value take the previous value of the same person)
forward_cols = ['link_id', 'x', 'y', 'length', 'leg_mode']
df = forward_fill(forward_cols, df, group)

## Backward fill NA/NaN values
## (rows still empty take the next value of the same person; for 'leg_mode'
## this only affects rows left empty by the forward fill above)
backward_cols = ['act_type', 'leg_mode']
df = back_fill(backward_cols, df, group)

## Forward fill NA/NaN values
## (covers 'act_type' rows that have no later value to back fill from)
forward_cols = ['act_type']
df = forward_fill(forward_cols, df, group)

## Write out csv
## The file inside the archive is named with the bare file name only.
## Using the full relative path as archive_name would store a member called
## '../output/...' in the zip, i.e. an entry with a parent-directory component.
output_dir = '../output'
output_name = 'output_siouxFalls_trajectory'
filename = f'{output_dir}/{output_name}'
compression_options = dict(method='zip', archive_name=f'{output_name}.csv')
df.to_csv(f'{filename}.zip', compression=compression_options, index = False)

df.head(20)

Unnamed: 0,person_id,age,time,link_id,x,y,act_type,event_type,length,leg_mode
0,35868_1,28,19563.0,71_1,680946.624547,4821377.0,home,actend,423.466451,walk
1,35868_1,28,19563.0,71_1,680946.624547,4821377.0,work,departure,423.466451,walk
2,14534_2,28,19696.0,37_7,679400.69545,4820574.0,home,actend,467.959651,car
3,14534_2,28,19696.0,37_7,679400.69545,4820574.0,work,departure,467.959651,car
4,14534_2,28,19696.0,37_7,679400.69545,4820574.0,work,PersonEntersVehicle,467.959651,car
5,14534_2,28,19697.0,37_7,679400.69545,4820574.0,work,left link,467.959651,car
6,14534_2,28,19697.0,38_1,679400.69545,4820574.0,work,entered link,467.959651,car
7,14534_2,28,19716.0,38_1,679400.69545,4820574.0,work,left link,467.959651,car
8,14534_2,28,19716.0,38_2,679388.593414,4821042.0,work,entered link,467.959651,car
9,14534_2,28,19735.0,38_2,679388.593414,4821042.0,work,left link,467.959651,car
