In [96]:
import os
import pandas as pd
import sklearn.linear_model, sklearn.metrics
import numpy as np


trajectories_path = "behavioral_cloning/trajectories"

# Cleaning a single Trajectory (very bulky will drop a lot when we fix code)

In [97]:
traj_files = [elem for elem in os.listdir(trajectories_path) if elem[0] != '.']

In [98]:
def open_trajectories(traj_path):
    raw_df = pd.read_csv(os.path.join(trajectories_path, traj_path), header= None)
    raw_df.columns = ['objX', 'objY', 'd', 'lr_command', 'ud_command']
    raw_df['objX'] = raw_df.objX.replace(' None', -1).replace('None', -1).astype(int)
    raw_df['objY'] = raw_df.objY.replace(' None', -1).replace('None', -1).astype(int)
    raw_df['t'] = raw_df.index
    return raw_df

def fix_traj(example_traj, columns_off = False):
    if not columns_off:
        return example_traj[['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']]
    no_action_inds = ((example_traj.d == -1) | (example_traj.d > 25)) & ((example_traj.lr_command == 0) & (example_traj.ud_command == 0))
    no_action_entries = example_traj[no_action_inds]
    action_entries = example_traj[~no_action_inds]
    action_entries.columns = ['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']
    return pd.concat([action_entries, no_action_entries], join = 'outer').sort_values('t')[['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']]

ONLY TWO TRAJECTORIES USED

In [99]:
example_traj = open_trajectories(traj_files[0])
example_traj = fix_traj(example_traj, columns_off = True)

example_traj_2 = open_trajectories(traj_files[1])
example_traj_2 = fix_traj(example_traj_2, columns_off = False)

In [100]:
all_trajectories = pd.concat([example_traj, example_traj_2])

In [101]:
all_trajectories.iloc[70:90, :].head(n = 25)

Unnamed: 0,objX,objY,lr_command,ud_command,d,t
70,195,124,0.0,0.0,-1.0,70
71,195,124,0.0,0.0,-1.0,71
72,195,124,0.0,0.0,-1.0,72
73,195,124,0.0,0.0,-1.0,73
74,190,138,-7.75561,9.306729,14.866069,74
75,190,138,-7.001346,8.401615,0.0,75
76,192,136,-4.453138,10.950209,2.828427,76
77,192,136,0.0,0.0,-1.0,77
78,192,136,0.0,0.0,-1.0,78
79,192,136,0.0,0.0,-1.0,79


In [102]:
all_trajectories.describe()

Unnamed: 0,objX,objY,lr_command,ud_command,d,t
count,4895.0,4895.0,4895.0,4895.0,4895.0,4895.0
mean,200.674974,152.761593,0.87523,-1.454652,2.616318,1356.128907
std,65.961119,47.646873,16.387117,10.891523,26.156727,898.285647
min,-1.0,-1.0,-40.0,-40.0,-40.0,0.0
25%,177.0,131.0,-0.726415,0.0,-1.0,611.5
50%,204.0,161.0,0.0,0.0,-1.0,1223.0
75%,235.0,172.0,2.169158,1.0,1.414214,2029.5
max,372.0,246.0,40.0,40.0,267.860038,3253.0


# Creating multiple episodes from one (not used)

In [103]:
def get_inds_when_becomes_confused(example_traj):
    last_one = False
    result = []
    for i, row in example_traj.iterrows():
        if row['lr_command'] == 0 and row['ud_command'] == 0:
            if not last_one:
                result.append(i)
            last_one = True
        else:
            last_one = False
    return result

def divide_indices(inds, no_confusions = 5, min_steps = 200):
    raw_result = inds[::no_confusions]
    cleaned = [raw_result[0]]
    for i in inds[1:]:
        if i - cleaned[-1] >= 200:
            cleaned.append(i)
    return cleaned

def divide_traj(example_traj, no_confusions = 5):
    results = []
    inds = get_inds_when_becomes_confused(example_traj)
    dividers = divide_indices(inds, no_confusions, min_steps = 200)
    for i in range(1, len(dividers)):
        if dividers[i] != 0:
            results.append(example_traj.iloc[dividers[i-1]: dividers[i],:])
    return results
        

In [104]:
divide_traj(example_traj, no_confusions = 5)[1]

Unnamed: 0,objX,objY,lr_command,ud_command,d,t
204,227,171,0.000000,0.000000,-1.000000,204
205,227,171,0.000000,0.000000,-1.000000,205
206,229,169,24.024084,-9.578706,2.828427,206
207,229,169,0.000000,0.000000,-1.000000,207
208,229,169,0.000000,0.000000,-1.000000,208
...,...,...,...,...,...,...
422,229,169,0.000000,0.000000,-1.000000,422
423,229,169,0.000000,0.000000,-1.000000,423
424,229,169,0.000000,0.000000,-1.000000,424
425,210,155,6.506955,-3.137891,23.600847,425


# REGRESSION TASK

##  Assembling the features, discretized and centered

In [105]:
#buckets are [(-1, 0), (0, 10),...], center and discretize
center_x = 20 / 2
center_y = int(15 / 2)
bucketed_x = np.digitize(all_trajectories.objX, [-1] + np.linspace(0, 399, 21)) - 1 - center_x
bucketed_y = np.digitize(all_trajectories.objY, [-1] + np.linspace(0, 299, 16)) - 1 - center_y

# keep only actions which are sent to bot
bucketed_x = np.array([i if i != -10 else 0 for i in bucketed_x])
bucketed_y = np.array([i if i != - int(15 / 2) else 0 for i in bucketed_y])
                      

# boolean d, is valid or not (do we send action)
d_buckets = [0, 25]

bucketed_d = np.array([1 if i == 1 else 0 for i in np.digitize(all_trajectories.d, d_buckets)])

## Function to center normalize reshape data

In [106]:
def center_normalize_reshape(x, center = 200, max_val = 400):
    centered_x = np.array((x - center) / max_val)
    return centered_x

centered_x = center_normalize_reshape(all_trajectories.objX, center = 200, max_val = 400)
centered_y = center_normalize_reshape(all_trajectories.objY, center = 150, max_val = 300)
#centered_x = np.array([centered_x]).T

## More feature engineering, some unused

In [107]:
squared_x_buckets = bucketed_x * bucketed_x
squared_y_buckets = bucketed_y * bucketed_y
d_x = bucketed_x * bucketed_d
d_y = bucketed_y * bucketed_d
d_x_sq = squared_x_buckets * bucketed_d
d_x_cu = d_x_sq * bucketed_x


d_centered_x = centered_x * bucketed_d
d_centered_y = centered_y * bucketed_d

centered_x_sq = centered_x * centered_x
centered_y_sq = centered_y * centered_y

d_x_sq = centered_x_sq * bucketed_d
d_y_sq = centered_y_sq * bucketed_d

## Using the feature x centered * d 

We regress to come up with model for left right action at each state

In [108]:
X = np.array([d_centered_x]).T
y = all_trajectories.lr_command
y_norm = y / 40


x_model = sklearn.linear_model.LinearRegression(fit_intercept = False)
x_model.fit(X, y_norm)
            
print("doing nothing, MSE:", sklearn.metrics.mean_squared_error(y_norm, np.full(len(y), np.mean(y))))
print("with policy, MSE:", sklearn.metrics.mean_squared_error(y_norm, x_model.predict(X)))

bl_x_model_MSE = sklearn.metrics.mean_squared_error(y_norm, np.full(len(y), np.mean(y)))
x_model_MSE = sklearn.metrics.mean_squared_error(y_norm, x_model.predict(X))
x_model.coef_

doing nothing, MSE: 0.8960059991274786
with policy, MSE: 0.07286096798438983


array([5.8777679])

## Using the feature y centered * d 

We regress to come up with model for up down action at each state

In [109]:
X = np.array([d_centered_y]).T
y = all_trajectories.ud_command
y_norm = y / 40

y_model = sklearn.linear_model.LinearRegression(fit_intercept = False)
y_model.fit(X, y_norm)

           
print("doing nothing, MSE:", sklearn.metrics.mean_squared_error(y_norm, np.full(len(y), np.mean(y))))
print("with policy, MSE:", sklearn.metrics.mean_squared_error(y_norm, x_model.predict(X)))

bl_y_model_MSE = sklearn.metrics.mean_squared_error(y_norm, np.full(len(y), np.mean(y)))
y_model_MSE = sklearn.metrics.mean_squared_error(y_norm, x_model.predict(X))

y_model.coef_

doing nothing, MSE: 2.085661346486914
with policy, MSE: 0.39050556144541504


array([-3.75406534])

In [110]:
def model_str(model, label = "L/R", state_type = 'X'):
    return label + " Action = " + str(model.coef_[0])[:5] + " * " + state_type + '_coord'
model_str(x_model)

'L/R Action = 5.877 * X_coord'

In [111]:
lr_results = pd.DataFrame([{'Baseline MSE': bl_x_model_MSE, 'Model MSE': x_model_MSE, 'Model Formula': model_str(x_model)}, {'Baseline MSE': bl_y_model_MSE, 'Model MSE': y_model_MSE, 'Model Formula': model_str(y_model, "U/D", 'Y')}])
lr_results.index = ['x model', 'y model']
lr
lr_results

Unnamed: 0,Baseline MSE,Model MSE,Model Formula
x model,0.896006,0.072861,L/R Action = 5.877 * X_coord
y model,2.085661,0.390506,U/D Action = -3.75 * Y_coord


## Going from model to policy, get policy takes state fatures and outputs the corresponding action

In [112]:
def bucket_xyd(x, y, d):
    center_x = 20 / 2
    center_y = int(15 / 2)
    bucketed_x = np.digitize(x, [-1] + np.linspace(0, 399, 21)) - 1 - center_x
    bucketed_y = np.digitize(y, [-1] + np.linspace(0, 299, 16)) - 1 - center_y
    d_buckets = [0, 25]
    bucketed_d = 1 if np.digitize(d, d_buckets) == 1 else 0
    return bucketed_x, bucketed_y, bucketed_d

def normalize_center_reshape_xyd(x, y, d):
    centered_x = center_normalize_reshape(x, center = 200, max_val = 400)
    centered_y = center_normalize_reshape(y, center = 150, max_val = 300)
    d_buckets = [0, 25]
    bucketed_d = 1 if np.digitize(d, d_buckets) == 1 else 0
    return centered_x, centered_y, bucketed_d

# Left is positive, right is negative, Up is negative down is positive, 
def get_policy(x, y, d, x_model, y_model):
    #x, y, d = bucket_xyd(x, y, d)
    x, y, d = normalize_center_reshape_xyd(x, y, d)
    return x_model.predict(np.array([x * d]).reshape(1, -1))[0] * 40, y_model.predict(np.array([y * d]).reshape(1, -1))[0] * 40

In [113]:
get_policy(0, 0, 20, x_model, y_model)

(-117.5553580267291, 75.08130685603757)

## Filling the action dictionary for inference, we can do this since there is actually a discrete amount of states, handle the interaction by d using code (we only query states by x and y coordinates, d is handled by implementation since it is a simple conditional

In [114]:
possible_obj_coords = []
for i in range(400):
    for j in range(300):
        possible_obj_coords.append("_".join([str(i), str(j)]))

In [115]:
policy_dict = {}

def clip_actions(a):
    if a < -40:
        return -40
    elif a > 40:
        return 40
    else:
        return a

for s in possible_obj_coords:
    coords = [int(i) for i in s.split("_")]
    x, y, d = coords[0], coords[1], 1
    lr, ud = get_policy(x, y, d, x_model, y_model)
    policy_dict[s] = (clip_actions(lr), clip_actions(ud))
    

## Save policy as csv file

In [116]:
pd.DataFrame([{'state': k, 'lr_command': v[0], 'ud_command':v[1]} for k,v in policy_dict.items()]).to_csv('policy.csv')

## Function to make a policy dict from saved policy csv

In [None]:
def make_policy_dict(policy_path):
    policy_df = pd.read_csv('policy.csv')
    return {row['state']: (row['lr_command'], row['ud_command']) for i, row in policy_df.iterrows()}
    
def xy_to_s(x, y):
    return str(x) + "_" + str(y)


make_policy_dict('policy.csv')

## Timing inference

In [543]:
import time

start = time.time()
x = policy_dict["0_0"]
end = time.time()
start-end

-7.128715515136719e-05

In [525]:
x, y, d = -.5,-.5, 1
np.array([x * d, x*x*d]).reshape(1, -1)

x_model.predict(np.array([x * d, x*x*d]).reshape(1, -1))

array([-149.63757038])