In [410]:
import os
import pandas as pd

trajectories_path = "trajectories"

# Cleaning a single Trajectory (very bulky will drop a lot when we fix code)

In [411]:
traj_files = [elem for elem in os.listdir(trajectories_path) if elem[0] != '.']

In [460]:
def open_trajectories(traj_path):
    raw_df = pd.read_csv(os.path.join(trajectories_path, traj_path), header= None)
    raw_df.columns = ['objX', 'objY', 'd', 'lr_command', 'ud_command']
    raw_df['objX'] = raw_df.objX.replace(' None', -1).replace('None', -1).astype(int)
    raw_df['objY'] = raw_df.objY.replace(' None', -1).replace('None', -1).astype(int)
    raw_df['t'] = raw_df.index
    return raw_df

def fix_traj(example_traj, columns_off = False):
    if not columns_off:
        return example_traj[['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']]
    no_action_inds = ((example_traj.d == -1) | (example_traj.d > 25)) & ((example_traj.lr_command == 0) & (example_traj.ud_command == 0))
    no_action_entries = example_traj[no_action_inds]
    action_entries = example_traj[~no_action_inds]
    action_entries.columns = ['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']
    return pd.concat([action_entries, no_action_entries], join = 'outer').sort_values('t')[['objX', 'objY', 'lr_command', 'ud_command', 'd', 't']]

In [464]:
example_traj = open_trajectories(traj_files[0])
example_traj = fix_traj(example_traj, columns_off = True)
example_traj_2 = open_trajectories(traj_files[1])
example_traj_2 = fix_traj(example_traj_2, columns_off = False)

In [472]:
all_trajectories = pd.concat([example_traj, example_traj_2])

In [473]:
all_trajectories

Unnamed: 0,objX,objY,lr_command,ud_command,d,t
0,195,124,0.000000,0.000000,-1.000000,0
1,195,124,0.000000,0.000000,-1.000000,1
2,195,124,0.000000,0.000000,-1.000000,2
3,195,124,0.000000,0.000000,-1.000000,3
4,195,124,0.000000,0.000000,-1.000000,4
...,...,...,...,...,...,...
1636,208,168,-9.767019,2.236068,0.153207,1636
1637,208,168,-12.519181,0.000000,5.657536,1637
1638,208,167,-9.024288,1.000000,5.657564,1638
1639,207,167,-11.819303,1.000000,2.194689,1639


# Creating multiple episodes from one (not used)

In [470]:
def get_inds_when_becomes_confused(example_traj):
    last_one = False
    result = []
    for i, row in example_traj.iterrows():
        if row['lr_command'] == 0 and row['ud_command'] == 0:
            if not last_one:
                result.append(i)
            last_one = True
        else:
            last_one = False
    return result

def divide_indices(inds, no_confusions = 5, min_steps = 200):
    raw_result = inds[::no_confusions]
    cleaned = [raw_result[0]]
    for i in inds[1:]:
        if i - cleaned[-1] >= 200:
            cleaned.append(i)
    return cleaned

def divide_traj(example_traj, no_confusions = 5):
    results = []
    inds = get_inds_when_becomes_confused(example_traj)
    dividers = divide_indices(inds, no_confusions, min_steps = 200)
    for i in range(1, len(dividers)):
        if dividers[i] != 0:
            results.append(example_traj.iloc[dividers[i-1]: dividers[i],:])
    return results
        

In [471]:
divide_traj(example_traj, no_confusions = 5)[1]

Unnamed: 0,objX,objY,lr_command,ud_command,d,t
204,227,171,0.000000,0.000000,-1.000000,204
205,227,171,0.000000,0.000000,-1.000000,205
206,229,169,24.024084,-9.578706,2.828427,206
207,229,169,0.000000,0.000000,-1.000000,207
208,229,169,0.000000,0.000000,-1.000000,208
...,...,...,...,...,...,...
422,229,169,0.000000,0.000000,-1.000000,422
423,229,169,0.000000,0.000000,-1.000000,423
424,229,169,0.000000,0.000000,-1.000000,424
425,210,155,6.506955,-3.137891,23.600847,425


# REGRESSION TASK, ESTIMATING POLICY (ACTION) BASED STRAIGHT UPON FEATURES OR MODIFIED FEATURES

In [475]:
import numpy as np

#buckets are [(-1, 0), (0, 10),...], center and discretize
center_x = 20 / 2
center_y = int(15 / 2)
bucketed_x = np.digitize(all_trajectories.objX, [-1] + np.linspace(0, 399, 21)) - 1 - center_x
bucketed_y = np.digitize(all_trajectories.objY, [-1] + np.linspace(0, 299, 16)) - 1 - center_y

# keep only actions which are sent to bot
bucketed_x = np.array([i if i != -10 else 0 for i in bucketed_x])
bucketed_y = np.array([i if i != - int(15 / 2) else 0 for i in bucketed_y])
                      

# boolean d, is valid or not (do we send action)
d_buckets = [0, 25]

bucketed_d = np.array([1 if i == 1 else 0 for i in np.digitize(all_trajectories.d, d_buckets)])

In [503]:
def center_normalize_reshape(x, center = 200, max_val = 400):
    centered_x = np.array((x - center) / max_val)
    return centered_x

centered_x = center_normalize(all_trajectories.objX, center = 200, max_val = 400)
centered_y = center_normalize(all_trajectories.objY, center = 150, max_val = 300)
#centered_x = np.array([centered_x]).T

In [504]:
from sklearn.preprocessing import MinMaxScaler as mms

mms((-1, 1))

MinMaxScaler(feature_range=(-1, 1))

In [505]:
center_normalize(200, center = 200, max_val = 400)

array(0.)

In [506]:
squared_x_buckets = bucketed_x * bucketed_x
squared_y_buckets = bucketed_y * bucketed_y
d_x = bucketed_x * bucketed_d
d_y = bucketed_y * bucketed_d
d_x_sq = squared_x_buckets * bucketed_d
d_x_cu = d_x_sq * bucketed_x


d_centered_x = centered_x * bucketed_d
d_centered_y = centered_y * bucketed_d

centered_x_sq = centered_x * centered_x
centered_y_sq = centered_y * centered_y

d_x_sq = centered_x_sq * bucketed_d
d_y_sq = centered_y_sq * bucketed_d

In [577]:
X = np.array([d_centered_x]).T
y = all_trajectories.lr_command
y_norm = y / 40


x_model = sklearn.linear_model.LinearRegression(fit_intercept = False)
x_model.fit(X, y_norm)
            
print("doing nothing, MSE:", MSE(y_norm, np.full(len(y), np.mean(y))))
print("with policy, MSE:", MSE(y_norm, x_model.predict(X)))

x_model.coef_

doing nothing, MSE: 0.8960059991274786
with policy, MSE: 0.07286096798438983


array([5.8777679])

In [579]:
X = np.array([d_centered_y]).T
y = all_trajectories.ud_command
y_norm = y / 40

y_model = sklearn.linear_model.LinearRegression(fit_intercept = False)
y_model.fit(X, y_norm)

print("doing nothing, MSE:", MSE(y_norm, np.full(len(y), np.mean(y))))
print("with policy, MSE:", MSE(y_norm, y_model.predict(X)))
y_model.coef_

doing nothing, MSE: 2.085661346486914
with policy, MSE: 0.01901513365129206


array([-3.75406534])

In [563]:
def bucket_xyd(x, y, d):
    center_x = 20 / 2
    center_y = int(15 / 2)
    bucketed_x = np.digitize(x, [-1] + np.linspace(0, 399, 21)) - 1 - center_x
    bucketed_y = np.digitize(y, [-1] + np.linspace(0, 299, 16)) - 1 - center_y
    d_buckets = [0, 25]
    bucketed_d = 1 if np.digitize(d, d_buckets) == 1 else 0
    return bucketed_x, bucketed_y, bucketed_d

def normalize_center_reshape_xyd(x, y, d):
    centered_x = center_normalize(x, center = 200, max_val = 400)
    centered_y = center_normalize(y, center = 150, max_val = 300)
    d_buckets = [0, 25]
    bucketed_d = 1 if np.digitize(d, d_buckets) == 1 else 0
    return centered_x, centered_y, bucketed_d

# Left is positive, right is negative, Up is negative down is positive, 
def get_policy(x, y, d, x_model, y_model):
    #x, y, d = bucket_xyd(x, y, d)
    x, y, d = normalize_center_reshape_xyd(x, y, d)
    return x_model.predict(np.array([x * d]).reshape(1, -1))[0] * 40, y_model.predict(np.array([y * d]).reshape(1, -1))[0] * 40

In [564]:
get_policy(0, 0, 20, x_model, y_model)

(-117.5553580267291, 75.08130685603757)

In [565]:
possible_obj_coords = []
for i in range(400):
    for j in range(300):
        possible_obj_coords.append("_".join([str(i), str(j)]))

In [567]:
policy_dict = {}

def clip_actions(a):
    if a < -40:
        return -40
    elif a > 40:
        return 40
    else:
        return a

for s in possible_obj_coords:
    coords = [int(i) for i in s.split("_")]
    x, y, d = coords[0], coords[1], 1
    lr, ud = get_policy(x, y, d, x_model, y_model)
    policy_dict[s] = (clip_actions(lr), clip_actions(ud))
    

In [571]:
pd.DataFrame([{'state': k, 'lr_command': v[0], 'ud_command':v[1]} for k,v in policy_dict.items()]).to_csv('policy.csv')

In [575]:
def make_policy_dict(policy_path):
    policy_df = pd.read_csv('policy.csv')
    return {row['state']: (row['lr_command'], row['ud_command']) for i, row in policy_df.iterrows()}
    
def xy_to_s(x, y):
    return str(x) + "_" + str(y)


make_policy_dict('policy.csv')

{'0_0': (-40.0, 40.0),
 '0_1': (-40.0, 40.0),
 '0_2': (-40.0, 40.0),
 '0_3': (-40.0, 40.0),
 '0_4': (-40.0, 40.0),
 '0_5': (-40.0, 40.0),
 '0_6': (-40.0, 40.0),
 '0_7': (-40.0, 40.0),
 '0_8': (-40.0, 40.0),
 '0_9': (-40.0, 40.0),
 '0_10': (-40.0, 40.0),
 '0_11': (-40.0, 40.0),
 '0_12': (-40.0, 40.0),
 '0_13': (-40.0, 40.0),
 '0_14': (-40.0, 40.0),
 '0_15': (-40.0, 40.0),
 '0_16': (-40.0, 40.0),
 '0_17': (-40.0, 40.0),
 '0_18': (-40.0, 40.0),
 '0_19': (-40.0, 40.0),
 '0_20': (-40.0, 40.0),
 '0_21': (-40.0, 40.0),
 '0_22': (-40.0, 40.0),
 '0_23': (-40.0, 40.0),
 '0_24': (-40.0, 40.0),
 '0_25': (-40.0, 40.0),
 '0_26': (-40.0, 40.0),
 '0_27': (-40.0, 40.0),
 '0_28': (-40.0, 40.0),
 '0_29': (-40.0, 40.0),
 '0_30': (-40.0, 40.0),
 '0_31': (-40.0, 40.0),
 '0_32': (-40.0, 40.0),
 '0_33': (-40.0, 40.0),
 '0_34': (-40.0, 40.0),
 '0_35': (-40.0, 40.0),
 '0_36': (-40.0, 40.0),
 '0_37': (-40.0, 40.0),
 '0_38': (-40.0, 40.0),
 '0_39': (-40.0, 40.0),
 '0_40': (-40.0, 40.0),
 '0_41': (-40.0, 40.0),
 '

In [543]:
import time

start = time.time()
x = policy_dict["0_0"]
end = time.time()
start-end

-7.128715515136719e-05

In [525]:
x, y, d = -.5,-.5, 1
np.array([x * d, x*x*d]).reshape(1, -1)

x_model.predict(np.array([x * d, x*x*d]).reshape(1, -1))

array([-149.63757038])

In [519]:
def evaluate_policy(x_model, y_model, trajectory):
    predictions = []
    for i, row in trajectory:
        

SyntaxError: unexpected EOF while parsing (2127487220.py, line 4)