# MEIRL

In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
area = "london"
filename = "data/" + area + "/adjacency_matrix.csv"
adjacency_matrix = pd.read_csv(filename, index_col=0)

In [3]:
expert_feature_count = adjacency_matrix.copy()
expert_feature_count["value"] = 0
expert_feature_count = expert_feature_count["value"]
expert_feature_count = expert_feature_count.to_frame()
learner_feature_count = expert_feature_count.copy()
test_feature_count = expert_feature_count.copy()

In [4]:
# cross_validation
all_data = 400
training_number = int(all_data * 0.75)
test_number = int(all_data * 0.25)

# Import expert trajectory

### prepare dataset for training

In [5]:
start_point_list = []
trajectory_length_list = []
for i in range (1,training_number):
    filename = "data/" + area + "/" + str(i)
    file = open(filename, "r")
    expert_trajectory = file.read().split(', ')
    start_point_list.append(expert_trajectory[0])
    trajectory_length = len(expert_trajectory)
    trajectory_length_list.append(trajectory_length)
    for j in expert_trajectory:
        current_count = int(expert_feature_count.loc[int(j), ["value"]].values[0])
        add_count = expert_trajectory.count(j)
        new_count = current_count + add_count 
        expert_feature_count.loc[int(j), ["value"]] = new_count
expert_feature_count = (expert_feature_count - expert_feature_count.min()) / expert_feature_count.max() - expert_feature_count.min()    

### prepare for testing

In [6]:
for i in range (training_number + 1 , all_data):
    filename = "data/" + area + "/" + str(i)
    file = open(filename, "r")
    expert_trajectory = file.read().split(', ')
    for j in expert_trajectory:
        current_count = int(test_feature_count.loc[int(j), ["value"]].values[0])
        add_count = expert_trajectory.count(j)
        new_count = current_count + add_count 
        test_feature_count.loc[int(j), ["value"]] = new_count
test_feature_count = (test_feature_count - test_feature_count.min()) / test_feature_count.max() - test_feature_count.min()    

# Let's train

In [7]:
#Initialization of reward parameter
reward_parameter = expert_feature_count.copy()
reward_parameter.columns = ["value"]
reward_parameter["value"] = np.random.random_sample(reward_parameter.shape[0])

In [8]:
def update_position(current_position, eps, reward):
    
    action_list = np.array(adjacency_matrix[adjacency_matrix[current_position] == 1].index)
    value_list = reward[reward.index.isin(action_list)]

    if np.random.random() > eps:
        
        total_reward_lists = []
        for i in range(len(action_list)):
            immediate_reward = value_list.values[i][0]
            one_step_position = value_list.index[i]
            one_step_action_list = np.array(adjacency_matrix[adjacency_matrix[str(one_step_position)] == 1].index)
            one_step_value_list = reward[reward.index.isin(one_step_action_list)]
            for j in range(len(one_step_value_list)):
                total_reward_list = []
                one_step_reward = one_step_value_list.values[j][0]
                total_reward = immediate_reward + one_step_reward
                total_reward_list = [total_reward, i, j]
                total_reward_lists.append(total_reward_list)
        total_reward_lists = np.array(total_reward_lists)
        highest_reward = np.argmax(total_reward_lists[:,0])
        updated_position = action_list[int(total_reward_lists[highest_reward][1])]

    else:
        updated_position = random.choice(value_list["value"].index.tolist())

    return updated_position

In [9]:
# how many iterations?
Y = []
iteration = 300
for i in range (iteration):
    epsilon = 1 / (i+1)  
    param = 1 / (i+1)
    learner_feature_count["value"] = 0
    for j in range(len(start_point_list)):
        current_position = start_point_list[int(j)]
        trajectory_length = trajectory_length_list[int(j)]
        # agent will walk for 50 steps
        for k in range(trajectory_length):
            current_position = str(update_position(current_position, epsilon, reward_parameter))
            learner_feature_count.loc[int(current_position), ["value"]] += 1
    
    learner_feature_count = (learner_feature_count - learner_feature_count.min()) / (learner_feature_count.max() - learner_feature_count.min() )   
    gradient = (expert_feature_count - learner_feature_count) 
    reward_parameter = (gradient*param) + reward_parameter
    reward_parameter = (reward_parameter - reward_parameter.min()) / (reward_parameter.max() - reward_parameter.min() )   
    
    difference = gradient.abs().sum().values[0]
    Y.append(difference)
    print(i, difference)
    
    correlation_dataframe = pd.concat([learner_feature_count, test_feature_count], axis=1)
    correlation = correlation_dataframe.corr().values[0][1]
    print(correlation)

0 125.300536673
0.00955857751216
1 138.966248038
0.103267048467
2 115.02968508
0.191656687716
3 103.207331731
0.194373753282
4 105.756269061
0.302058507586
5 107.076780627
0.28640567223
6 104.844635628
0.375061212888
7 102.571257355
0.339208033587
8 101.634978229
0.388840334891
9 103.084004443
0.332133252719
10 101.330441899
0.423472147982
11 104.351100574
0.355538356698
12 102.014071295
0.420043934192
13 101.573345259
0.502836128402
14 101.996068848
0.449243826039
15 101.272435897
0.473708119151
16 101.869737259
0.412826273287
17 104.403846154
0.365506026115
18 102.72027972
0.386680397732
19 102.169386169
0.433902755263
20 102.16509535
0.472046541628
21 104.21518636
0.434012761759
22 104.839140271
0.477830056232
23 105.991998877
0.356163019153
24 104.566433566
0.440191617421
25 105.063864707
0.431007873117
26 103.775467775
0.456334252613
27 102.692712551
0.478976375437
28 102.541135163
0.39436513826
29 103.6698392
0.444281944694
30 101.995908347
0.40576299345
31 105.820720992
0.354466

254 105.193051998
0.464121269054
255 102.571637427
0.425078173752
256 105.028508011
0.460538716087
257 106.293599819
0.379113900383
258 104.040825741
0.400311225513
259 105.160377358
0.377736889064
260 104.902564103
0.435260666356
261 105.247777778
0.369035310739
262 105.732545745
0.335734464742
263 105.820617585
0.382463320771
264 105.150220681
0.353976956077
265 103.15679725
0.417228208656
266 103.496370531
0.432674312492
267 103.666788933
0.436604454724
268 103.767119155
0.410353168697
269 104.356151736
0.405472084573
270 106.188507145
0.397603829072
271 106.033038358
0.437014393839
272 105.324230367
0.400846268611
273 104.701235609
0.463465026307
274 105.757537082
0.437385065225
275 103.792510121
0.445717434088
276 104.428618457
0.351479639901
277 106.4725547
0.375974197913
278 104.429974916
0.360669382367
279 105.073066972
0.337828653651
280 105.568654775
0.332330428174
281 103.796550963
0.423803160301
282 105.251725019
0.369322595961
283 106.987826394
0.347271760914
284 107.82883

# Local Depth

In [10]:
#local depth
filename = "data/" + area + "/" + "closeness_centrality.csv"

closeness_centrality = pd.read_csv(filename, index_col=0)
adjacent_node = adjacency_matrix.sum().to_frame()
adjacent_node.columns = ["closeness_centrality"]
adjacent_node.index = adjacent_node.index.map(int)
mean_depth = (closeness_centrality - adjacent_node) / (adjacent_node.shape[0] - 1)
relative_asymmetry = (2*(mean_depth - 1)) / (adjacent_node.shape[0] - 2)
relative_asymmetry = (relative_asymmetry - relative_asymmetry.min()) / (relative_asymmetry.max() - relative_asymmetry.min() )   

In [11]:
correlation_dataframe = pd.concat([relative_asymmetry, test_feature_count], axis=1)
correlation_local_depth = - correlation_dataframe.corr().values[0][1]

In [12]:
correlation_local_depth

0.45406804231592879

# Connectivity 

In [13]:
mean_depth = adjacent_node / (adjacent_node.shape[0] - 1)
relative_asymmetry = (2*(mean_depth - 1)) / (adjacent_node.shape[0] - 2)
relative_asymmetry = (relative_asymmetry - relative_asymmetry.min()) / (relative_asymmetry.max() - relative_asymmetry.min() )   
correlation_dataframe = pd.concat([relative_asymmetry, test_feature_count], axis=1)
correlation_connectivity = correlation_dataframe.corr().values[0][1]
correlation_connectivity

0.32460389310488996

# Global Depth

In [14]:
mean_depth = closeness_centrality / (adjacent_node.shape[0] - 1)
relative_asymmetry = (2*(mean_depth - 1)) / (adjacent_node.shape[0] - 2)
relative_asymmetry = (relative_asymmetry - relative_asymmetry.min()) / (relative_asymmetry.max() - relative_asymmetry.min() )   
correlation_dataframe = pd.concat([relative_asymmetry, test_feature_count], axis=1)
correlation_global_depth = - correlation_dataframe.corr().values[0][1]
correlation_global_depth

0.45406704994670616