# Trajectories

## 1. 加载训练数据

In [36]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from copy import deepcopy

df = pd.read_csv('../dataSets/training/training_20min_avg_travel_time.csv')

# Average travel time per route, stored as a (91, 72) matrix:
# row = whole days elapsed since start_time, column = 20-minute window of the day.
# Cells that never appear in the CSV stay at 0.
trajectories_train = {}
start_time = datetime.strptime("2016-07-19 00:00:00", '%Y-%m-%d %H:%M:%S')

for _, line in df.iterrows():
    # Route key = intersection id followed by tollgate id (e.g. "A2").
    route = str(line['intersection_id'] + str(line['tollgate_id']))
    # Skip the first character of time_window and parse the 19-character
    # start timestamp of the window.
    stime = datetime.strptime(line['time_window'][1:20], '%Y-%m-%d %H:%M:%S')
    day = (stime - start_time).days
    # Floor division: the result is used as an array index, so it must stay an
    # int under Python 3 true division as well as under Python 2.
    tw_n = (stime.hour * 60 + stime.minute) // 20

    if route not in trajectories_train:
        trajectories_train[route] = np.zeros((91, 72))
    trajectories_train[route][day, tw_n] = line['avg_travel_time']

## 2. 训练数据划分与模型选择

In [37]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.cross_validation import train_test_split
import xgboost as xgb

# Data preparation: the route matrices are used as-is (no shuffle and no
# train/test split is performed at present).
data = trajectories_train

# Train one learner per route per target window.
# Each target column in y_tw_list is regressed on the six columns before it.
clfs = {}
y_tw_list = [24, 25, 26, 27, 28, 29, 51, 52, 53, 54, 55, 56]
x_tw_list = [np.arange(target - 6, target) for target in y_tw_list]

for route, matrix in data.items():
    # Models are appended in y_tw_list order, so clfs[route][n] belongs to
    # y_tw_list[n].
    models = clfs.setdefault(route, [])
    for tw_x, tw_y in zip(x_tw_list, y_tw_list):
        model = xgb.XGBRegressor(max_depth=2, n_estimators=120, learning_rate=0.05)
        models.append(model.fit(matrix[:, tw_x], matrix[:, tw_y]))

## 3. 加载测试数据

In [38]:
# Test data: 7 days starting 2016-10-18 (10.18 - 10.24).
df = pd.read_csv('../dataSets/testing_phase1/trajectories_20min_avg_travel_time.csv')

# Same layout as the training matrices, but with 7 day rows:
# row = whole days elapsed since start_time, column = 20-minute window of the day.
trajectories_test = {}
start_time = datetime.strptime("2016-10-18 00:00:00", '%Y-%m-%d %H:%M:%S')

for _, line in df.iterrows():
    # Route key = intersection id followed by tollgate id (e.g. "A2").
    route = str(line['intersection_id'] + str(line['tollgate_id']))
    # Skip the first character of time_window and parse the 19-character
    # start timestamp of the window.
    stime = datetime.strptime(line['time_window'][1:20], '%Y-%m-%d %H:%M:%S')
    day = (stime - start_time).days
    # Floor division: the result is used as an array index, so it must stay an
    # int under Python 3 true division as well as under Python 2.
    tw_n = (stime.hour * 60 + stime.minute) // 20

    if route not in trajectories_test:
        trajectories_test[route] = np.zeros((7, 72))
    trajectories_test[route][day, tw_n] = line['avg_travel_time']

In [39]:
# Predict every target window for each route.
# Each target column in y_tw_list is predicted from the six columns before it.
y_tw_list = [24, 25, 26, 27, 28, 29, 51, 52, 53, 54, 55, 56]
x_tw_list = [np.arange(target - 6, target) for target in y_tw_list]

test_data = trajectories_test
for route in test_data:
    # clfs[route] holds one model per entry of y_tw_list, in the same order.
    # Index by window position so each window is predicted by its own model
    # (previously the index stayed at 0 and only the first model was used).
    for i, (tw_x, tw_y) in enumerate(zip(x_tw_list, y_tw_list)):
        prediction = clfs[route][i].predict(test_data[route][:, tw_x])
        # Predictions are written back in place, so later windows read the
        # values just predicted for earlier ones (e.g. window 25 uses column 24).
        # Clamp at zero so no negative value is stored.
        test_data[route][:, tw_y] = np.maximum(prediction, 0)

In [40]:
out_data = test_data
# Write the predicted travel times to out.csv.
from datetime import datetime
from datetime import timedelta
from pandas import DataFrame


intersection = []
tollgate = []
time_window = []
avg_time = []
routes = ['A2', 'A3', "B1", "B3", "C1", "C3"]
n_day = 7

# (start of the first window, matrix columns) for the two predicted periods.
# Column 24 is the 08:00 window and column 51 the 17:00 window (20 min each).
# All morning rows (for every route) are emitted before any afternoon row.
periods = [
    ('2016-10-18 08:00:00', range(24, 30)),
    ('2016-10-18 17:00:00', range(51, 57)),
]

for period_start, columns in periods:
    start_time = datetime.strptime(period_start, "%Y-%m-%d %H:%M:%S")
    for route in routes:
        for day in range(n_day):
            starttime = start_time + timedelta(days=day)
            for i, k in enumerate(columns):
                window_begin = starttime + timedelta(seconds=1200 * i)
                window_end = starttime + timedelta(seconds=1200 * i + 1200)
                # NOTE(review): the literal double quotes are part of the cell
                # value; to_csv may add its own quoting on top of them --
                # confirm the resulting field matches the expected format.
                time_window.append('\"[' + window_begin.strftime("%Y-%m-%d %H:%M:%S")
                                   + "," + window_end.strftime("%Y-%m-%d %H:%M:%S") + ')\"')
                # A route key is "<intersection><tollgate>", e.g. "A2".
                intersection.append(str(route[0]))
                tollgate.append(str(route[1]))
                avg_time.append(out_data[route][day][k])

d = {"intersection_id": intersection, "tollgate_id": tollgate,
     "time_window": time_window, "avg_travel_time": avg_time}
# Use a dedicated name for the frame: rebinding `pd` would shadow the pandas
# module for any code that runs afterwards.
out_df = DataFrame(data=d)
out_df.to_csv('out.csv', index=False,
              columns=["intersection_id", "tollgate_id", "time_window", "avg_travel_time"])

# Volume

## 1. 加载训练数据

In [41]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta

df = pd.read_csv('../dataSets/training/training_20min_avg_volume.csv')

# Traffic volume per tollgate, one dict per direction. Each value is a (29, 72)
# matrix: row = whole days elapsed since start_time, column = 20-minute window
# of the day. Cells that never appear in the CSV stay at 0.
volume_in = {}
volume_out = {}
start_time = datetime.strptime("2016-09-19 00:00:00", '%Y-%m-%d %H:%M:%S')

for _, line in df.iterrows():
    tollgate = str(line['tollgate_id'])
    # Skip the first character of time_window and parse the 19-character
    # start timestamp of the window.
    stime = datetime.strptime(line['time_window'][1:20], '%Y-%m-%d %H:%M:%S')
    day = (stime - start_time).days
    # Floor division: the result is used as an array index, so it must stay an
    # int under Python 3 true division as well as under Python 2.
    tw_n = (stime.hour * 60 + stime.minute) // 20

    # Split by direction: 0 goes to volume_in, any other value to volume_out.
    target = volume_in if line['direction'] == 0 else volume_out
    if tollgate not in target:
        target[tollgate] = np.zeros((29, 72))
    target[tollgate][day, tw_n] = line['volume']

## 2. 模型

In [42]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.cross_validation import train_test_split
import xgboost as xgb

# Data preparation: the incoming-volume matrices are used as-is (no shuffle
# and no train/test split is performed at present).
data_in = volume_in

# Train one learner per tollgate per target window.
# Each target column in y_tw_list is regressed on the six columns before it.
clfs_in = {}
y_tw_list = [24, 25, 26, 27, 28, 29, 51, 52, 53, 54, 55, 56]
x_tw_list = [np.arange(target - 6, target) for target in y_tw_list]

for route, matrix in data_in.items():
    # Models are appended in y_tw_list order, so clfs_in[route][n] belongs to
    # y_tw_list[n].
    models = clfs_in.setdefault(route, [])
    for tw_x, tw_y in zip(x_tw_list, y_tw_list):
        model = xgb.XGBRegressor(max_depth=2, n_estimators=120, learning_rate=0.05)
        models.append(model.fit(matrix[:, tw_x], matrix[:, tw_y]))


In [43]:
##数据预处理
## shuffle
## split
data_out = volume_out
#X_train,X_test,y_train,y_test = train_test_split(data[:,17:23],data[:,24],test_size=0.2,random_state = 33)

##对每个路径的每个时间段的学习一个学习器
clfs_out = {};
x_tw_list = [np.arange(18,24),np.arange(19,25),np.arange(20,26),np.arange(21,27),np.arange(22,28),np.arange(23,29),\
             np.arange(45,51),np.arange(46,52),np.arange(47,53),np.arange(48,54),np.arange(49,55),np.arange(50,56)]
y_tw_list = [24,25,26,27,28,29,51,52,53,54,55,56]

for route in data_out.keys():
    for tw_x,tw_y in zip(x_tw_list,y_tw_list):
        X = data_in[route][:,tw_x]
        y = data_in[route][:,tw_y]
        clf = xgb.XGBRegressor(max_depth=2,n_estimators=120,learning_rate=0.05).fit(X,y)
        if route not in clfs.keys():
            clfs_out[route] = [clf]
        else:
            clfs_out[route].append(clf)

## 3.加载测试样本

In [44]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta

df = pd.read_csv('../dataSets/testing_phase1/test1_20min_avg_volume.csv')


volume_in_test = {}
volume_out_test = {}
start_time  = datetime.strptime("2016-10-18 00:00:00",'%Y-%m-%d %H:%M:%S')

for idx in range(df.shape[0]):
    line = df.iloc[idx]
    tollgate = str(line['tollgate_id'])
    stime = datetime.strptime(line['time_window'][1:20],'%Y-%m-%d %H:%M:%S')
    day = (stime - start_time).days
    tw_n = (stime.hour * 60 + stime.minute)/20
    
    if line['direction'] == 0: ##判断进/出流量
        if tollgate not in volume_in_test.keys():
            volume_in_test[tollgate] = np.zeros(72*7).reshape(7,72)
        volume_in_test[tollgate][day][tw_n] = line['volume']
    else:
        if tollgate not in volume_out_test.keys():
            volume_out_test[tollgate] = np.zeros(72*7).reshape(7,72)
        volume_out_test[tollgate][day][tw_n] = line['volume']
        #print volume_out_test[tollgate][day][tw_n]

In [45]:
## 对每个路径的每段进行预测
x_tw_list = [np.arange(18,24),np.arange(19,25),np.arange(20,26),np.arange(21,27),np.arange(22,28),np.arange(23,29)\
             ,np.arange(45,51),np.arange(46,52),np.arange(47,53),np.arange(48,54),np.arange(49,55),np.arange(50,56)]
y_tw_list = [24,25,26,27,28,29,51,52,53,54,55,56]

test_data_in = volume_in_test
for route in test_data_in.keys():
    i = 0
    for tw_x,tw_y in zip(x_tw_list,y_tw_list):
        test_data_in[route][:,tw_y] = np.maximum(clfs_in[route][i].predict(test_data_in[route][:,tw_x]),np.zeros(7))
        
test_data_out = volume_out_test
for route in test_data_out.keys():
    i = 0
    for tw_x,tw_y in zip(x_tw_list,y_tw_list):
        test_data_out[route][:,tw_y] = np.maximum(clfs_out[route][i].predict(test_data_out[route][:,tw_x]),np.zeros(7))

## 输出

In [46]:
# Write the predicted volumes to volume.csv.
# Output order: for each tollgate, one time window at a time, emitting that
# window for every day before moving on to the next window.
import pandas as pd

tollgate = []
time_window = []
direction = []
volume = []

n_day = 7

# (start of the first window, matrix columns) for the two predicted periods.
# Column 24 is the 08:00 window and column 51 the 17:00 window (20 min each).
# All morning rows (for every tollgate) are emitted before any afternoon row.
periods = [
    ('2016-10-18 08:00:00', [24, 25, 26, 27, 28, 29]),
    ('2016-10-18 17:00:00', [51, 52, 53, 54, 55, 56]),
]

for period_start, columns in periods:
    start_time = datetime.strptime(period_start, "%Y-%m-%d %H:%M:%S")
    for tollgate_id in ['1', '2', '3']:
        # Incoming volume (direction 0) is looked up unconditionally and comes
        # first; outgoing volume (direction 1) follows only for tollgates
        # that have outgoing test data.
        per_direction = [(0, test_data_in[tollgate_id])]
        if tollgate_id in test_data_out:
            per_direction.append((1, test_data_out[tollgate_id]))

        for direction_id, day_matrix in per_direction:
            for i, k in enumerate(columns):
                for day in range(n_day):
                    starttime = start_time + timedelta(days=day)
                    window_begin = starttime + timedelta(seconds=1200 * i)
                    window_end = starttime + timedelta(seconds=1200 * i + 1200)
                    tollgate.append(tollgate_id)
                    time_window.append('\"[' + window_begin.strftime("%Y-%m-%d %H:%M:%S")
                                       + "," + window_end.strftime("%Y-%m-%d %H:%M:%S") + ')\"')
                    direction.append(direction_id)
                    volume.append(day_matrix[day][k])

d = {"tollgate_id": tollgate, "time_window": time_window, "direction": direction, "volume": volume}
# Build the frame via the pandas module imported above and keep `pd` bound to
# pandas: rebinding it would shadow the module for any code that runs later.
volume_df = pd.DataFrame(data=d)
volume_df.to_csv('volume.csv', index=False,
                 columns=["tollgate_id", "time_window", "direction", "volume"])