In [4]:
import function as f
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Suppress all warnings (errors are still raised)
import warnings
warnings.filterwarnings('ignore')

### Regression Tree

In [8]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error 
import numpy as np

In [6]:
# Location of the CSV that holds the training data
file_path = 'original_data.csv'

# Load the CSV and keep every column except the first one
df = pd.read_csv(file_path).iloc[:, 1:]
df

Unnamed: 0,dist,delay_count,Total Time
0,25,5,361.152
1,19,4,348.368
2,27,6,361.096
3,37,7,380.200
4,35,8,378.624
...,...,...,...
715,21,5,349.912
716,19,4,347.896
717,31,6,371.592
718,35,8,381.832


In [7]:
# Hold out 30% of the rows for testing; 'Total Time' is the target and
# every remaining column is a predictor.
train_x, test_x, train_y, test_y = train_test_split(
    df.drop(columns='Total Time'),
    df['Total Time'],
    test_size=0.3,
    random_state=42,
)

# Shuffled, seeded 10-fold splitter used for cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [9]:
# Create the Decision Tree Regressor with a fixed seed: scikit-learn permutes
# the candidate features at every split, so without random_state the fitted
# tree (and every score derived from it) can differ between runs.
model_RT = DecisionTreeRegressor(random_state=42)

# Cross-validate on the training split only. The scorer returns negated MSE
# (higher is better), so the sign is flipped to get plain per-fold MSE values.
mse_train_RT = -cross_val_score(model_RT, train_x, train_y, cv=kf, scoring='neg_mean_squared_error')

# Average MSE over the folds, displayed as the cell result
avg_mse_train_RT = np.mean(mse_train_RT)
avg_mse_train_RT

6.38528657736331

In [10]:
# Train the tree on the whole training split, then predict the held-out test rows
pred_test_RT = model_RT.fit(train_x, train_y).predict(test_x)

# Test-set MSE, displayed as the cell result
mean_squared_error(y_true=test_y, y_pred=pred_test_RT)

6.6865418362369375

### Predict Congestion rates without interference

In [42]:
dist_df = pd.read_csv('results_old_2.csv')

# Predict the total time with the fitted tree and store it as a new column.
# The predictors are passed as ['dist', 'delay_count'], in that order.
x_data = dist_df[['dist', 'delay_count']]
dist_df['Completion Time alone'] = model_RT.predict(x_data)

# Preview goes last: only the final expression of a cell is rendered, and
# placing it here also shows the freshly added prediction column.
dist_df.head()

In [45]:
# Strip the literal '.csv' text. regex=False matters on pandas < 2.0, where
# the pattern is a regular expression by default and '.' matches any character.
dist_df['file_name'] = dist_df['file_name'].str.replace('.csv', '', regex=False)

# NOTE(review): this selection discards every other column of dist_df,
# including the 'Completion Time alone' predictions computed earlier.
# Confirm that is intended.
dist_df = dist_df[['Truck_id', 'file_name']]
dist_df.head(10)

Unnamed: 0,Truck_id,file_name
0,Truck-100,now_RoutePoints_10_LP_0_0_100_43rep
1,Truck-104,now_RoutePoints_10_LP_0_0_100_43rep
2,Truck-105,now_RoutePoints_10_LP_0_0_100_43rep
3,Truck-107,now_RoutePoints_10_LP_0_0_100_43rep
4,Truck-109,now_RoutePoints_10_LP_0_0_100_43rep
5,Truck-102,now_RoutePoints_10_LP_0_0_100_43rep
6,Truck-106,now_RoutePoints_10_LP_0_0_100_43rep
7,Truck-108,now_RoutePoints_10_LP_0_0_100_43rep
8,Truck-101,now_RoutePoints_10_LP_0_0_100_43rep
9,Truck-103,now_RoutePoints_10_LP_0_0_100_43rep


- Column names:
Truck_id,Route_id,Origin,Destination,Completion_Time_alone,Completion_Time,Congestion_ratio,Path_length,TravelTime_by_Distance,CompletionTime_by_Distance,PickupSta_AT,DropSta_AT,Unnamed: 10,Unnamed: 11,C_max_prev,C_max_now,C_max,

In [63]:
import os

folder_path = 'Results_old_2'

# Entries directly under the results folder. Sorted because os.listdir returns
# names in arbitrary order, and the row order of all_dfs follows this order.
subfolder = sorted(os.listdir(folder_path))

# One frame per result CSV, concatenated once after the loop instead of
# re-copying an ever-growing frame on every iteration.
result_frames = []

for folder in subfolder:
    subfolder_path = os.path.join(folder_path, folder)
    # Skip plain files sitting next to the sub-folders; calling os.listdir on
    # them would raise NotADirectoryError.
    if not os.path.isdir(subfolder_path):
        continue
    for csv_name in sorted(os.listdir(subfolder_path)):
        if not csv_name.endswith('.csv'):
            continue
        # Dedicated names are used here so the loop does not overwrite the
        # `df` and `file_path` variables defined by earlier cells.
        result_df = pd.read_csv(os.path.join(subfolder_path, csv_name))
        # Keep the two columns of interest and tag every row with its source
        # file name. .assign builds a new frame, so nothing is written onto a
        # slice of the frame that was just read.
        result_frames.append(
            result_df[['Truck_id', 'Completion_Time']].assign(file_name=csv_name)
        )

# pd.concat raises on an empty list; fall back to an empty frame in that case.
# The per-file row index is kept (no ignore_index), so index labels repeat.
all_dfs = pd.concat(result_frames) if result_frames else pd.DataFrame()

all_dfs.head()

Unnamed: 0,Truck_id,Completion_Time,file_name
0,Truck-0,344.416,result_prev_RoutePoints_10_LP_0_0_100_10rep_wi...
1,Truck-9,346.688,result_prev_RoutePoints_10_LP_0_0_100_10rep_wi...
2,Truck-8,354.592,result_prev_RoutePoints_10_LP_0_0_100_10rep_wi...
3,Truck-2,355.936,result_prev_RoutePoints_10_LP_0_0_100_10rep_wi...
4,Truck-4,361.056,result_prev_RoutePoints_10_LP_0_0_100_10rep_wi...


In [87]:
df = all_dfs.copy()

# Trucks numbered below this value take the part of the file name before
# '_with_'; all other trucks take the part after it.
TRUCK_NO_SPLIT = 100

# Remove the '.csv' and 'result_' texts literally (regex=False keeps '.' from
# acting as a wildcard on pandas < 2.0), then split the name on '_with_'.
name_parts = (
    df['file_name']
    .str.replace('.csv', '', regex=False)
    .str.replace('result_', '', regex=False)
    .str.split('_with_')
)

# Numeric part of ids such as 'Truck-103'
truck_no = df['Truck_id'].str.split('-').str[1].astype(int)

# Replace the combined file name by the half that belongs to each truck.
# The selection is positional (NumPy arrays) because the index labels of df
# repeat across source files, so label-based alignment is avoided.
df['file_name'] = np.where(
    (truck_no < TRUCK_NO_SPLIT).to_numpy(),
    name_parts.str[0].to_numpy(),
    name_parts.str[1].to_numpy(),
)

# A missing half (no '_with_' in the name) shows up as NaN; fail loudly
# instead of carrying incomplete names forward.
if df['file_name'].isna().any():
    raise ValueError("could not derive a file name for every row (missing '_with_' part?)")

df

Unnamed: 0,Truck_id,Completion_Time,file_name
0,Truck-0,344.416,prev_RoutePoints_10_LP_0_0_100_10rep
1,Truck-9,346.688,prev_RoutePoints_10_LP_0_0_100_10rep
2,Truck-8,354.592,prev_RoutePoints_10_LP_0_0_100_10rep
3,Truck-2,355.936,prev_RoutePoints_10_LP_0_0_100_10rep
4,Truck-4,361.056,prev_RoutePoints_10_LP_0_0_100_10rep
...,...,...,...
5,Truck-103,357.200,now_RoutePoints_5_LP_80_10_10_9rep
6,Truck-102,363.000,now_RoutePoints_5_LP_80_10_10_9rep
7,Truck-100,396.520,now_RoutePoints_5_LP_80_10_10_9rep
8,Truck-101,420.336,now_RoutePoints_5_LP_80_10_10_9rep
