In [None]:
# Step 0. Import packages

In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import auc, roc_curve, precision_recall_curve

import xgboost as xgb
from xgboost import plot_importance, plot_tree

import matplotlib.pyplot as plt
from matplotlib import rc

import pickle
import shap
import os

In [None]:
# Step 1. Load the data, then train, save and evaluate one model per bus stop file

In [11]:
# Train one XGBoost regressor per bus-stop CSV, pickle each fitted model, and
# record the hold-out mean absolute error of every model in `pf_dict`.

# Load the Bus_Stop file names (only .csv entries directly inside the folder)
path = './Bus_Stop/'
file_list = os.listdir(path)
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the performance table identical from run to run.
file_list_py = sorted(file for file in file_list if file.endswith('.csv'))

# Lists to check performance; all three are filled in lockstep, one entry per file
bus_stop_list = []
bus_number_list = []
error_list = []

# Folder for the pickled models. Create it up front so the first
# open(..., 'wb') does not fail when the folder is missing.
dir_path = './Model/'
os.makedirs(dir_path, exist_ok=True)

# Hyper-parameters shared by every model (loop-invariant, so defined once)
params = {
    "eta": 0.1,
    "max_depth": 10
}

# Make a model for each file
for i in file_list_py:
    data = pd.read_csv(path + i, encoding="cp949")

    # Split the data.
    # Predictors are the first 8 columns; the response is the column at
    # position 10.
    # NOTE(review): presumably the predictors are date, temperature, rain,
    # snow, pm10, pm2.5, covid19 and time, and the response is the number of
    # people - confirm against the CSV header. BusStop / BusNumber are not part
    # of this slice.
    X = data.iloc[:, 0:8]
    y = data.iloc[:, 10]
    x_train, x_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=1)

    # Learning
    fit_xgb = xgb.XGBRegressor(**params)
    model = fit_xgb.fit(x_train, y_train)

    # Set file name: busstop_line.pkl
    # The identifiers are read from the row labelled 1.
    # NOTE(review): assumes every row of a file carries the same stop and
    # line - TODO confirm.
    bus_stop = str(data.loc[1, 'BusStop'])
    bus_number = str(data.loc[1, 'BusNumber'])
    file_name = bus_stop + '_' + bus_number + '.pkl'

    # Save the fitted model
    with open(os.path.join(dir_path, file_name), 'wb') as f:
        pickle.dump(model, f)

    # Model test: mean absolute error on the held-out 20 %.
    # Plain ndarrays are used so that a NaN in the target still propagates
    # into the reported error instead of being skipped.
    mean_absoulte_error = np.mean(np.abs(model.predict(x_test) - y_test.to_numpy()))

    bus_stop_list.append(bus_stop)
    # Fix: append the line number itself. The previous code appended
    # bus_number_list to itself, filling the column with self-referential lists.
    bus_number_list.append(bus_number)
    error_list.append(mean_absoulte_error)

# Make the column mapping for the performance data frame
pf_dict = {"busStop": bus_stop_list, "busNumber": bus_number_list, "Error": error_list}

In [15]:
# Turn the per-model results (bus stop, bus number, error) into a table and
# write it next to the notebook; the row index is written as the first column.
pd_df = pd.DataFrame(data=pf_dict)
pd_df.to_csv(path_or_buf='./performance.csv', index=True)