In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import get_cmap
from matplotlib.font_manager import FontProperties

# Resolve project paths against the directory the kernel is running in.
current_dir = os.getcwd()
_core_old_parts = ("TrafficTDApython", "Version3", "tdamapper", "core_old.py")
version3_path = os.path.join(current_dir, *_core_old_parts)

from utils.models import *
from utils.utils_v3 import *
from utils.plots import *

# Plot styling.
# PingFang only exists on macOS, so the font is optional. The whitegrid theme,
# however, must be applied whether or not the font loads; previously a missing
# font skipped the sns.set() call altogether.
try:
    myfont = FontProperties(fname=r"/System/Library/Fonts/PingFang.ttc")
    theme_font = {"font": myfont.get_name()}  # get_name() is what touches the file
except Exception as e:  # deliberate best-effort: a font problem must not abort the notebook
    print(e)
    theme_font = {}  # let seaborn use its default font family
sns.set(style="whitegrid", **theme_font)

# These overrides come after sns.set() because applying a seaborn theme
# updates matplotlib's rcParams.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# Raw A1 / A2 tables, read relative to the working directory.
dataA2 = pd.read_csv("./Data/A2.csv", low_memory=False)
dataA1 = pd.read_csv("./Data/A1.csv")

[Errno 2] No such file or directory: 'C:\\System\\Library\\Fonts\\PingFang.ttc'


In [124]:
def _load_csv(relative_path):
    """Read a UTF-8 encoded CSV located under ``current_dir``."""
    return pd.read_csv(os.path.join(current_dir, relative_path), encoding='utf-8')


# Car-side data.
car_data = _load_csv("Data/CarDataNew/full_1.csv")
motor_data = _load_csv("Data/CarDataNew/full_2.csv")
# Fix: these two names used to be swapped (``overlap_data`` held full_out.csv
# and ``outlier_data`` held overlap_data.csv), unlike the pass-side naming below.
overlap_data = _load_csv("Data/CarDataNew/overlap_data.csv")
outlier_data = _load_csv("Data/CarDataNew/full_out.csv")

# Pass-side data.
pass_data0 = _load_csv("Data/PassData/full_0.csv")
pass_data1 = _load_csv("Data/PassData/full_1.csv")
pass_data2 = _load_csv("Data/PassData/full_2.csv")
pass_data3 = _load_csv("Data/PassData/full_3.csv")
pass_data_overlap = _load_csv("Data/PassData/overlap_data.csv")
pass_data_out = _load_csv("Data/PassData/full_out.csv")

# Merge into the five model inputs: car_data, motor_data, and the three
# concatenated frames below.
# Row order is kept as before the rename (overlap_data.csv rows first, then
# full_out.csv rows) so positional indices of downstream results do not shift.
car_motor_outlier_overlap = pd.concat([overlap_data, outlier_data], axis=0)
pass_data = pd.concat([pass_data0, pass_data1, pass_data2, pass_data3], axis=0)
pass_outlier_overlap = pd.concat([pass_data_out, pass_data_overlap], axis=0)

# Add the party action-state major-category column only on the copies that go
# into ``full_data``. ``assign`` returns new frames, so the five per-group
# frames are left untouched and no add-then-drop round trip is needed.
_STATE_COL = '當事者行動狀態大類別名稱'
full_data = pd.concat(
    [
        car_data.assign(**{_STATE_COL: '車的狀態'}),
        motor_data.assign(**{_STATE_COL: '車的狀態'}),
        car_motor_outlier_overlap.assign(**{_STATE_COL: '車的狀態'}),
        pass_data.assign(**{_STATE_COL: '人的狀態'}),
        pass_outlier_overlap.assign(**{_STATE_COL: '人的狀態'}),
    ],
    axis=0,
)

In [134]:
# Feature columns used by the models, grouped by theme.
select_lst = [
    # Weather, light, road class, speed limit
    '天候名稱',
    '光線名稱',
    '道路類別-第1當事者-名稱',
    '速限-第1當事者',

    # Road surface and obstacles
    '路面狀況-路面鋪裝名稱',
    '路面狀況-路面狀態名稱',
    '路面狀況-路面缺陷名稱',
    '道路障礙-障礙物名稱',
    '道路障礙-視距品質名稱',
    '道路障礙-視距名稱',

    # Traffic signals
    '號誌-號誌種類名稱',
    '號誌-號誌動作名稱',

    # Lane division facilities
    '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱',
    '車道劃分設施-分道設施-路面邊線名稱',

    # Party (person involved)
    '當事者屬-性-別名稱',
    '當事者事故發生時年齡',
    '保護裝備名稱',
    '行動電話或電腦或其他相類功能裝置名稱',
    '肇事逃逸類別名稱-是否肇逃',

    # Major categories
    '道路型態大類別名稱',
    '事故位置大類別名稱',
    '車道劃分設施-分向設施大類別名稱',
    '事故類型及型態大類別名稱',
    '當事者區分-類別-大類別名稱-車種',
    '車輛撞擊部位大類別名稱-最初',
    '車輛撞擊部位大類別名稱-其他',

    # '肇因研判大類別名稱-個別' is left out: per the original note, it differs
    # from the '-主要' column in only two observations.
    '肇因研判大類別名稱-主要',
    '受傷',
    '死亡',

    # Sub-categories
    '道路型態子類別名稱',
    '事故位置子類別名稱',
    '事故類型及型態子類別名稱',
    '肇因研判子類別名稱-主要',
    '當事者區分-類別-子類別名稱-車種',
    '當事者行動狀態子類別名稱',
    '車輛撞擊部位子類別名稱-最初',
    '車輛撞擊部位子類別名稱-其他',
    '肇因研判子類別名稱-個別',
]

# The full frame and the car-side frames keep every selected column.
full_data, car_data, motor_data, car_motor_outlier_overlap = [
    frame[select_lst]
    for frame in (full_data, car_data, motor_data, car_motor_outlier_overlap)
]

# The pass-side frames are selected without these three columns.
# NOTE: this shrinks ``select_lst`` itself, exactly as the original cell did.
for _excluded in (
    '車輛撞擊部位大類別名稱-最初',
    '行動電話或電腦或其他相類功能裝置名稱',
    '當事者區分-類別-大類別名稱-車種',
):
    select_lst.remove(_excluded)

pass_data, pass_outlier_overlap = [
    frame[select_lst] for frame in (pass_data, pass_outlier_overlap)
]

In [135]:
# One-hot encode every model input (each frame is encoded on its own).
(
    full_data,
    car_data,
    motor_data,
    car_motor_outlier_overlap,
    pass_data,
    pass_outlier_overlap,
) = [
    pd.get_dummies(frame)
    for frame in (
        full_data,
        car_data,
        motor_data,
        car_motor_outlier_overlap,
        pass_data,
        pass_outlier_overlap,
    )
]

# Split each encoded frame into X and y with the project helper.
(
    (full_data_X, full_data_y),
    (car_data_X, car_data_y),
    (motor_data_X, motor_data_y),
    (car_motor_outlier_overlap_X, car_motor_outlier_overlap_y),
    (pass_data_X, pass_data_y),
    (pass_outlier_overlap_X, pass_outlier_overlap_y),
) = [
    get_train_test_data(frame)
    for frame in (
        full_data,
        car_data,
        motor_data,
        car_motor_outlier_overlap,
        pass_data,
        pass_outlier_overlap,
    )
]

# Sanity check: the five subsets together have as many rows as the full set.
_subset_rows = sum(
    part.shape[0]
    for part in (
        car_data_X,
        pass_data_X,
        motor_data_X,
        car_motor_outlier_overlap_X,
        pass_outlier_overlap_X,
    )
)
assert _subset_rows == full_data_X.shape[0]

ModelPerformanceNew 為大類別模型 <br/>
ModelPerformanceNewV2 為納入子類別的模型

In [136]:
import pickle

# Where the per-model results are written. Created up front so a missing
# folder is caught before any cross-validation run, not after the first one.
PERFORMANCE_DIR = "./ModelPerformanceNewV2"
os.makedirs(PERFORMANCE_DIR, exist_ok=True)


def _dump_performance(name, suffix, y_out, decision_scores, indices):
    """Pickle one model's results to ``<PERFORMANCE_DIR>/<name>_performance_<suffix>.pkl``.

    The three values are stored under the keys 'y', 'decision_scores' and
    'indices', exactly as returned by the training helper.
    """
    with open(f"{PERFORMANCE_DIR}/{name}_performance_{suffix}.pkl", "wb") as f:
        pickle.dump({
            'y': y_out,
            'decision_scores': decision_scores,
            'indices': indices
        }, f)


# (name, X, y) for each model; the name becomes the output file prefix.
models = [
    ("pass_outlier_overlap", pass_outlier_overlap_X, pass_outlier_overlap_y),
    ("pass", pass_data_X, pass_data_y),
    ("car_motor_outlier_overlap", car_motor_outlier_overlap_X, car_motor_outlier_overlap_y),
    ("motor", motor_data_X, motor_data_y),
    ("car", car_data_X, car_data_y),
    ("full", full_data_X, full_data_y)
]

# Logistic regression. X is cast to float because the one-hot columns are not
# necessarily float already.
for name, X, y in models:
    print(f'{name} logistic start')
    y_log, decision_scores_log, indices_log = logistic_cm_kfold(X.astype(float), y)
    _dump_performance(name, "log", y_log, decision_scores_log, indices_log)
    print(f'{name} logistic done')

# Linear SVC, run after all logistic models have finished.
for name, X, y in models:
    print(f'{name} svc start')
    y_svc, decision_scores_svc, indices_svc = linear_svc_kfold(X.astype(float), y)
    _dump_performance(name, "svc", y_svc, decision_scores_svc, indices_svc)
    print(f'{name} svc done')

pass_outlier_overlap logistic start
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 0.1, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
pass_outlier_overlap logistic done
pass logistic start
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
pass logistic done
car_motor_outlier_overlap logistic start
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C':



Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}
Best parameters for this fold: {'C': 100, 'loss': 'squared_hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
pass svc done
car_motor_outlier_overlap svc start
Best parameters for this fold: {'C': 1, 'loss': 'hinge'}
Best parameters for this fold: {'C': 100, 'loss': 'squared_hinge'}
Best parameters for this fold: {'C': 1, 'loss': 'squared_hinge'}
Best parameters for this fold: {'C': 1, 'loss': 'hinge'}
Best parameters for this fold: {'C': 100, 'loss': 'squared_hinge'}
car_motor_outlier_overlap svc done
motor svc start




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
motor svc done
car svc start




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
car svc done
full svc start




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 1, 'loss': 'hinge'}




Best parameters for this fold: {'C': 1, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}
Best parameters for this fold: {'C': 1, 'loss': 'hinge'}
full svc done


