In [25]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import get_cmap
from matplotlib.font_manager import FontProperties

# 獲取當前工作目錄
current_dir = os.getcwd()
version3_path = os.path.join(current_dir, "TrafficTDApython", "Version3", "tdamapper", "core_old.py")

from utils.models import *
from utils.utils_v3 import *
from utils.plots import *

try:
    myfont = FontProperties(fname=r"/System/Library/Fonts/PingFang.ttc")
    sns.set(style="whitegrid", font=myfont.get_name())
except Exception as e:
    print(e)

plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

dataA2 = pd.read_csv("./Data/A2.csv", low_memory=False)
dataA1 = pd.read_csv("./Data/A1.csv")

[Errno 2] No such file or directory: 'C:\\System\\Library\\Fonts\\PingFang.ttc'


In [35]:
# car data
car_data = pd.read_csv(os.path.join(current_dir, "Data/CarDataNew/full_1.csv"), encoding='utf-8')
motor_data = pd.read_csv(os.path.join(current_dir, "Data/CarDataNew/full_2.csv"), encoding='utf-8')
overlap_data = pd.read_csv(os.path.join(current_dir, "Data/CarDataNew/full_out.csv"), encoding='utf-8')
outlier_data = pd.read_csv(os.path.join(current_dir, "Data/CarDataNew/overlap_data.csv"), encoding='utf-8')
# pass data
pass_data0 = pd.read_csv(os.path.join(current_dir, "Data/PassData/full_0.csv"), encoding='utf-8')
pass_data1 = pd.read_csv(os.path.join(current_dir, "Data/PassData/full_1.csv"), encoding='utf-8')
pass_data2 = pd.read_csv(os.path.join(current_dir, "Data/PassData/full_2.csv"), encoding='utf-8')
pass_data3 = pd.read_csv(os.path.join(current_dir, "Data/PassData/full_3.csv"), encoding='utf-8')
pass_data_overlap = pd.read_csv(os.path.join(current_dir, "Data/PassData/overlap_data.csv"), encoding='utf-8')
pass_data_out = pd.read_csv(os.path.join(current_dir, "Data/PassData/full_out.csv"), encoding='utf-8')

# 合併模型輸入資料，總共有 5 種資料
# car_data
# motor_data
car_motor_outlier_overlap = pd.concat([outlier_data, overlap_data], axis=0)
pass_data = pd.concat([pass_data0, pass_data1, pass_data2, pass_data3, pass_data_overlap], axis=0)

# 補回當事者行動狀態大類別名稱
car_data['當事者行動狀態大類別名稱'] = '車的狀態'
motor_data['當事者行動狀態大類別名稱'] = '車的狀態'
car_motor_outlier_overlap['當事者行動狀態大類別名稱'] = '車的狀態'
pass_data['當事者行動狀態大類別名稱'] = '人的狀態'
pass_data_out['當事者行動狀態大類別名稱'] = '人的狀態'

full_data = pd.concat([car_data, motor_data, car_motor_outlier_overlap, pass_data, pass_data_out], axis=0)
car_data.drop(columns=['當事者行動狀態大類別名稱'], inplace=True)
motor_data.drop(columns=['當事者行動狀態大類別名稱'], inplace=True)
car_motor_outlier_overlap.drop(columns=['當事者行動狀態大類別名稱'], inplace=True)
pass_data.drop(columns=['當事者行動狀態大類別名稱'], inplace=True)

In [36]:
select_lst = [
    '天候名稱', '光線名稱', 
    '道路類別-第1當事者-名稱', '速限-第1當事者', 
    
    # 路面狀況
    '路面狀況-路面鋪裝名稱', '路面狀況-路面狀態名稱', '路面狀況-路面缺陷名稱',
    '道路障礙-障礙物名稱', '道路障礙-視距品質名稱', '道路障礙-視距名稱',
    # 號誌
    '號誌-號誌種類名稱', '號誌-號誌動作名稱',
    # 車道
    '車道劃分設施-分道設施-快車道或一般車道間名稱', '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    # 當事人
    '當事者屬-性-別名稱', '當事者事故發生時年齡',
    '保護裝備名稱', '行動電話或電腦或其他相類功能裝置名稱',
    '肇事逃逸類別名稱-是否肇逃',

    # 大類別
    '道路型態大類別名稱', '事故位置大類別名稱',
    '車道劃分設施-分向設施大類別名稱',
    '事故類型及型態大類別名稱', '當事者區分-類別-大類別名稱-車種',
    '車輛撞擊部位大類別名稱-最初', 
    '車輛撞擊部位大類別名稱-其他',

    # 兩個欄位只有兩個觀察值不同
    '肇因研判大類別名稱-主要',
    # '肇因研判大類別名稱-個別',
    '受傷', '死亡',
    
    # 子類別
    '道路型態子類別名稱', '事故位置子類別名稱', '事故類型及型態子類別名稱', '肇因研判子類別名稱-主要',
    '當事者區分-類別-子類別名稱-車種', '當事者行動狀態子類別名稱', '車輛撞擊部位子類別名稱-最初',
    '車輛撞擊部位子類別名稱-其他', '肇因研判子類別名稱-個別'
]

# select data
full_data = full_data[select_lst]
car_data = car_data[select_lst]
motor_data = motor_data[select_lst]
car_motor_outlier_overlap = car_motor_outlier_overlap[select_lst]
select_lst.remove('車輛撞擊部位大類別名稱-最初')
select_lst.remove('行動電話或電腦或其他相類功能裝置名稱')
select_lst.remove('當事者區分-類別-大類別名稱-車種')
pass_data = pass_data[select_lst]

In [37]:
full_data = pd.get_dummies(full_data)
car_data = pd.get_dummies(car_data)
motor_data = pd.get_dummies(motor_data)
car_motor_outlier_overlap = pd.get_dummies(car_motor_outlier_overlap)
pass_data = pd.get_dummies(pass_data)
pass_data_outlier = pd.get_dummies(pass_data_out)

full_data_X, full_data_y = get_train_test_data(full_data)
car_data_X, car_data_y = get_train_test_data(car_data)
motor_data_X, motor_data_y = get_train_test_data(motor_data)
car_motor_outlier_overlap_X, car_motor_outlier_overlap_y = get_train_test_data(car_motor_outlier_overlap)
pass_data_X, pass_data_y = get_train_test_data(pass_data)
pass_data_outlier_X, pass_data_outlier_y = get_train_test_data(pass_data_outlier)

assert car_data_X.shape[0] + pass_data_X.shape[0] + motor_data_X.shape[0] + car_motor_outlier_overlap_X.shape[0] + pass_data_outlier_X.shape[0] == full_data_X.shape[0]

ModelPerformance 為不使用kfold大類別 <br/>
ModelPerformanceV2 為不使用kfold子類別納入 <br/>
ModelPerformanceNew 為大類別模型 <br/>
ModelPerformanceNewV2 為子類別納入

In [None]:
import pickle

# Define the data for each model
models = [
    ("pass_outlier", pass_data_outlier_X, pass_data_outlier_y),
    ("pass", pass_data_X, pass_data_y),
    ("car_motor_outlier_overlap", car_motor_outlier_overlap_X, car_motor_outlier_overlap_y),
    ("motor", motor_data_X, motor_data_y),
    ("car", car_data_X, car_data_y),
    ("full", full_data_X, full_data_y)
]

# Logistic
for name, X, y in models:
    print(f'{name} logistic start')
    y_log, decision_scores_log, indices_log = logistic_cm_kfold(X.astype(float), y)
    with open(f"./ModelPerformanceNewV2/{name}_performance_log.pkl", "wb") as f:
        pickle.dump({
            'y': y_log,
            'decision_scores': decision_scores_log,
            'indices': indices_log
        }, f)
    print(f'{name} logistic done')

# SVC
for name, X, y in models:
    print(f'{name} svc start')
    y_svc, decision_scores_svc, indices_svc = linear_svc_kfold(X.astype(float), y)
    with open(f"./ModelPerformanceNewV2/{name}_performance_svc.pkl", "wb") as f:
        pickle.dump({
            'y': y_svc,
            'decision_scores': decision_scores_svc,
            'indices': indices_svc
        }, f)
    print(f'{name} svc done')

# 區分

In [40]:
import pickle

# Define the data for each model
models = [
    ("pass_outlier", pass_data_outlier_X, pass_data_outlier_y),
    ("pass", pass_data_X, pass_data_y),
    ("car_motor_outlier_overlap", car_motor_outlier_overlap_X, car_motor_outlier_overlap_y),
    ("motor", motor_data_X, motor_data_y),
    ("car", car_data_X, car_data_y),
    ("full", full_data_X, full_data_y)
]

# Logistic
for name, X, y in models:
    print(f'{name} logistic start')
    y_log, decision_scores_log = logistic_cm_gridsearch(X.astype(float), y)
    with open(f"./ModelPerformanceV2/{name}_performance_log.pkl", "wb") as f:
        pickle.dump({
            'y': y_log,
            'decision_scores': decision_scores_log,
        }, f)
    print(f'{name} logistic done')

# SVC
for name, X, y in models:
    print(f'{name} svc start')
    y_svc, decision_scores_svc = linear_svc_cm_gridsearch(X.astype(float), y)
    with open(f"./ModelPerformanceV2/{name}_performance_svc.pkl", "wb") as f:
        pickle.dump({
            'y': y_svc,
            'decision_scores': decision_scores_svc,
        }, f)
    print(f'{name} svc done')

pass_outlier logistic start
Best parameters found by GridSearchCV: {'C': 0.01, 'penalty': 'l2'}
pass_outlier logistic done
pass logistic start
Best parameters found by GridSearchCV: {'C': 1, 'penalty': 'l1'}
pass logistic done
car_motor_outlier_overlap logistic start
Best parameters found by GridSearchCV: {'C': 1, 'penalty': 'l1'}
car_motor_outlier_overlap logistic done
motor logistic start
Best parameters found by GridSearchCV: {'C': 1, 'penalty': 'l1'}
motor logistic done
car logistic start
Best parameters found by GridSearchCV: {'C': 1, 'penalty': 'l1'}
car logistic done
full logistic start
Best parameters found by GridSearchCV: {'C': 1, 'penalty': 'l1'}
full logistic done
pass_outlier svc start




Best parameters found by GridSearchCV: {'C': 0.1, 'loss': 'hinge'}
[ 0.14617266 -2.41270093 -1.3237272  -1.70416193  0.1210871  -4.14984293]
pass_outlier svc done
pass svc start




Best parameters found by GridSearchCV: {'C': 100, 'loss': 'hinge'}
[-10.06872251  -5.79985703  -9.32171272 -18.70952599 -17.92811456
  -6.60810713 -13.45905668  -3.1979751  -11.11689957  -5.50701247
  -2.69006364  -8.13258479  -8.63736571  -8.42149495 -28.96228342
 -15.58168191  -3.07646515 -10.31384577  -5.05226699  -0.92187038
 -18.81771695 -15.17737389   1.18758664 -12.78656916  -5.18026987
   2.59073928   0.12049995   0.83870946  -7.69351698 -21.30909245
  -6.04627776 -16.22516114  -2.28156033 -10.49164139  -6.05295703
  -7.99934794]
pass svc done
car_motor_outlier_overlap svc start
Best parameters found by GridSearchCV: {'C': 1, 'loss': 'hinge'}
[ -5.95670528  -4.45632566  -5.2531868   -6.78006503  -3.0433064
  -1.69188962  -3.96797689  -0.2690711   -7.21268936  -5.20698139
  -8.94009796 -10.30170134  -6.45432172  -0.38467308  -5.33303571
 -11.43911547  -2.50545986  -6.02295519  -0.95747589  -5.58349637
  -1.83266256  -1.87190881  -3.26563176  -5.43453252  -3.21828684
 -11.5671162



Best parameters found by GridSearchCV: {'C': 100, 'loss': 'hinge'}
[ -4.07077711  -2.3572722   -1.11064016  -0.13181889  -3.57663358
  -2.0998758   -5.57607267   1.34597457  -7.36654347  -4.75645127
  -2.85917321  -3.60679831  -3.660863    -1.90761142   0.20527698
  -3.38151253  -2.32346211  -1.47860851  -3.43325843 -11.9337866
  -0.51142378  -1.41530805   0.72968659   0.71686177  -3.53835001
  -2.61886822  -3.0836553   -2.8325011   -1.10157905  -1.24382141
  -3.9999911   -6.21287168  -2.84506478  -3.13561635  -4.1662348
  -8.13418705  -1.98985954  -2.27292362  -4.05356545  -0.39906042
  -2.76198947   1.80297294   0.78129424  -5.84778206  -7.24780214
  -3.86331774  -7.18549604  -9.04509505  -5.31359086  -4.19198335
  -2.87183295  -6.03860906  -2.59551521  -3.14175481  -0.82900278
  -1.56062821  -6.02597496  -2.34550256  -3.19600636  -9.63540145
  -6.88076619  -2.52321282  -5.23192646  -1.64135002   0.35704696
  -2.85101205  -0.9631232    3.72218386   0.30715081  -2.92631503
   1.135189



Best parameters found by GridSearchCV: {'C': 100, 'loss': 'hinge'}
[  3.53441355  -8.00107696  -5.44057204 -11.57861236  -1.01912527
  -1.30916685   0.88373479  -7.40686962   1.33115973  -6.24762236
  -4.81140132  -8.40673484  -9.29748255  -5.59254431  -3.18146082
  -3.43109997  -1.21549287 -11.01418362  -6.81317231  -4.97941485
  -1.16986742  -4.79085091 -14.28121017  -9.23758842  -7.44663295
  -7.54486714  -3.04715286  -2.02342147  -5.63896371  -7.97107569
  -2.75985038 -11.10593539  -7.96198532   1.41794611 -12.01605886
  -0.78818741 -11.54396772  -7.33514737  -4.32496202   0.83632534
   0.83650083  -0.60265692   6.40673681  -5.23933019   2.83175641
  -7.37551218  -2.66692738  -6.68557737  -6.51464614  -1.1461801
  -0.718186    -5.40125696   0.83096554 -18.26626864 -15.58613472
  -4.92369713 -17.3080003   -2.43114842   0.53608518  -1.33955667
  -3.18745995   5.0622736   -7.31958621  -4.31231048   3.03525831
 -10.82521066   1.34455846   1.89354171  -5.12154815   2.27635973
  -2.35865

