In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import get_cmap
from matplotlib.font_manager import FontProperties

# Get the current working directory; the CarData/PassData paths in a later cell are built from it.
current_dir = os.getcwd()
# Path to core_old.py under the working directory (not referenced again in the visible cells).
version3_path = os.path.join(current_dir, "TrafficTDApython", "Version3", "tdamapper", "core_old.py")

# Project-local modules pulled in with wildcard imports. Presumably these supply
# get_train_test_data, logistic_cm_kfold and linear_svc_kfold used in later cells
# -- TODO confirm which module defines each name. Import order matters because a
# later wildcard import can shadow names from an earlier one.
from models import *
from utils_v3 import *
from plots import *

# Try to use the macOS PingFang font for the seaborn theme. If anything in the
# try block raises (for example the font file is missing), the error is printed
# and the sns.set(...) call does not take effect.
try:
    myfont = FontProperties(fname=r"/System/Library/Fonts/PingFang.ttc")
    sns.set(style="whitegrid", font=myfont.get_name())
except Exception as e:
    print(e)

# Always executed, whether or not the block above succeeded: set the sans-serif
# font list to Microsoft YaHei and turn off the Unicode minus sign on axes.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

[Errno 2] No such file or directory: 'C:\\System\\Library\\Fonts\\PingFang.ttc'


In [2]:
# Raw A1 / A2 tables, read relative to the notebook's working directory.
dataA1 = pd.read_csv("./Data/A1.csv")
# low_memory=False makes pandas read A2 in one pass instead of in chunks.
dataA2 = pd.read_csv("./Data/A2.csv", low_memory=False)

In [3]:
# Column used to tag each row with the table it came from.
_source_label_col = '當事者行動狀態大類別名稱'

# car data
car_data = pd.read_csv(os.path.join(current_dir, "Data/CarData/full.csv"), encoding='utf-8')
# pass data
pass_data = pd.read_csv(os.path.join(current_dir, "Data/PassData/full.csv"), encoding='utf-8')

# full data: stack both tables row-wise, each tagged with its source label.
full_data = pd.concat(
    [
        car_data.assign(**{_source_label_col: 'car'}),
        pass_data.assign(**{_source_label_col: 'pass'}),
    ],
    axis=0,
)

# Neither source table keeps the label column afterwards (errors='ignore'
# covers the case where the CSV never had a column of that name).
car_data = car_data.drop(columns=[_source_label_col], errors='ignore')
pass_data = pass_data.drop(columns=[_source_label_col], errors='ignore')

In [4]:
# Columns kept for modelling, in the order used for car_data.
select_lst = [
    '天候名稱',
    '光線名稱',
    '道路類別-第1當事者-名稱',
    '速限-第1當事者',
    '路面狀況-路面鋪裝名稱',
    '路面狀況-路面狀態名稱',
    '路面狀況-路面缺陷名稱',
    '道路障礙-障礙物名稱',
    '道路障礙-視距品質名稱',
    '道路障礙-視距名稱',
    '號誌-號誌種類名稱',
    '號誌-號誌動作名稱',
    '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱',
    '車道劃分設施-分道設施-路面邊線名稱',
    '當事者屬-性-別名稱',
    '當事者事故發生時年齡',
    '保護裝備名稱',
    '行動電話或電腦或其他相類功能裝置名稱',
    '肇事逃逸類別名稱-是否肇逃',

    # Broad-category columns
    '道路型態大類別名稱',
    '事故位置大類別名稱',
    '車道劃分設施-分向設施大類別名稱',
    '事故類型及型態大類別名稱',
    '當事者區分-類別-大類別名稱-車種',
    '車輛撞擊部位大類別名稱-最初',
    '車輛撞擊部位大類別名稱-其他',

    # The two cause columns differ in only two observations, so only the
    # "main" one is kept; '肇因研判大類別名稱-個別' is intentionally left out.
    '肇因研判大類別名稱-主要',
    '受傷',
    '死亡',
]

_first_impact_col = '車輛撞擊部位大類別名稱-最初'
_source_label_col = '當事者行動狀態大類別名稱'

# select data
car_data = car_data[select_lst]

# pass_data is selected without the first-impact column.
_pass_columns = [col for col in select_lst if col != _first_impact_col]
pass_data = pass_data[_pass_columns]

# full_data keeps the first-impact column (moved to the end) plus the source
# label; select_lst ends up holding exactly this column order.
select_lst = _pass_columns + [_first_impact_col, _source_label_col]
full_data = full_data[select_lst]

In [11]:
# One-hot encode the three tables (car, pass, full -- in that order).
car_data, pass_data, full_data = [
    pd.get_dummies(table) for table in (car_data, pass_data, full_data)
]

# Split each encoded table with the project helper; each call is unpacked
# into an (X, y) pair.
(car_data_X, car_data_y), (pass_data_X, pass_data_y), (full_data_X, full_data_y) = [
    get_train_test_data(table) for table in (car_data, pass_data, full_data)
]

# Sanity check: the combined table must have one row per car row plus one per pass row.
_n_car, _n_pass, _n_full = (
    features.shape[0] for features in (car_data_X, pass_data_X, full_data_X)
)
assert _n_car + _n_pass == _n_full

In [12]:
import os
import pickle


def _fit_and_save(kfold_fn, X, y, out_path, label):
    """Run one k-fold evaluation, pickle its outputs, and return them.

    Parameters
    ----------
    kfold_fn : callable
        Project helper (``logistic_cm_kfold`` or ``linear_svc_kfold``) called as
        ``kfold_fn(X.astype(float), y)``; its result is unpacked into three values.
    X, y
        Feature table and target passed through to ``kfold_fn``.
    out_path : str
        Pickle file that receives a dict with the keys ``'y_resampled_test'``,
        ``'decision_scores'`` and ``'indices'``.
    label : str
        Prefix of the progress message printed as ``"<label> done"``.

    Returns
    -------
    tuple
        ``(y_resampled_test, decision_scores, indices)`` exactly as returned by
        ``kfold_fn``.
    """
    # Create the target directory *before* the expensive fit so a missing
    # folder cannot make the run fail only after all the work is done.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    y_resampled_test, decision_scores, indices = kfold_fn(X.astype(float), y)

    with open(out_path, "wb") as f:
        pickle.dump({
            'y_resampled_test': y_resampled_test,
            'decision_scores': decision_scores,
            'indices': indices
        }, f)
    print(f'{label} done')
    return y_resampled_test, decision_scores, indices


# NOTE(review): a later cell reads these files from "../Version3/ModelPerformance/";
# confirm that both paths resolve to the same directory.

# Logistic regression: pass, car, full.
pass_y_resampled_test_log, pass_decision_scores_log, pass_indices_log = _fit_and_save(
    logistic_cm_kfold, pass_data_X, pass_data_y,
    "./ModelPerformance/pass_performance_log.pkl", 'pass logistic')
car_y_resampled_test_log, car_decision_scores_log, car_indices_log = _fit_and_save(
    logistic_cm_kfold, car_data_X, car_data_y,
    "./ModelPerformance/car_performance_log.pkl", 'car logistic')
full_y_resampled_test_log, full_decision_scores_log, full_indices_log = _fit_and_save(
    logistic_cm_kfold, full_data_X, full_data_y,
    "./ModelPerformance/full_performance_log.pkl", 'full logistic')

# Linear SVC: pass, car, full.
pass_y_resampled_test_svc, pass_decision_scores_svc, pass_indices_svc = _fit_and_save(
    linear_svc_kfold, pass_data_X, pass_data_y,
    "./ModelPerformance/pass_performance_svc.pkl", 'pass svc')
car_y_resampled_test_svc, car_decision_scores_svc, car_indices_svc = _fit_and_save(
    linear_svc_kfold, car_data_X, car_data_y,
    "./ModelPerformance/car_performance_svc.pkl", 'car svc')
full_y_resampled_test_svc, full_decision_scores_svc, full_indices_svc = _fit_and_save(
    linear_svc_kfold, full_data_X, full_data_y,
    "./ModelPerformance/full_performance_svc.pkl", 'full svc')

Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
pass logistic done
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
car logistic done
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l1'}
Best parameters for this fold: {'C': 10, 'penalty': 'l2'}
full logistic done




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}
pass svc done




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 10, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
car svc done




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
Best parameters for this fold: {'C': 0.1, 'loss': 'hinge'}




Best parameters for this fold: {'C': 100, 'loss': 'hinge'}
Best parameters for this fold: {'C': 0.1, 'loss': 'hinge'}
full svc done


# Scores: evaluate the saved k-fold predictions

In [2]:
import pickle


def _load_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object.

    NOTE: ``pickle.load`` can execute arbitrary code stored in the file, so
    only use this on files written by this project.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Logistic-regression results.
pass_performance_log = _load_pickle("../Version3/ModelPerformance/pass_performance_log.pkl")
car_performance_log = _load_pickle("../Version3/ModelPerformance/car_performance_log.pkl")
full_performance_log = _load_pickle("../Version3/ModelPerformance/full_performance_log.pkl")

# Linear-SVC results.
pass_performance_svc = _load_pickle("../Version3/ModelPerformance/pass_performance_svc.pkl")
car_performance_svc = _load_pickle("../Version3/ModelPerformance/car_performance_svc.pkl")
full_performance_svc = _load_pickle("../Version3/ModelPerformance/full_performance_svc.pkl")

In [3]:
from sklearn.metrics import confusion_matrix, classification_report, recall_score

def get_score(resampled_test, decision_scores, threshold=0.5):
    """Binarise decision scores at ``threshold`` and summarise the predictions.

    Parameters
    ----------
    resampled_test : array-like
        True labels the predictions are compared against.
        Presumably encoded as 0/1 -- TODO confirm against the k-fold helpers.
    decision_scores : array-like
        One score per sample. Any sequence accepted by ``numpy.asarray`` works
        (ndarray, list, tuple, pandas Series).
    threshold : float, default 0.5
        Samples with ``score >= threshold`` are predicted as 1, all others as 0.

    Returns
    -------
    tuple
        ``(conf_matrix, recall, classification)``: the results of
        ``confusion_matrix``, ``recall_score`` and ``classification_report``
        (formatted with 4 digits) for the thresholded predictions.
    """
    # Local import keeps the function usable even if the notebook's import
    # cell has not been run in the current kernel session.
    import numpy as np

    # Coerce to an ndarray first: comparing a plain list with a number raises
    # TypeError, whereas an ndarray is compared element-wise.
    y_pred = (np.asarray(decision_scores) >= threshold).astype(int)

    conf_matrix = confusion_matrix(resampled_test, y_pred)
    recall = recall_score(resampled_test, y_pred)
    classification = classification_report(resampled_test, y_pred, digits=4)

    return conf_matrix, recall, classification

In [4]:
def _score_at(performance, threshold):
    """Call get_score on one loaded result dict with the given cut-off."""
    return get_score(
        performance['y_resampled_test'],
        performance['decision_scores'],
        threshold=threshold,
    )


# Cut-offs are set individually per model and data set.
# Logistic regression:
pass_mtrx, pass_recall, pass_class = _score_at(pass_performance_log, 0.3)
car_mtrx, car_recall, car_class = _score_at(car_performance_log, 0.4)
full_mtrx, full_recall, full_class = _score_at(full_performance_log, 0.4)

# Linear SVC:
pass_mtrx_svc, pass_recall_svc, pass_class_svc = _score_at(pass_performance_svc, -0.5)
car_mtrx_svc, car_recall_svc, car_class_svc = _score_at(car_performance_svc, 0.05)
full_mtrx_svc, full_recall_svc, full_class_svc = _score_at(full_performance_svc, 0.1)

# Print the confusion matrices: logistic (pass, car, full), then SVC (pass, car, full).
for _matrix in (pass_mtrx, car_mtrx, full_mtrx,
                pass_mtrx_svc, car_mtrx_svc, full_mtrx_svc):
    print(_matrix)

[[2322  716]
 [  26   32]]
[[67079 28008]
 [  133   283]]
[[70301 27824]
 [  144   330]]
[[2332  706]
 [  26   32]]
[[77241 17846]
 [  168   248]]
[[80905 17220]
 [  183   291]]


In [213]:
def _dump_summary(path, confusion, recall, classification):
    """Pickle one model's confusion matrix, recall and report text to ``path``."""
    summary = {
        'confusion': confusion,
        'recall': recall,
        'classification': classification,
    }
    with open(path, 'wb') as f:
        pickle.dump(summary, f)


# save as pickle
# Only the logistic-regression summaries are written here; the *_svc results
# are not saved by this cell.
_dump_summary('../Version3/ModelPerformance/pass_perform.pkl', pass_mtrx, pass_recall, pass_class)
_dump_summary('../Version3/ModelPerformance/car_perform.pkl', car_mtrx, car_recall, car_class)
_dump_summary('../Version3/ModelPerformance/full_perform.pkl', full_mtrx, full_recall, full_class)