In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

from tdamapper.core import MapperAlgorithm
from tdamapper.cover import CubicalCover
from tdamapper.plot import MapperLayoutInteractive, MapperLayoutStatic
from tdamapper.clustering import FailSafeClustering
from sklearn import metrics

from functions import *
from chi import *
from regressionP import *

In [2]:
# Read the four NPA_TMA2 part files; the last two rows of every CSV are sliced
# off before the parts are stacked into one table.
tma2_paths = [
    "./Data/NPA_TMA2_1.csv",
    "./Data/NPA_TMA2_2.csv",
    "./Data/NPA_TMA2_3.csv",
    "./Data/NPA_TMA2_4.csv",
]
data1, data2, data3, data4 = [
    pd.read_csv(csv_path, low_memory=False)[:-2] for csv_path in tma2_paths
]
dataA2 = pd.concat([data1, data2, data3, data4], ignore_index=True)

# The TMA1 file is a single CSV and gets the same two-row trim.
dataA1 = pd.read_csv("./Data/NPA_TMA1.csv")[:-2]

In [3]:
# Report (rows, columns) for every loaded table, in load order.
for loaded_frame in (data1, data2, data3, data4, dataA1):
    print(loaded_frame.shape)

(80421, 51)
(66724, 51)
(70842, 51)
(54043, 51)
(1575, 51)


In [19]:
# import pickle

# with open('CalculatedData/new1.pkl', 'wb') as f:
#     pickle.dump(mapper_graph1, f)

# import pickle

# with open('CalculatedData/full.pkl', 'rb') as f:
#     mapper_graph1 = pickle.load(f)

In [8]:
def preprocess(input_data, select_lst, sample = 592, random_state = None):
    """Sample first-party rows and build the feature frame used for clustering.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Raw table; must contain '當事者順位' and every column in ``select_lst``.
    select_lst : list of str
        Columns to keep; must include '死亡受傷人數'.
    sample : int, default 592
        Number of rows to draw (without replacement) from the rows whose
        '當事者順位' equals 1.
    random_state : int or numpy Generator/RandomState, optional
        Forwarded to ``DataFrame.sample`` so a run can be reproduced. The
        default ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    (dist_df, sample_data) : tuple of pandas.DataFrame
        ``dist_df`` holds the selected columns with '死亡受傷人數' replaced by
        the columns returned from ``split_death_injury``; ``sample_data`` is the
        sampled raw rows with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        Raised by pandas when ``sample`` exceeds the number of eligible rows.
    """
    first_party = input_data[input_data['當事者順位'] == 1].reset_index(drop=True)
    sample_data = first_party.sample(sample, random_state=random_state).reset_index(drop=True)
    # sample_data = sample_data[sample_data['發生月份'] < 3]
    dataA = sample_data[select_lst]

    # split_death_injury is star-imported; downstream cells rely on it providing
    # the '受傷' and '死亡' columns.
    death_injury_data = split_death_injury(dataA['死亡受傷人數'])
    # Build a new frame rather than dropping in place on the concat result.
    dist_df = pd.concat([dataA, death_injury_data], axis=1).drop(columns=['死亡受傷人數'])

    return dist_df, sample_data

# Columns carried from the raw tables into the feature frame. '死亡受傷人數' is
# required by preprocess(); '經度'/'緯度' are kept here but excluded from the
# clustering matrix later on.
select_lst = [
    '天候名稱', 
    '路面狀況-路面狀態名稱',
    '肇因研判大類別名稱-主要', '當事者屬-性-別名稱', '當事者事故發生時年齡', 
    '車輛撞擊部位大類別名稱-最初',
    '光線名稱',
    '道路類別-第1當事者-名稱',
    '速限-第1當事者', 
    '道路型態大類別名稱',
    '事故位置大類別名稱', 
    '號誌-號誌種類名稱',
    '車道劃分設施-分向設施大類別名稱', '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    '事故類型及型態大類別名稱',
    '死亡受傷人數',
    '經度', '緯度',
    # Sub-category columns currently left out of the selection:
    # '道路型態子類別名稱', '事故位置子類別名稱', '車道劃分設施-分向設施子類別名稱', '事故類型及型態子類別名稱', 
    # '當事者行動狀態子類別名稱', '車輛撞擊部位子類別名稱-最初', '車輛撞擊部位子類別名稱-其他', '肇因研判子類別名稱-個別',
]

# preprocess returns a (feature frame, sampled raw rows) pair, so dist_dfA1 and
# dist_dfA2 are tuples; element [0] is the feature frame.
# NOTE(review): sampling is unseeded here, so every run draws different rows.
dist_dfA1 = preprocess(dataA1, select_lst, sample = 592)
dist_dfA2 = preprocess(dataA2, select_lst, sample = 20000) # 120420

    
# Stack both feature frames row-wise under a fresh 0..n-1 index.
rbind_data = pd.concat([dist_dfA1[0], dist_dfA2[0]], axis=0, ignore_index=True)

# Cap the injured count at 2 and turn the speed limit into a 0/1 flag
# (1 when the limit is above 60).
rbind_data.loc[rbind_data['受傷'] > 1, '受傷'] = 2
rbind_data['速限-第1當事者'] = rbind_data['速限-第1當事者'].apply(lambda x: 1 if x > 60 else 0)
# process_age and process_data are star-imported helpers; their definitions are
# not visible in this notebook.
rbind_data = process_age(rbind_data)

dist_df = process_data(rbind_data)
scaler = StandardScaler()

# Standardise every column, then build the clustering matrix X1 without the
# outcome ('受傷', '死亡') and coordinate ('經度', '緯度') columns.
full_dist = pd.DataFrame(scaler.fit_transform(dist_df), columns = dist_df.columns)
X1 = full_dist.drop(['受傷', '死亡', '經度', '緯度'], axis=1).to_numpy()

full_dist.head()

Unnamed: 0,天候名稱,路面狀況-路面狀態名稱,肇因研判大類別名稱-主要,當事者屬-性-別名稱,當事者事故發生時年齡,車輛撞擊部位大類別名稱-最初,光線名稱,道路類別-第1當事者-名稱,速限-第1當事者,道路型態大類別名稱,...,號誌-號誌種類名稱,車道劃分設施-分向設施大類別名稱,車道劃分設施-分道設施-快車道或一般車道間名稱,車道劃分設施-分道設施-快慢車道間名稱,車道劃分設施-分道設施-路面邊線名稱,事故類型及型態大類別名稱,經度,緯度,死亡,受傷
0,-0.447077,-0.330633,0.509006,0.73353,-1.101487,-0.377729,-1.167354,-0.263929,-0.158236,1.19108,...,-0.710033,-1.615495,1.112845,0.42626,-0.910906,-0.149625,-1.019358,-1.713087,5.561365,-2.623517
1,-0.447077,-0.330633,0.172155,0.73353,1.554879,-1.915529,-1.167354,-0.263929,-0.158236,1.19108,...,-0.710033,-1.615495,1.112845,0.42626,1.097808,-2.634358,0.34133,0.799452,5.561365,-0.550157
2,-0.447077,-0.330633,0.509006,0.73353,-0.216032,1.160071,-1.167354,2.080018,6.319681,-0.826832,...,0.455916,-1.615495,1.112845,-1.678668,-0.910906,-0.149625,-0.540469,-1.960891,5.561365,1.523203
3,-0.447077,-0.330633,0.172155,0.73353,0.669424,-1.915529,1.470144,-1.826561,6.319681,1.19108,...,-0.710033,-1.615495,1.760529,0.42626,-0.910906,-2.634358,-1.138011,-0.936608,5.561365,-2.623517
4,-0.447077,-0.330633,0.172155,-1.335911,1.554879,-1.915529,-1.167354,-0.263929,-0.158236,-0.826832,...,1.621864,-0.221355,-0.830207,0.42626,1.097808,-2.634358,0.301876,0.813273,5.561365,-0.550157


In [8]:
# Sanity check: (rows, columns) of the standardised feature frame.
full_dist.shape

(12433, 29)

In [9]:
def find_ratio(input_data, components) :
    """Report which PCA size in 1..components explains the most variance.

    A separate PCA is fitted for every component count and the summed
    ``explained_variance_ratio_`` is recorded for each.

    Parameters
    ----------
    input_data : array-like
        Data matrix passed to ``PCA.fit``.
    components : int
        Largest number of components to try; must be at least 1.

    Returns
    -------
    (max_comp, ratio) : tuple
        The component count with the highest cumulative explained-variance
        ratio and that ratio. The same two values are printed.

    Raises
    ------
    ValueError
        If ``components`` is smaller than 1 (previously surfaced as an opaque
        "max() arg is an empty sequence" error).
    """
    if components < 1:
        raise ValueError("components must be at least 1")

    best_comp = {}
    for comp in range(1, components + 1):
        pca = PCA(comp).fit(input_data)
        best_comp[comp] = pca.explained_variance_ratio_.sum()

    # key=best_comp.get selects the component count whose ratio is largest.
    max_comp = max(best_comp, key=best_comp.get)
    print("最佳成分數：", max_comp)
    print("解釋方差比率累計值：", best_comp[max_comp])

    # Return the result so that assigning the call to a name is meaningful.
    return max_comp, best_comp[max_comp]

# Print the explained-variance summary for up to 3 principal components.
# lens1 is rebound to the actual PCA projection in the following cell, so the
# value assigned here is not what the Mapper run uses as its lens.
lens1 = find_ratio(X1, 3)

最佳成分數： 3
解釋方差比率累計值： 0.28639265429798844


In [9]:
# Lens: projection of X1 onto its first 10 principal components.
lens1 = PCA(10).fit_transform(X1)

# Cover the lens with overlapping cubes (3 intervals, 30% overlap) and cluster
# each cube with Ward agglomerative clustering wrapped in FailSafeClustering.
lens_cover = CubicalCover(n_intervals=3, overlap_frac=0.3)
local_clusterer = FailSafeClustering(
    clustering=AgglomerativeClustering(3, linkage='ward'),
    verbose=False,
)
mapper_algo1 = MapperAlgorithm(cover=lens_cover, clustering=local_clusterer)

mapper_graph1 = mapper_algo1.fit_transform(X1, lens1)

In [11]:
# mapper_plot1 = MapperLayoutInteractive(
#     mapper_graph1,
#     colors = dist_df[['事故類型及型態大類別名稱']].to_numpy(),
#     cmap = 'jet',
#     # agg = np.nanmean,
#     agg = most_frequent_nonan,
#     dim = 3,
#     iterations = 30,
#     seed = 5,
#     width = 800,
#     height = 500)

# fig_mean1 = mapper_plot1.plot()
# fig_mean1.show(config={'scrollZoom': True})

In [9]:
import re

# NOTE(review): in the visible notebook mapper_plot1 is only created inside a
# commented-out cell, so this cell depends on state left in the kernel.
# The data object at index 1 of the figure supplies the x/y/z coordinates and the
# hover text that are read below.
node_trace = vars(mapper_plot1._MapperLayoutInteractive__fig)['_data_objs'][1]

x, y, z = (node_trace[axis_name] for axis_name in ('x', 'y', 'z'))
threeDimData = pd.DataFrame({'x': x, 'y': y, 'z': z})

data_tuple = node_trace['text']

# One integer field is parsed out of every hover string per pattern; the dict
# order fixes the column order of component_info.
hover_patterns = {
    'color': r'color: (-?\d+)',
    'node': r'node: (\d+)',
    'size': r'size: (\d+)',
}
data = [
    {
        field: int(re.search(pattern, hover_text).group(1))
        for field, pattern in hover_patterns.items()
    }
    for hover_text in data_tuple
]
component_info = pd.DataFrame(data)

# Place the parsed fields and the coordinates side by side (positional alignment).
full_info = pd.concat([component_info, threeDimData], axis=1)

# Per-node attributes stored on the layout's graph object, one row per node id.
mp_content_origin = vars(mapper_plot1._MapperLayoutInteractive__graph)['_node']
mp_content = (
    pd.DataFrame.from_dict(mp_content_origin, orient='index')
    .reset_index()
    .rename(columns={'index': 'node'})
)

full_info = full_info.merge(mp_content, on=['node', 'size'], how='inner')

In [1]:
# import plotly.graph_objects as go

# calinski_data = get_calinski_from_db(full_info, 0.0071)
# labels = calinski_data[3]
# db = calinski_data[2]
# n_clusters_ = calinski_data[4]

# unique_labels = set(labels)
# core_samples_mask = np.zeros_like(labels, dtype=bool)
# core_samples_mask[db.core_sample_indices_] = True

# def matplotlib_to_plotly(cmap, alpha=1):
#     """rgba"""
#     return f'rgba({int(cmap[0]*200)}, {int(cmap[1]*200)}, {int(cmap[2]*200)}, {alpha})'

# # colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]  
# colors = [matplotlib_to_plotly(plt.cm.Spectral(each), alpha=0.8) for each in np.linspace(0, 1, len(unique_labels))]
# fig = go.Figure()

# for k, col in zip(unique_labels, colors):
#     if k == -1:
#         # col = 'rgba(0,0,0,0)'
#         col = 'rgba(0,0,0,0)'

#     class_member_mask = labels == k

#     core_samples = full_info.iloc[:, 3:6][class_member_mask & core_samples_mask]
#     fig.add_trace(go.Scatter3d(
#         x=core_samples.iloc[:, 0],
#         y=core_samples.iloc[:, 1],
#         z=core_samples.iloc[:, 2],
#         mode='markers',
#         marker=dict(
#             size=6,
#             color=col,
#             opacity=0.8
#         ),
#         name=f'Cluster {k} Core'
#     ))

#     non_core_samples = full_info.iloc[:, 3:6][class_member_mask & ~core_samples_mask]
#     fig.add_trace(go.Scatter3d(
#         x=non_core_samples.iloc[:, 0],
#         y=non_core_samples.iloc[:, 1],
#         z=non_core_samples.iloc[:, 2],
#         mode='markers',
#         marker=dict(
#             size=6,
#             color=col,
#             opacity=0.5
#         ),
#         name=f'Cluster {k} Non-Core'
#     ))

# fig.update_layout(
#     title=f"Estimated number of clusters: {n_clusters_}",
#     margin=dict(l=0, r=0, b=0, t=0)
# )

# fig.show()

In [38]:
from chi import *

# NOTE(review): no live cell in the visible notebook adds a 'label' column to
# full_info; it appears to come from the commented-out DBSCAN cell — confirm.
# Split the node table by cluster label 0, 1 and 2 (label -1 is not extracted).
label_0, label_1, label_2 = (
    full_info[full_info['label'] == cluster_id] for cluster_id in (0, 1, 2)
)

# get_count_dict is provided by the star-import; the later cells use the keys of
# its result as row labels into rbind_data.
count_0, count_1, count_2 = (
    get_count_dict(cluster_nodes) for cluster_nodes in (label_0, label_1, label_2)
)

print(full_info['label'].unique())

[ 0  1 -1  2]


In [9]:
# Rows of rbind_data belonging to each cluster, looked up by the keys of the
# per-cluster count dictionaries.
full_0 = rbind_data.loc[count_0.keys()]
full_1 = rbind_data.loc[count_1.keys()]
full_2 = rbind_data.loc[count_2.keys()]

# Keys shared by a pair of clusters (the "connecting" points between them).
lst01 = list(count_0.keys() & count_1.keys())
lst02 = list(count_0.keys() & count_2.keys())
lst12 = list(count_1.keys() & count_2.keys())
# Pull the shared keys out for separate analysis; they are dropped from the
# per-cluster frames to satisfy the independence assumption of the chi-square test.
full_01 = full_0.loc[lst01]
full_02 = full_0.loc[lst02]
full_12 = full_1.loc[lst12]

# errors='ignore' because a key shared by all three clusters is listed in more
# than one overlap list and may already have been removed.
full_0 = full_0.drop(lst01, errors='ignore').drop(lst02, errors='ignore')
full_1 = full_1.drop(lst01, errors='ignore').drop(lst12, errors='ignore')
full_2 = full_2.drop(lst02, errors='ignore').drop(lst12, errors='ignore')

# Remove the shared keys from the count dictionaries too. pop(key, None) is used
# instead of del for the same reason as errors='ignore' above: del raised
# KeyError the second time a triple-shared key was removed.
print('01連接點數量', len(lst01))
for key1 in lst01:
    count_0.pop(key1, None)
    count_1.pop(key1, None)
print('02連接點數量', len(lst02))
for key2 in lst02:
    count_0.pop(key2, None)
    count_2.pop(key2, None)
print('12連接點數量', len(lst12))
for key3 in lst12:
    count_1.pop(key3, None)
    count_2.pop(key3, None)

# add_count is a star-imported helper; it receives each exclusive cluster frame
# together with its remaining counts.
full_0 = add_count(full_0, count_0)
full_1 = add_count(full_1, count_1)
full_2 = add_count(full_2, count_2)

print('各分群相加', full_0.shape[0] + full_1.shape[0] + full_2.shape[0])
print('各分群大小', full_0.shape, full_1.shape, full_2.shape)
# print('權重', full_0['count'].sum(), full_1['count'].sum(), full_2['count'].sum())

01連接點數量 100
02連接點數量 0
12連接點數量 5
各分群相加 120863
各分群大小 (118482, 30) (2370, 30) (11, 30)


In [10]:
# Predictor columns passed to pval() and used to select the VIF design matrix:
# select_lst without the outcome ('死亡受傷人數') and coordinate columns.
lst_regression = [
    '天候名稱',
    '路面狀況-路面狀態名稱',
    '肇因研判大類別名稱-主要', '當事者屬-性-別名稱', '當事者事故發生時年齡', 
    '車輛撞擊部位大類別名稱-最初',
    '光線名稱',
    '道路類別-第1當事者-名稱', 
    '速限-第1當事者', 
    '道路型態大類別名稱', 
    '事故位置大類別名稱',
    '號誌-號誌種類名稱',
    '車道劃分設施-分向設施大類別名稱', '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    '事故類型及型態大類別名稱',
]

# cluster0_X, cluster0_y = get_clusterN_logit(full_0)
# cluster1_X, cluster1_y = get_clusterN_logit(full_1)
# cluster2_X, cluster2_y = get_clusterN_logit(full_2)

In [19]:
# cluster0_data = get_logit_data(cluster0_X, cluster0_y, lst_regression)
# cluster1_data = get_logit_data(cluster1_X, cluster1_y, lst_regression)
# cluster1_data = get_logit_data(cluster1_X, cluster1_y, lst_regression)

In [11]:
def _format_level(level):
    """Render an outcome level for a column label, showing whole numbers without a decimal part."""
    try:
        as_int = int(level)
    except (TypeError, ValueError, OverflowError):
        return str(level)
    return str(as_int) if as_int == level else str(level)


def calculate_proportions(full, category_column):
    """Percentage breakdown of the '受傷' and '死亡' outcomes within each category.

    Parameters
    ----------
    full : pandas.DataFrame
        Must contain ``category_column``, '受傷' and '死亡'.
    category_column : str
        Column whose distinct values define the rows of the result.

    Returns
    -------
    pandas.DataFrame
        One row per category value with:
        - ``category_column`` itself,
        - '受傷<level>' / '死亡<level>' columns holding the row-wise percentage
          (rounded to 2 decimals) of each outcome level actually present,
        - '總數', the number of rows in that category.

    Notes
    -----
    Column labels are taken from the outcome levels found in the data. The
    previous positional labelling (0, 1, 2, ...) mislabelled columns whenever a
    level was absent, e.g. levels {1, 2} were reported as '受傷0' and '受傷1'.
    """
    def _percent_table(outcome_column):
        # Rows: category values; columns: outcome levels; cells: row counts.
        counts = full.groupby([category_column, outcome_column]).size().unstack(fill_value=0)
        totals = counts.sum(axis=1)
        percents = (counts.div(totals, axis=0) * 100).round(2)
        percents.columns = [f'{outcome_column}{_format_level(level)}' for level in counts.columns]
        return percents, totals

    injured_pct, category_totals = _percent_table('受傷')
    death_pct, _ = _percent_table('死亡')

    final_df = injured_pct.join(death_pct)
    final_df['總數'] = category_totals
    # Turn the category index back into an ordinary column.
    return final_df.reset_index()

In [235]:
# Size-weighted average of the two per-cluster scores.
# NOTE(review): full_0_X, full_1_X, score_0 and score_1 are not defined in any
# cell visible in this notebook.
de = full_0_X.shape[0] + full_1_X.shape[0]
weight_0 = full_0_X.shape[0] / de
weight_1 = full_1_X.shape[0] / de
logit_avg_score = weight_0 * score_0 + weight_1 * score_1
print(logit_avg_score)

0.6483640964279711


In [12]:
# pval is a star-imported helper; its third return value is a table with a
# 'p_value' column. Despite the "01" suffix, this call compares full_0 with full_2.
# NOTE(review): the recorded run printed a max_iter non-convergence warning, so
# the coefficients and p-values should be read with caution.
X01, y01, p01 = pval(full_0, full_2, lst_regression)

# Keep only predictors significant at the 5% level.
p01[p01['p_value'] < 0.05]


The max_iter was reached which means the coef_ did not converge



Unnamed: 0,coefficients,standard_error,wald_statistics,p_value,feature
速限-第1當事者,0.055748,0.01058,5.268942,1.372121e-07,速限-第1當事者


In [41]:
# Persist the full coefficient/p-value table; index=False leaves out the index.
p01.to_csv('CalculatedData/所有分析/p.csv', index=False)

In [23]:
# Compare the 0/1 speed-limit flag across cluster 0, cluster 1 and the rows the
# two clusters share (full_01). `table` is star-imported; its definition is not
# visible in this notebook.
result = table('速限-第1當事者', full_0, full_1, full_01)
result#.to_csv('CalculatedData/所有分析/速限比例表.csv', index=False)

Unnamed: 0,cluster1,cluster2,cluster12
0,0.999941,0.0,0.02
1,5.9e-05,1.0,0.98


In [27]:
# Same comparison for the capped injured count ('受傷').
result = table('受傷', full_0, full_1, full_01)
result#.to_csv('CalculatedData/所有分析/速限_受傷比例表0.csv', index=False)

In [28]:
# Same comparison for the death count ('死亡').
result = table('死亡', full_0, full_1, full_01)
result#.to_csv('CalculatedData/所有分析/速限_死亡比例表1.csv', index=False)

### 去掉速限，還有哪些可以解釋

In [51]:
# Predictor list with '速限-第1當事者' removed, to see which other features still
# explain the cluster split once the speed-limit flag is left out.
lst_regression12 = [
    '天候名稱',
    '路面狀況-路面狀態名稱',
    '肇因研判大類別名稱-主要', '當事者屬-性-別名稱', '當事者事故發生時年齡', 
    '車輛撞擊部位大類別名稱-最初',
    '光線名稱',
    '道路類別-第1當事者-名稱', 
    '道路型態大類別名稱', 
    '事故位置大類別名稱',
    '號誌-號誌種類名稱',
    '車道劃分設施-分向設施大類別名稱', '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    '事故類型及型態大類別名稱',
]
# X01, y01, p01 = pval(full_0, full_1, lst_regression12)

# p01[p01['p_value'] < 0.05]

In [31]:
# NOTE(review): the pval(...) call for lst_regression12 in the preceding cell is
# commented out, so p01 is whatever an earlier cell left bound — confirm that
# the intended table is the one being written.
p01.to_csv('CalculatedData/所有分析/去掉關鍵p.csv', index=False)

In [33]:
# Injury/death percentage breakdown of cluster 0 by '車輛撞擊部位大類別名稱-最初'.
proportions = calculate_proportions(full_0, '車輛撞擊部位大類別名稱-最初')
proportions#.to_csv('CalculatedData/所有分析/輛撞擊部位0.csv', index=False)

In [34]:
# Injury/death percentage breakdown of cluster 1 by '車輛撞擊部位大類別名稱-最初'.
proportions = calculate_proportions(full_1, '車輛撞擊部位大類別名稱-最初')
proportions#.to_csv('CalculatedData/所有分析/輛撞擊部位1.csv', index=False)

In [35]:
# Injury/death percentage breakdown of cluster 0 by '當事者屬-性-別名稱'.
proportions = calculate_proportions(full_0, '當事者屬-性-別名稱')
proportions#.to_csv('CalculatedData/所有分析/性別0.csv', index=False)

In [36]:
# Injury/death percentage breakdown of cluster 1 by '當事者屬-性-別名稱'.
proportions = calculate_proportions(full_1, '當事者屬-性-別名稱')
proportions#.to_csv('CalculatedData/所有分析/性別1.csv', index=False)

In [37]:
# Injury/death percentage breakdown of cluster 0 by '當事者事故發生時年齡'.
proportions = calculate_proportions(full_0, '當事者事故發生時年齡')
proportions#.to_csv('CalculatedData/所有分析/年齡0.csv', index=False)

In [38]:
# Injury/death percentage breakdown of cluster 1 by '當事者事故發生時年齡'.
proportions = calculate_proportions(full_1, '當事者事故發生時年齡')
proportions#.to_csv('CalculatedData/所有分析/年齡1.csv', index=False)

In [131]:
# Run pval on cluster 1 vs cluster 2.
# NOTE(review): X, y and p are not referenced again in the visible notebook; the
# VIF computed later in this cell uses X01 instead.
X, y, p = pval(full_1, full_2, lst_regression)

from statsmodels.stats.outliers_influence import variance_inflation_factor

def calculate_vif(X):
    """Return a frame with one row per column of X: its name and its variance inflation factor.

    X is expected to be a DataFrame of predictors; its ``.values`` array is
    passed to ``variance_inflation_factor`` once for every column position.
    """
    design_matrix = X.values
    vif_scores = [
        variance_inflation_factor(design_matrix, position)
        for position in range(X.shape[1])
    ]

    report = pd.DataFrame()
    report["features"] = X.columns
    report["VIF"] = vif_scores
    return report

# VIF for the predictor columns of X01.
# NOTE(review): X01 was bound by an earlier pval call, not by the pval call at
# the top of this cell (which returns X) — confirm which design matrix is
# intended. The previous comment referred to a variable named c0_for_lm_X that
# does not appear in the visible notebook.
vif_df = calculate_vif(X01[lst_regression])
print(vif_df)


The max_iter was reached which means the coef_ did not converge



                   features       VIF
0                      天候名稱  2.940282
1               路面狀況-路面狀態名稱  2.930954
2              肇因研判大類別名稱-主要  1.094702
3                當事者屬-性-別名稱  1.017223
4                當事者事故發生時年齡  1.031751
5            車輛撞擊部位大類別名稱-最初  1.078671
6                      光線名稱  1.023237
7             道路類別-第1當事者-名稱  1.047871
8                  速限-第1當事者  1.001220
9                 道路型態大類別名稱  7.700875
10                事故位置大類別名稱  7.617299
11                號誌-號誌種類名稱  1.227127
12         車道劃分設施-分向設施大類別名稱  1.085831
13  車道劃分設施-分道設施-快車道或一般車道間名稱  1.205327
14      車道劃分設施-分道設施-快慢車道間名稱  1.102344
15       車道劃分設施-分道設施-路面邊線名稱  1.150402
16             事故類型及型態大類別名稱  1.091155


## 小樣本

In [107]:
# Rebinds lst_regression12: unlike the earlier definition, this version contains
# every predictor, including '速限-第1當事者'.
lst_regression12 = [
    '天候名稱',
    '路面狀況-路面狀態名稱',
    '肇因研判大類別名稱-主要', '當事者屬-性-別名稱', '當事者事故發生時年齡', 
    '車輛撞擊部位大類別名稱-最初',
    '光線名稱',
    '道路類別-第1當事者-名稱', 
    '道路型態大類別名稱', 
    '速限-第1當事者', 
    '事故位置大類別名稱',
    '號誌-號誌種類名稱',
    '車道劃分設施-分向設施大類別名稱', '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    '事故類型及型態大類別名稱',
]
# Despite the "01" suffix, this compares full_0 with full_2.
# NOTE(review): the recorded run printed a max_iter non-convergence warning.
X01, y01, p01 = pval(full_0, full_2, lst_regression12)

# Keep only predictors significant at the 5% level.
p01[p01['p_value'] < 0.05]


The max_iter was reached which means the coef_ did not converge



Unnamed: 0,coefficients,standard_error,wald_statistics,p_value,feature
路面狀況-路面狀態名稱,1.411148,0.126462,11.158663,0.0,路面狀況-路面狀態名稱
天候名稱,0.639038,0.126658,5.045366,4.526537e-07,天候名稱


In [110]:
# Rebinds lst_regression12 again, this time without '天候名稱' and
# '路面狀況-路面狀態名稱' (the two predictors the previous run found significant).
lst_regression12 = [
    # '天候名稱',
    # '路面狀況-路面狀態名稱',
    '肇因研判大類別名稱-主要', '當事者屬-性-別名稱', '當事者事故發生時年齡', 
    '車輛撞擊部位大類別名稱-最初',
    '光線名稱',
    '道路類別-第1當事者-名稱', 
    '道路型態大類別名稱', 
    '速限-第1當事者', 
    '事故位置大類別名稱',
    '號誌-號誌種類名稱',
    '車道劃分設施-分向設施大類別名稱', '車道劃分設施-分道設施-快車道或一般車道間名稱',
    '車道劃分設施-分道設施-快慢車道間名稱', '車道劃分設施-分道設施-路面邊線名稱',
    '事故類型及型態大類別名稱',
]
# Despite the "01" suffix, this compares full_0 with full_2.
# NOTE(review): the recorded run printed a max_iter non-convergence warning.
X01, y01, p01 = pval(full_0, full_2, lst_regression12)

# Keep only predictors significant at the 5% level.
p01[p01['p_value'] < 0.05]


The max_iter was reached which means the coef_ did not converge



Unnamed: 0,coefficients,standard_error,wald_statistics,p_value,feature
光線名稱,-0.096099,0.032157,-2.988445,0.002804,光線名稱
車道劃分設施-分道設施-快車道或一般車道間名稱,0.073193,0.032756,2.23447,0.025452,車道劃分設施-分道設施-快車道或一般車道間名稱
車道劃分設施-分道設施-快慢車道間名稱,0.067344,0.033694,1.998713,0.045639,車道劃分設施-分道設施-快慢車道間名稱


In [120]:
# Compare '路面狀況-路面狀態名稱' across cluster 0, cluster 2 and the rows the two
# clusters share (full_02). `table` is star-imported; its definition is not
# visible in this notebook.
result = table('路面狀況-路面狀態名稱', full_0, full_2, full_02)
result

Unnamed: 0,cluster1,cluster2,cluster12
乾燥,0.99798,0.006195,0.255319
油滑,0.000826,0.0,0.085106
濕潤,0.000551,0.987611,0.531915
冰雪,0.000551,0.0,0.0
泥濘,9.2e-05,0.006195,0.12766


In [121]:
# Same cluster 0 / cluster 2 / overlap comparison for '天候名稱'.
result = table('天候名稱', full_0, full_2, full_02)
result

Unnamed: 0,cluster1,cluster2,cluster12
晴,0.891552,0.043363,0.404255
陰,0.108448,0.135398,0.510638
雨,0.0,0.812389,0.0
風,0.0,0.00531,0.0
霧或煙,0.0,0.00354,0.085106


In [122]:
# Same cluster 0 / cluster 2 / overlap comparison for the capped injured count.
result = table('受傷', full_0, full_2, full_02)
result

Unnamed: 0,cluster1,cluster2,cluster12
1,0.685491,0.767257,0.787234
2,0.285583,0.20885,0.170213
0,0.028926,0.023894,0.042553


In [124]:
# Same cluster 0 / cluster 2 / overlap comparison for the death count.
result = table('死亡', full_0, full_2, full_02)
result

Unnamed: 0,cluster1,cluster2,cluster12
0,0.956382,0.965487,0.957447
1,0.042608,0.034513,0.042553
2,0.00101,0.0,0.0


In [125]:
# Injury/death percentage breakdown of cluster 0 by '光線名稱'.
proportions = calculate_proportions(full_0, '光線名稱')
proportions

Unnamed: 0,光線名稱,受傷0,受傷1,受傷2,死亡0,死亡1,死亡2,總數
0,有照明且開啟,2.98,68.43,28.58,95.32,4.57,0.11,3719
1,有照明未開啟或故障,2.89,69.05,28.06,95.7,4.21,0.09,4562
2,無照明,2.76,67.84,29.4,95.98,3.91,0.11,2609


In [126]:
# Injury/death percentage breakdown of cluster 2 by '光線名稱'.
proportions = calculate_proportions(full_2, '光線名稱')
proportions

Unnamed: 0,光線名稱,受傷0,受傷1,受傷2,死亡0,死亡1,總數
0,有照明且開啟,3.05,74.39,22.56,95.12,4.88,492
1,有照明未開啟或故障,2.27,77.95,19.77,97.27,2.73,440
2,無照明,1.01,79.8,19.19,98.48,1.52,198


In [127]:
# Injury/death percentage breakdown of cluster 0 by '車道劃分設施-分道設施-快慢車道間名稱'.
proportions = calculate_proportions(full_0, '車道劃分設施-分道設施-快慢車道間名稱')
proportions

Unnamed: 0,車道劃分設施-分道設施-快慢車道間名稱,受傷0,受傷1,受傷2,死亡0,死亡1,死亡2,總數
0,寬式快慢車道分隔島(50公分以上),6.22,67.11,26.67,92.44,7.56,0.0,225
1,快慢車道分隔線,3.14,68.62,28.24,95.07,4.77,0.16,1845
2,未繪設快慢車道分隔線,2.73,68.59,28.69,95.87,4.04,0.09,8764
3,窄式快慢車道分隔島(無柵欄),2.7,72.97,24.32,94.59,5.41,0.0,37
4,窄式快慢車道分隔島(附柵欄),15.79,52.63,31.58,84.21,15.79,0.0,19


In [128]:
# Injury/death percentage breakdown of cluster 2 by '車道劃分設施-分道設施-快慢車道間名稱'.
proportions = calculate_proportions(full_2, '車道劃分設施-分道設施-快慢車道間名稱')
proportions

Unnamed: 0,車道劃分設施-分道設施-快慢車道間名稱,受傷0,受傷1,受傷2,死亡0,死亡1,總數
0,寬式快慢車道分隔島(50公分以上),0.0,86.36,13.64,100.0,0.0,22
1,快慢車道分隔線,1.96,76.47,21.57,96.08,3.92,153
2,未繪設快慢車道分隔線,2.53,76.4,21.07,96.52,3.48,949
3,窄式快慢車道分隔島(無柵欄),0.0,100.0,0.0,100.0,0.0,2
4,窄式快慢車道分隔島(附柵欄),0.0,100.0,0.0,100.0,0.0,4


In [129]:
# Injury/death percentage breakdown of cluster 0 by '車道劃分設施-分道設施-快車道或一般車道間名稱'.
proportions = calculate_proportions(full_0, '車道劃分設施-分道設施-快車道或一般車道間名稱')
proportions

Unnamed: 0,車道劃分設施-分道設施-快車道或一般車道間名稱,受傷0,受傷1,受傷2,死亡0,死亡1,死亡2,總數
0,未繪設車道線,2.81,67.56,29.63,95.56,4.33,0.11,6516
1,禁止變換車道線(無標記),3.23,69.23,27.54,96.03,3.97,0.0,403
2,禁止變換車道線(附標記),1.38,72.41,26.21,95.86,4.14,0.0,145
3,車道線(無標記),3.15,70.02,26.83,95.57,4.34,0.09,3205
4,車道線(附標記),2.58,70.05,27.38,96.46,3.38,0.16,621


In [130]:
# Injury/death percentage breakdown of cluster 2 by '車道劃分設施-分道設施-快車道或一般車道間名稱'.
proportions = calculate_proportions(full_2, '車道劃分設施-分道設施-快車道或一般車道間名稱')
proportions

Unnamed: 0,車道劃分設施-分道設施-快車道或一般車道間名稱,受傷0,受傷1,受傷2,死亡0,死亡1,總數
0,未繪設車道線,2.25,78.76,19.0,96.89,3.11,579
1,禁止變換車道線(無標記),2.13,78.72,19.15,95.74,4.26,47
2,禁止變換車道線(附標記),0.0,76.19,23.81,100.0,0.0,21
3,車道線(無標記),2.96,72.59,24.44,95.56,4.44,405
4,車道線(附標記),1.28,82.05,16.67,98.72,1.28,78
