# Ordinal Logistic Regression
* accident+violation 을 사용한 순서형 로짓 분석

In [200]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import mord
from sklearn.preprocessing import StandardScaler

In [201]:
acc_vio = pd.read_csv('data/FINAL_accident+violence.csv', encoding = 'cp949')

## 데이터 가공

### acc_vio의 '사고등급' 변수를 다음과 같이 조정: A = 4, B = 3, C = 2, D = 1

In [202]:
def accgrade(x):
    if x == 'A급':
        return 4
    elif x == 'B급':
        return 3
    elif x == 'C급':
        return 2
    elif x == 'D급':
        return 1
    else:
        pass

In [203]:
acc_vio['accgrade'] = acc_vio['사고등급'].apply(accgrade)

In [204]:
def daynightfunc(x):
    """주간 = 0, 야간 = 1로 나타내는 더미변수로 만들기"""
    if x == '주간':
        return 0
    else:
        return 1

In [205]:
acc_vio['daynight'] = acc_vio['주야구분'].apply(daynightfunc)

In [206]:
acc_vio = acc_vio.drop(['주야구분', '사고등급','Unnamed: 0', 'Unnamed: 0_x', 'Unnamed: 0.1', '사고일자', '월별구분', '사고시간'], axis = 'columns')

In [207]:
acc_vio.head(3)

Unnamed: 0,사고일자.1,시간단위_3시간,노선명,이정,방향,사망,부상,중상,경상,발생지점,...,totalTF,totalTF_1,totalTF_HV,violationTF,violationTF_1,violationTF_HV,total_ViolationRate,HV_ViolationRate,accgrade,daynight
0,20170803,15-18,중부내륙선,226.2,양평,0,0,0,0,본선,...,2186.0,1766.0,420.0,0.0,0.0,0.0,0.0,0.0,2,0
1,20170803,15-18,중부내륙선,226.2,양평,0,0,0,0,본선,...,2373.0,1925.0,448.0,0.0,0.0,0.0,0.0,0.0,2,0
2,20170805,18-21,광주대구선,43.4,대구,0,1,1,0,본선,...,1393.0,1261.0,132.0,0.0,0.0,0.0,0.0,0.0,2,0


In [208]:
acc_vio.columns

Index(['사고일자.1', '시간단위_3시간', '노선명', '이정', '방향', '사망', '부상', '중상', '경상', '발생지점',
       '구분', '차로', '사고전차량\n통행속도', '사고원인\n구분', '주 사고원인\n', '사고직전차량조작', '운전자상태',
       '교통장애요인', '사고시도로환경', '날씨', '사고유형', '사고유형_2차', '사고차량수', '공사보유장비사고',
       '포장구분', '평면선형', '선형구분', '종단경사', '노면상태', '작업장구분', '절성토구분', '방책시설_중분대',
       '원인차차종', '원인차_차종구분', '도로명', '도로단축명', '도로표출명', '기점종점방향구분코드', '시점명',
       '종점명', 'conzoneID', 'conzoneName', 'Unnamed: 0_y', 'date', 'routeNo',
       'direction', 'conzoneId', 'conzoneNm', 'busLaneGubun', 'time',
       'totalTF', 'totalTF_1', 'totalTF_HV', 'violationTF', 'violationTF_1',
       'violationTF_HV', 'total_ViolationRate', 'HV_ViolationRate', 'accgrade',
       'daynight'],
      dtype='object')

### acc_vio에서 필요없는 변수를 제거
* 사고일자, 노선명, 이정, 방향, 사망, 부상, 중상, 경상, 발생지점, 구분, 차로, 사고원인구분, 원인차_차종구분, 도로명, 도로단축명, 도로표출명, 기점종점방향구분코드, 시점명, 종점명, conzoneID, conzoneName, date, routeNo, direction, conzoneId, conzoneNm, time

In [209]:
delete_columns = ['사고일자.1', '노선명', '이정', '방향', '사망', '부상', '중상', '경상', '발생지점', '구분', '차로', '사고원인\n구분', '원인차_차종구분', '도로명', '도로단축명', '도로표출명', '기점종점방향구분코드', '시점명', '종점명', 'conzoneID', 'conzoneName', 'date', 'routeNo', 'direction', 'conzoneId', 'conzoneNm', 'time']

acc_vio = acc_vio.drop(delete_columns, axis = 'columns')

In [210]:
acc_vio.head()

Unnamed: 0,시간단위_3시간,사고전차량\n통행속도,주 사고원인\n,사고직전차량조작,운전자상태,교통장애요인,사고시도로환경,날씨,사고유형,사고유형_2차,...,totalTF,totalTF_1,totalTF_HV,violationTF,violationTF_1,violationTF_HV,total_ViolationRate,HV_ViolationRate,accgrade,daynight
0,15-18,106.0,과속,운행차로주행,정상,장애없음,정상,맑음,차-차,추돌(rear-end),...,2186.0,1766.0,420.0,0.0,0.0,0.0,0.0,0.0,2,0
1,15-18,106.0,과속,운행차로주행,정상,장애없음,정상,맑음,차-차,추돌(rear-end),...,2373.0,1925.0,448.0,0.0,0.0,0.0,0.0,0.0,2,0
2,18-21,80.0,과속,운행차로주행,정상,장애없음,미끄러운 노면,비,차-시설,,...,1393.0,1261.0,132.0,0.0,0.0,0.0,0.0,0.0,2,0
3,18-21,80.0,과속,운행차로주행,정상,장애없음,미끄러운 노면,비,차-시설,,...,2792.0,2568.0,224.0,0.0,0.0,0.0,0.0,0.0,2,0
4,12-15,97.0,과속,운행차로주행,정상,장애없음,정상,비,차-시설,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0


In [211]:
acc_vio.columns

Index(['시간단위_3시간', '사고전차량\n통행속도', '주 사고원인\n', '사고직전차량조작', '운전자상태', '교통장애요인',
       '사고시도로환경', '날씨', '사고유형', '사고유형_2차', '사고차량수', '공사보유장비사고', '포장구분', '평면선형',
       '선형구분', '종단경사', '노면상태', '작업장구분', '절성토구분', '방책시설_중분대', '원인차차종',
       'Unnamed: 0_y', 'busLaneGubun', 'totalTF', 'totalTF_1', 'totalTF_HV',
       'violationTF', 'violationTF_1', 'violationTF_HV', 'total_ViolationRate',
       'HV_ViolationRate', 'accgrade', 'daynight'],
      dtype='object')

### 더미변수 생성을 위한 함수설정
* 사고직전차량조작(operation_)
    * (운행차로주행, 기타)
    * 차로변경(01)
    * 핸들과대조작(02)
* 주사고원인(acc_reason_)
    * 과속(01)
    * 주시태만(02)
    * 졸음(03)
    * 안전거리미확보(04)
    * 추월불량(05)
    * (운전요인기타), (기타)
* 운전자상태(driver_condition_)
    * (정상)
    * 피로 : 01
    * 음주 : 02
    * 기타 : 03
    * 질병 : 04
* 교통장애요인(environment_)
    * (정상)
    * 미끄러운 노면 : 01
    * 낙하물 : 02
    * 시거장애 : 03
    * 포트홀 : 04
    * 기타 : 05
* 공사보유장비사고(workzone)
    * (관련없음) -- 0
    * 관련있음 -- 1
* 평면선형(alignment_)'
    * (직선)
    * '좌커브 1000m 이상' : 01
    * '우커브 1000m 이상' : 02
    * '좌커브' : 03
    * '우커브' : 04
* 원인차차종(reason_veh_)
    * (승용)
    * 화물 : 01
    * 승합 : 02
    * 특수차량 : 03
    * 트레일러 : 04
* 선형구분(line)
    * 원곡선부 : 01
    * 시점부(클로소이드) : 02
    * 종점부(클로소이드) : 03
* 종단경사(slope)
    * (평탄)
    * 내리막 : 01
    * 오르막 : 02

In [212]:
def operation_01_dummy(x):
    """사고직전차량조작 - 차로변경 :: 01"""
    if x == '차로변경':
        return 1
    else:
        return 0
    
def operation_02_dummy(x):
    """사고직전차량조작 - 핸들과대조작 :: 02"""
    if x == '핸들과대조작':
        return 1
    else:
        return 0

In [213]:
def acc_reason_01_dummy(x):
    """주사고원인 - 과속 :: 01"""
    if x == '과속':
        return 1
    else:
        return 0

def acc_reason_02_dummy(x):
    """주사고원인 - 주시태만 :: 02"""
    if x == '주시태만':
        return 1
    else:
        return 0

def acc_reason_03_dummy(x):
    """주사고원인 - 졸음 :: 03"""
    if x == '졸음':
        return 1
    else:
        return 0
    
def acc_reason_04_dummy(x):
    """주사고원인 - 안전거리미확보 :: 04"""
    if x == '안전거리미확보':
        return 1
    else:
        return 0
    
def acc_reason_05_dummy(x):
    """주사고원인 - 추월불량 :: 05"""
    if x == '추월불량':
        return 1
    else:
        return 0

In [214]:
def driver_condition_01_dummy(x):
    """운전자상태 - 피로 :: 01"""
    if x == '피로':
        return 1
    else: 
        return 0
    
def driver_condition_02_dummy(x):
    """운전자상태 - 음주 :: 02"""
    if x == '음주':
        return 1
    else:
        return 0

def driver_condition_03_dummy(x):
    """운전자상태 - 질병 :: 03"""
    if x == '질병':
        return 1
    else:
        return 0
    
def driver_condition_04_dummy(x):
    if x == '기타':
        return 1
    else:
        return 0

In [215]:
def environment_01_dummy(x):
    """교통장애요인 - 미끄러운 노면 :: 01"""
    if x == '미끄러운 노면':
        return 1
    else:
        return 0

def environment_02_dummy(x):
    """교통장애요인 - 낙하물 :: 02"""
    if x == '낙하물':
        return 1
    else:
        return 0
    
def environment_03_dummy(x):
    """교통장애요인 - 시거장애 :: 03"""
    if x == '시거장애':
        return 1
    else:
        return 0
    
def environment_04_dummy(x):
    """교통장애요인 - 포트홀 :: 04"""
    if x == '포트홀':
        return 1
    else:
        return 0
    
def environment_05_dummy(x):
    """교통장애요인 - 기타 :: 05"""
    if x == '기타':
        return 1
    else:
        return 0

In [216]:
def workzone_dummy(x):
    """공사보유장비사고 - 관련있음 :: 1, 관련없음 :: 0"""
    if x == '관련있음':
        return 1
    else:
        return 0

In [217]:
def alignment_01_dummy(x):
    """평면선형 - 좌커브 1000m 이상 :: 01"""
    if x == '좌커브 1000m 이상':
        return 1
    else :
        return 0
    
def alignment_02_dummy(x):
    """평면선형 - 우커브 1000m 이상 :: 02"""
    if x == '우커브 1000m 이상':
        return 1
    else:
        return 0
    
def alignment_03_dummy(x):
    """평면선형 - 좌커브 :: 03"""
    if x == '좌커브':
        return 1
    else:
        return 0
    
def alignment_04_dummy(x):
    """평면선형 - 우커브 :: 04"""
    if x == '우커브':
        return 1
    else:
        return 0

In [218]:
def reason_veh_01_dummy(x):
    """원인차차종 - 화물 : 01 """
    if x == '화물':
        return 1
    else:
        return 0
    
def reason_veh_02_dummy(x):
    """원인차차종 - 승합 : 02 """
    if x == '승합':
        return 1
    else:
        return 0

def reason_veh_03_dummy(x):
    """원인차차종 - 특수차량 : 01 """
    if x == '특수차량':
        return 1
    else:
        return 0
    
def reason_veh_04_dummy(x):
    """원인차차종 - 트레일러 : 04 """
    if x == '트레일러':
        return 1
    else:
        return 0

In [219]:
def line_01_dummy(x):
    """선형구분 - 원곡선부 :: 01"""
    if x == '원곡선부':
        return 1
    else:
        return 0
    
def line_02_dummy(x):
    """선형구분 - 시점부(클로소이드) :: 02"""
    if x == '시점부(클로소이드)':
        return 1
    else:
        return 0
    
def line_03_dummy(x):
    """선형구분 - 종점부(클로소이드) :: 03"""
    if x == '종점부(클로소이드)':
        return 1
    else:
        return 0

In [220]:
def slope_01_dummy(x):
    """종단경사 - 내리막 :: 01"""
    if x == '내리막':
        return 1
    else:
        return 0
    
def slope_02_dummy(x):
    """종단경사 - 오르막 :: 02"""
    if x == '오르막':
        return 1
    else:
        return 0

### 더미변수 생성

In [221]:
acc_vio['operation_01'] = acc_vio['사고직전차량조작'].apply(operation_01_dummy)
acc_vio['operation_02'] = acc_vio['사고직전차량조작'].apply(operation_02_dummy)

In [222]:
acc_vio['acc_reason_01'] = acc_vio['주 사고원인\n'].apply(acc_reason_01_dummy)
acc_vio['acc_reason_02'] = acc_vio['주 사고원인\n'].apply(acc_reason_02_dummy)
acc_vio['acc_reason_03'] = acc_vio['주 사고원인\n'].apply(acc_reason_03_dummy)
acc_vio['acc_reason_04'] = acc_vio['주 사고원인\n'].apply(acc_reason_04_dummy)
acc_vio['acc_reason_05'] = acc_vio['주 사고원인\n'].apply(acc_reason_05_dummy)

In [223]:
acc_vio['environment_01'] = acc_vio['교통장애요인'].apply(environment_01_dummy)
acc_vio['environment_02'] = acc_vio['교통장애요인'].apply(environment_02_dummy)
acc_vio['environment_03'] = acc_vio['교통장애요인'].apply(environment_03_dummy)
acc_vio['environment_04'] = acc_vio['교통장애요인'].apply(environment_04_dummy)
acc_vio['environment_05'] = acc_vio['교통장애요인'].apply(environment_05_dummy)

In [224]:
acc_vio['workzone'] = acc_vio['공사보유장비사고'].apply(workzone_dummy)

In [225]:
acc_vio['alignmnet_01'] = acc_vio['평면선형'].apply(alignment_01_dummy)
acc_vio['alignmnet_02'] = acc_vio['평면선형'].apply(alignment_02_dummy)
acc_vio['alignmnet_03'] = acc_vio['평면선형'].apply(alignment_03_dummy)
acc_vio['alignmnet_04'] = acc_vio['평면선형'].apply(alignment_04_dummy)

In [226]:
acc_vio['reason_veh_01'] = acc_vio['원인차차종'].apply(reason_veh_01_dummy)
acc_vio['reason_veh_02'] = acc_vio['원인차차종'].apply(reason_veh_02_dummy)
acc_vio['reason_veh_03'] = acc_vio['원인차차종'].apply(reason_veh_03_dummy)
acc_vio['reason_veh_04'] = acc_vio['원인차차종'].apply(reason_veh_04_dummy)

In [227]:
acc_vio['line_01'] = acc_vio['선형구분'].apply(line_01_dummy)
acc_vio['line_02'] = acc_vio['선형구분'].apply(line_02_dummy)
acc_vio['line_03'] = acc_vio['선형구분'].apply(line_03_dummy)

In [228]:
acc_vio['slope_01'] = acc_vio['종단경사'].apply(slope_01_dummy)
acc_vio['slope_02'] = acc_vio['종단경사'].apply(slope_02_dummy)

In [229]:
acc_vio['driver_condition_01'] = acc_vio['운전자상태'].apply(driver_condition_01_dummy)
acc_vio['driver_condition_02'] = acc_vio['운전자상태'].apply(driver_condition_02_dummy)
acc_vio['driver_condition_03'] = acc_vio['운전자상태'].apply(driver_condition_03_dummy)
acc_vio['driver_condition_04'] = acc_vio['운전자상태'].apply(driver_condition_04_dummy)

In [230]:
acc_vio.columns

Index(['시간단위_3시간', '사고전차량\n통행속도', '주 사고원인\n', '사고직전차량조작', '운전자상태', '교통장애요인',
       '사고시도로환경', '날씨', '사고유형', '사고유형_2차', '사고차량수', '공사보유장비사고', '포장구분', '평면선형',
       '선형구분', '종단경사', '노면상태', '작업장구분', '절성토구분', '방책시설_중분대', '원인차차종',
       'Unnamed: 0_y', 'busLaneGubun', 'totalTF', 'totalTF_1', 'totalTF_HV',
       'violationTF', 'violationTF_1', 'violationTF_HV', 'total_ViolationRate',
       'HV_ViolationRate', 'accgrade', 'daynight', 'operation_01',
       'operation_02', 'acc_reason_01', 'acc_reason_02', 'acc_reason_03',
       'acc_reason_04', 'acc_reason_05', 'environment_01', 'environment_02',
       'environment_03', 'environment_04', 'environment_05', 'workzone',
       'alignmnet_01', 'alignmnet_02', 'alignmnet_03', 'alignmnet_04',
       'reason_veh_01', 'reason_veh_02', 'reason_veh_03', 'reason_veh_04',
       'line_01', 'line_02', 'line_03', 'slope_01', 'slope_02',
       'driver_condition_01', 'driver_condition_02', 'driver_condition_03',
       'driver_condition_04'],
      dt

### 분석에 쓰지 않을 변수들 삭제(더미변수화 한것들)

In [231]:
delete_list = ['Unnamed: 0_y', 'busLaneGubun', '시간단위_3시간', '주 사고원인\n', '사고직전차량조작', '운전자상태', '교통장애요인', '사고시도로환경', '날씨', '사고유형', '사고유형_2차', '공사보유장비사고', '포장구분', '평면선형', '선형구분', '종단경사', '노면상태', '작업장구분', '절성토구분', '방책시설_중분대', '원인차차종', '사고전차량\n통행속도', '사고차량수']

acc_vio = acc_vio.drop(delete_list, axis = 'columns')

In [232]:
acc_vio

Unnamed: 0,totalTF,totalTF_1,totalTF_HV,violationTF,violationTF_1,violationTF_HV,total_ViolationRate,HV_ViolationRate,accgrade,daynight,...,reason_veh_04,line_01,line_02,line_03,slope_01,slope_02,driver_condition_01,driver_condition_02,driver_condition_03,driver_condition_04
0,2186.0,1766.0,420.0,0.0,0.0,0.0,0.000000,0.000000,2,0,...,0,0,0,0,0,0,0,0,0,0
1,2373.0,1925.0,448.0,0.0,0.0,0.0,0.000000,0.000000,2,0,...,0,0,0,0,0,0,0,0,0,0
2,1393.0,1261.0,132.0,0.0,0.0,0.0,0.000000,0.000000,2,0,...,0,1,0,0,1,0,0,0,0,0
3,2792.0,2568.0,224.0,0.0,0.0,0.0,0.000000,0.000000,2,0,...,0,1,0,0,1,0,0,0,0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,2,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,7736.0,6328.0,1408.0,162.0,0.0,162.0,0.020941,0.115057,2,0,...,0,0,0,0,0,0,0,0,0,0
750,653.0,365.0,288.0,653.0,365.0,288.0,1.000000,1.000000,2,0,...,0,0,1,0,0,0,0,0,0,0
751,3645.0,2953.0,692.0,2165.0,1815.0,350.0,0.593964,0.505780,2,0,...,0,0,1,0,0,0,0,0,0,0
752,12162.0,10886.0,1276.0,64.0,0.0,64.0,0.005262,0.050157,2,0,...,0,0,0,0,0,0,0,0,0,0


In [233]:
acc_vio.describe().round(2).transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
totalTF,754.0,5871.47,4603.94,0.0,1988.0,4299.5,9820.25,16902.0
totalTF_1,754.0,5035.02,4249.22,0.0,1428.5,3501.5,8509.5,15926.0
totalTF_HV,754.0,836.44,752.59,0.0,309.25,635.5,1241.5,8263.0
violationTF,754.0,178.91,580.84,0.0,0.0,0.0,132.75,6223.0
violationTF_1,754.0,67.42,397.16,0.0,0.0,0.0,0.0,5808.0
violationTF_HV,754.0,111.49,280.89,0.0,0.0,0.0,132.75,3264.0
total_ViolationRate,754.0,0.03,0.11,0.0,0.0,0.0,0.01,1.0
HV_ViolationRate,754.0,0.1,0.19,0.0,0.0,0.0,0.12,1.0
accgrade,754.0,2.11,0.32,2.0,2.0,2.0,2.0,4.0
daynight,754.0,0.14,0.35,0.0,0.0,0.0,0.0,1.0


In [192]:
acc_vio.to_csv('data/FIN_accident+violation_dummied.csv', encoding = 'cp949')

## Scaler를 사용한 표준화

In [193]:
acc_vio.columns # 표준화할 변수들 : index 0~7까지

Index(['totalTF', 'totalTF_1', 'totalTF_HV', 'violationTF', 'violationTF_1',
       'violationTF_HV', 'total_ViolationRate', 'HV_ViolationRate', 'accgrade',
       'daynight', 'operation_01', 'operation_02', 'acc_reason_01',
       'acc_reason_02', 'acc_reason_03', 'acc_reason_04', 'acc_reason_05',
       'environment_01', 'environment_02', 'environment_03', 'environment_04',
       'environment_05', 'workzone', 'alignmnet_01', 'alignmnet_02',
       'alignmnet_03', 'alignmnet_04', 'reason_veh_01', 'reason_veh_02',
       'reason_veh_03', 'reason_veh_04', 'line_01', 'line_02', 'line_03',
       'slope_01', 'slope_02', 'driver_condition_01', 'driver_condition_02',
       'driver_condition_03', 'driver_condition_04'],
      dtype='object')

In [194]:
# 표준화
acc_vio.iloc[:, 0:7] = pd.DataFrame(StandardScaler().fit_transform(acc_vio.iloc[:, 0:7]))

In [195]:
acc_vio.head(3)

Unnamed: 0,totalTF,totalTF_1,totalTF_HV,violationTF,violationTF_1,violationTF_HV,total_ViolationRate,HV_ViolationRate,accgrade,daynight,...,reason_veh_04,line_01,line_02,line_03,slope_01,slope_02,driver_condition_01,driver_condition_02,driver_condition_03,driver_condition_04
0,-0.801035,-0.769833,-0.553711,-0.30822,-0.169871,-0.397162,-0.275156,0.0,2,0,...,0,0,0,0,0,0,0,0,0,0
1,-0.76039,-0.73239,-0.516482,-0.30822,-0.169871,-0.397162,-0.275156,0.0,2,0,...,0,0,0,0,0,0,0,0,0,0
2,-0.973393,-0.888757,-0.936642,-0.30822,-0.169871,-0.397162,-0.275156,0.0,2,0,...,0,1,0,0,1,0,0,0,0,0


In [199]:
acc_vio.to_csv('data/FIN_accident+violation_dummied_scaled2.csv', encoding = 'cp949')