In [61]:
import pandas as pd
import warnings
import os
import sys
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

import missingno as msno

root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))

sys.path.insert(0, root_dir)

from config import DATA_PATH

# Show all columns
pd.set_option('display.max_columns', None)

In [62]:
df = pd.read_csv(os.path.join(DATA_PATH, 'RTA Dataset.csv'))

print(df.shape)

(12316, 32)


In [63]:
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S').dt.time

In [64]:
df.head()

Unnamed: 0,Time,Day_of_week,Age_band_of_driver,Sex_of_driver,Educational_level,Vehicle_driver_relation,Driving_experience,Type_of_vehicle,Owner_of_vehicle,Service_year_of_vehicle,Defect_of_vehicle,Area_accident_occured,Lanes_or_Medians,Road_allignment,Types_of_Junction,Road_surface_type,Road_surface_conditions,Light_conditions,Weather_conditions,Type_of_collision,Number_of_vehicles_involved,Number_of_casualties,Vehicle_movement,Casualty_class,Sex_of_casualty,Age_band_of_casualty,Casualty_severity,Work_of_casuality,Fitness_of_casuality,Pedestrian_movement,Cause_of_accident,Accident_severity
0,17:02:00,Monday,18-30,Male,Above high school,Employee,1-2yr,Automobile,Owner,Above 10yr,No defect,Residential areas,,Tangent road with flat terrain,No junction,Asphalt roads,Dry,Daylight,Normal,Collision with roadside-parked vehicles,2,2,Going straight,na,na,na,na,,,Not a Pedestrian,Moving Backward,Slight Injury
1,17:02:00,Monday,31-50,Male,Junior high school,Employee,Above 10yr,Public (> 45 seats),Owner,5-10yrs,No defect,Office areas,Undivided Two way,Tangent road with flat terrain,No junction,Asphalt roads,Dry,Daylight,Normal,Vehicle with vehicle collision,2,2,Going straight,na,na,na,na,,,Not a Pedestrian,Overtaking,Slight Injury
2,17:02:00,Monday,18-30,Male,Junior high school,Employee,1-2yr,Lorry (41?100Q),Owner,,No defect,Recreational areas,other,,No junction,Asphalt roads,Dry,Daylight,Normal,Collision with roadside objects,2,2,Going straight,Driver or rider,Male,31-50,3,Driver,,Not a Pedestrian,Changing lane to the left,Serious Injury
3,01:06:00,Sunday,18-30,Male,Junior high school,Employee,5-10yr,Public (> 45 seats),Governmental,,No defect,Office areas,other,Tangent road with mild grade and flat terrain,Y Shape,Earth roads,Dry,Darkness - lights lit,Normal,Vehicle with vehicle collision,2,2,Going straight,Pedestrian,Female,18-30,3,Driver,Normal,Not a Pedestrian,Changing lane to the right,Slight Injury
4,01:06:00,Sunday,18-30,Male,Junior high school,Employee,2-5yr,,Owner,5-10yrs,No defect,Industrial areas,other,Tangent road with flat terrain,Y Shape,Asphalt roads,Dry,Darkness - lights lit,Normal,Vehicle with vehicle collision,2,2,Going straight,na,na,na,na,,,Not a Pedestrian,Overtaking,Slight Injury


In [65]:
dump_df = df.drop(['Time'], axis=1)

In [66]:
def lowercase_column_and_values(df):
    df.columns = df.columns.str.lower()
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].str.lower()
    return df

dump_df = lowercase_column_and_values(dump_df)

In [67]:
dump_df.head()

Unnamed: 0,day_of_week,age_band_of_driver,sex_of_driver,educational_level,vehicle_driver_relation,driving_experience,type_of_vehicle,owner_of_vehicle,service_year_of_vehicle,defect_of_vehicle,area_accident_occured,lanes_or_medians,road_allignment,types_of_junction,road_surface_type,road_surface_conditions,light_conditions,weather_conditions,type_of_collision,number_of_vehicles_involved,number_of_casualties,vehicle_movement,casualty_class,sex_of_casualty,age_band_of_casualty,casualty_severity,work_of_casuality,fitness_of_casuality,pedestrian_movement,cause_of_accident,accident_severity
0,monday,18-30,male,above high school,employee,1-2yr,automobile,owner,above 10yr,no defect,residential areas,,tangent road with flat terrain,no junction,asphalt roads,dry,daylight,normal,collision with roadside-parked vehicles,2,2,going straight,na,na,na,na,,,not a pedestrian,moving backward,slight injury
1,monday,31-50,male,junior high school,employee,above 10yr,public (> 45 seats),owner,5-10yrs,no defect,office areas,undivided two way,tangent road with flat terrain,no junction,asphalt roads,dry,daylight,normal,vehicle with vehicle collision,2,2,going straight,na,na,na,na,,,not a pedestrian,overtaking,slight injury
2,monday,18-30,male,junior high school,employee,1-2yr,lorry (41?100q),owner,,no defect,recreational areas,other,,no junction,asphalt roads,dry,daylight,normal,collision with roadside objects,2,2,going straight,driver or rider,male,31-50,3,driver,,not a pedestrian,changing lane to the left,serious injury
3,sunday,18-30,male,junior high school,employee,5-10yr,public (> 45 seats),governmental,,no defect,office areas,other,tangent road with mild grade and flat terrain,y shape,earth roads,dry,darkness - lights lit,normal,vehicle with vehicle collision,2,2,going straight,pedestrian,female,18-30,3,driver,normal,not a pedestrian,changing lane to the right,slight injury
4,sunday,18-30,male,junior high school,employee,2-5yr,,owner,5-10yrs,no defect,industrial areas,other,tangent road with flat terrain,y shape,asphalt roads,dry,darkness - lights lit,normal,vehicle with vehicle collision,2,2,going straight,na,na,na,na,,,not a pedestrian,overtaking,slight injury


In [68]:
def remove_trailing_spaces(df):
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].str.strip()
    return df

dump_df = remove_trailing_spaces(dump_df)
dump_df['area_accident_occured'].value_counts()

area_accident_occured
other                              3819
office areas                       3451
residential areas                  2060
church areas                       1060
industrial areas                    456
school areas                        415
recreational areas                  328
outside rural areas                 218
hospital areas                      121
market areas                         63
rural village areas                  44
unknown                              22
rural village areasoffice areas      20
Name: count, dtype: int64

In [69]:
for col in dump_df.columns:
    # Store in a text file
    with open('column_values.txt', 'a') as f:
        f.write(f"Column Name: {col} \n")
        f.write(f"{col} Values: {dump_df[col].value_counts()} \n")
        f.write("-------------------------------------------- \n")

In [70]:
ordinal_list = ['day_of_week', 'age_band_of_driver', 'educational_level',
                'driving_experience', 'service_year_of_vehicle', 'defect_of_vehicle', 
                'age_band_of_casualty', 'casualty_severity', 'accident_severity']
nominal_list = ['sex_of_driver', 'type_of_vehicle', 'owner_of_vehicle',
                'area_accident_occured', 'lanes_or_medians', 'road_allignment', 
                'types_of_junction', 'road_surface_type', 'road_surface_conditions', 
                'light_conditions', 'weather_conditions', 'type_of_collision', 
                'vehicle_movement', 'casualty_class', 'sex_of_casualty', 
                'work_of_casuality', 'fitness_of_casuality', 'pedestrian_movement', 'cause_of_accident', 'vehicle_driver_relation']
nan_list = ['unknown', 'na']

In [71]:
def to_nan(df):
    df.replace(nan_list, np.nan, inplace=True)

to_nan(dump_df)

In [72]:
for col in dump_df.columns:
    # Store in a text file
    with open('new_column_values.txt', 'a') as f:
        f.write(f"Column Name: {col} \n")
        f.write(f"{col} Values: {dump_df[col].value_counts()} \n")
        f.write("-------------------------------------------- \n")

In [73]:
for item in ordinal_list:
    print(item)
    with open('ordinal_values.txt', 'a') as f:
        f.write(f"Column Name: {item} \n")
        f.write(f"{item} Values: {dump_df[item].value_counts()} \n")
        f.write("-------------------------------------------- \n")

day_of_week
age_band_of_driver
educational_level
driving_experience
service_year_of_vehicle
defect_of_vehicle
age_band_of_casualty
casualty_severity
accident_severity


In [74]:
def clean_ordinal_data(df):
    df['day_of_week'] = df['day_of_week'].map({'monday': 1, 'tuesday': 2, 'wednesday': 3, 'thursday': 4, 'friday': 5, 'saturday': 6, 'sunday': 7})
    df['age_band_of_driver'] = df['age_band_of_driver'].map({'under 18': 1, '18-30': 2, '31-50': 3, 'over 51': 4})
    df['educational_level'] = df['educational_level'].map({'illiterate': 1, 'writing & reading': 2, 'elementary school': 3, 'junior high school': 4, 'high school': 5, 'above high school': 6})
    df['driving_experience'] = df['driving_experience'].map({'no licence': 1, 'below 1yr': 2, '1-2yr': 3, '2-5yr': 4, '5-10yr': 5, 'above 10yr': 6})
    df['service_year_of_vehicle'] = df['service_year_of_vehicle'].map({'below 1yr': 1, '1-2yr': 2, '2-5yrs': 3, '5-10yrs': 4, 'above 10yr': 5})
    df['defect_of_vehicle'] = df['defect_of_vehicle'].replace({'no defect': 0})
    df['age_band_of_casualty'] = df['age_band_of_casualty'].map({'under 18': 1, '18-30': 2, '31-50': 3, 'over 51': 4, 5: 1})
    df['accident_severity'] = df['accident_severity'].map({'slight injury': 1, 'serious injury': 2, 'fatal injury': 3})

    return df

dump_df = clean_ordinal_data(dump_df)

In [75]:
dump_df.head()

Unnamed: 0,day_of_week,age_band_of_driver,sex_of_driver,educational_level,vehicle_driver_relation,driving_experience,type_of_vehicle,owner_of_vehicle,service_year_of_vehicle,defect_of_vehicle,area_accident_occured,lanes_or_medians,road_allignment,types_of_junction,road_surface_type,road_surface_conditions,light_conditions,weather_conditions,type_of_collision,number_of_vehicles_involved,number_of_casualties,vehicle_movement,casualty_class,sex_of_casualty,age_band_of_casualty,casualty_severity,work_of_casuality,fitness_of_casuality,pedestrian_movement,cause_of_accident,accident_severity
0,1,2.0,male,6.0,employee,3.0,automobile,owner,5.0,0,residential areas,,tangent road with flat terrain,no junction,asphalt roads,dry,daylight,normal,collision with roadside-parked vehicles,2,2,going straight,,,,,,,not a pedestrian,moving backward,1
1,1,3.0,male,4.0,employee,6.0,public (> 45 seats),owner,4.0,0,office areas,undivided two way,tangent road with flat terrain,no junction,asphalt roads,dry,daylight,normal,vehicle with vehicle collision,2,2,going straight,,,,,,,not a pedestrian,overtaking,1
2,1,2.0,male,4.0,employee,3.0,lorry (41?100q),owner,,0,recreational areas,other,,no junction,asphalt roads,dry,daylight,normal,collision with roadside objects,2,2,going straight,driver or rider,male,3.0,3.0,driver,,not a pedestrian,changing lane to the left,2
3,7,2.0,male,4.0,employee,5.0,public (> 45 seats),governmental,,0,office areas,other,tangent road with mild grade and flat terrain,y shape,earth roads,dry,darkness - lights lit,normal,vehicle with vehicle collision,2,2,going straight,pedestrian,female,2.0,3.0,driver,normal,not a pedestrian,changing lane to the right,1
4,7,2.0,male,4.0,employee,4.0,,owner,4.0,0,industrial areas,other,tangent road with flat terrain,y shape,asphalt roads,dry,darkness - lights lit,normal,vehicle with vehicle collision,2,2,going straight,,,,,,,not a pedestrian,overtaking,1


In [76]:
for item in ordinal_list:
    print(item)
    with open('clean_ordinal_values.txt', 'a') as f:
        f.write(f"Column Name: {item} \n")
        f.write(f"{item} Values: {dump_df[item].value_counts()} \n")
        f.write("-------------------------------------------- \n")

day_of_week
age_band_of_driver
educational_level
driving_experience
service_year_of_vehicle
defect_of_vehicle
age_band_of_casualty
casualty_severity
accident_severity


In [77]:
for item in nominal_list:
    print(item)
    with open('nominal_values.txt', 'a') as f:
        f.write(f"Column Name: {item} \n")
        f.write(f"{item} Values: {dump_df[item].value_counts()} \n")
        f.write("-------------------------------------------- \n")

sex_of_driver
type_of_vehicle
owner_of_vehicle
area_accident_occured
lanes_or_medians
road_allignment
types_of_junction
road_surface_type
road_surface_conditions
light_conditions
weather_conditions
type_of_collision
vehicle_movement
casualty_class
sex_of_casualty
work_of_casuality
fitness_of_casuality
pedestrian_movement
cause_of_accident
vehicle_driver_relation


In [78]:
def clean_nominal_data(df):
    df['area_accident_occured'] = df['area_accident_occured'].replace({'rural village areasoffice areas': 'rural village areas'})
    df['lanes_or_medians'] = df['lanes_or_medians'].replace({'two-way (divided with broken lines road marking)': 'divided two-way', 'two-way (divided with solid lines road marking)': 'divided two-way'})
    df['road_surface_type'] = df['road_surface_type'].replace({'asphalt roads with some distress': 'aspahlt roads'})
    df['light_conditions'] = df['light_conditions'].replace({'daylight': 'daylight', 'darkness - lights lit': 'darkness', 'darkness - lights unlit': 'darkness', 'darkness - no lighting': 'darkness'})
    df['fitness_of_casuality'] = df['fitness_of_casuality'].replace({'normalnormal': 'normal'})
    
    df = pd.get_dummies(df, columns=nominal_list)
    return df

dump_df = clean_nominal_data(dump_df)

In [79]:
dump_df.head()

Unnamed: 0,day_of_week,age_band_of_driver,educational_level,driving_experience,service_year_of_vehicle,defect_of_vehicle,number_of_vehicles_involved,number_of_casualties,age_band_of_casualty,casualty_severity,accident_severity,sex_of_driver_female,sex_of_driver_male,type_of_vehicle_automobile,type_of_vehicle_bajaj,type_of_vehicle_bicycle,type_of_vehicle_long lorry,type_of_vehicle_lorry (11?40q),type_of_vehicle_lorry (41?100q),type_of_vehicle_motorcycle,type_of_vehicle_other,type_of_vehicle_pick up upto 10q,type_of_vehicle_public (12 seats),type_of_vehicle_public (13?45 seats),type_of_vehicle_public (> 45 seats),type_of_vehicle_ridden horse,type_of_vehicle_special vehicle,type_of_vehicle_stationwagen,type_of_vehicle_taxi,type_of_vehicle_turbo,owner_of_vehicle_governmental,owner_of_vehicle_organization,owner_of_vehicle_other,owner_of_vehicle_owner,area_accident_occured_church areas,area_accident_occured_hospital areas,area_accident_occured_industrial areas,area_accident_occured_market areas,area_accident_occured_office areas,area_accident_occured_other,area_accident_occured_outside rural areas,area_accident_occured_recreational areas,area_accident_occured_residential areas,area_accident_occured_rural village areas,area_accident_occured_school areas,lanes_or_medians_divided two-way,lanes_or_medians_double carriageway (median),lanes_or_medians_one way,lanes_or_medians_other,lanes_or_medians_undivided two way,road_allignment_escarpments,road_allignment_gentle horizontal curve,road_allignment_sharp reverse curve,road_allignment_steep grade downward with mountainous terrain,road_allignment_steep grade upward with mountainous terrain,road_allignment_tangent road with flat terrain,road_allignment_tangent road with mild grade and flat terrain,road_allignment_tangent road with mountainous terrain and,road_allignment_tangent road with rolling terrain,types_of_junction_crossing,types_of_junction_no junction,types_of_junction_o shape,types_of_junction_other,types_of_junction_t shape,types_of_junction_x shape,types_of_junction_y shape,road_surface_type_aspahlt roads,road_surface_type_asphalt roads,road_surface_type_earth roads,road_surface_type_gravel roads,road_surface_type_other,road_surface_conditions_dry,road_surface_conditions_flood over 3cm. deep,road_surface_conditions_snow,road_surface_conditions_wet or damp,light_conditions_darkness,light_conditions_daylight,weather_conditions_cloudy,weather_conditions_fog or mist,weather_conditions_normal,weather_conditions_other,weather_conditions_raining,weather_conditions_raining and windy,weather_conditions_snow,weather_conditions_windy,type_of_collision_collision with animals,type_of_collision_collision with pedestrians,type_of_collision_collision with roadside objects,type_of_collision_collision with roadside-parked vehicles,type_of_collision_fall from vehicles,type_of_collision_other,type_of_collision_rollover,type_of_collision_vehicle with vehicle collision,type_of_collision_with train,vehicle_movement_entering a junction,vehicle_movement_getting off,vehicle_movement_going straight,vehicle_movement_moving backward,vehicle_movement_other,vehicle_movement_overtaking,vehicle_movement_parked,vehicle_movement_reversing,vehicle_movement_stopping,vehicle_movement_turnover,vehicle_movement_u-turn,vehicle_movement_waiting to go,casualty_class_driver or rider,casualty_class_passenger,casualty_class_pedestrian,sex_of_casualty_female,sex_of_casualty_male,work_of_casuality_driver,work_of_casuality_employee,work_of_casuality_other,work_of_casuality_self-employed,work_of_casuality_student,work_of_casuality_unemployed,fitness_of_casuality_blind,fitness_of_casuality_deaf,fitness_of_casuality_normal,fitness_of_casuality_other,pedestrian_movement_crossing from driver's nearside,pedestrian_movement_crossing from nearside - masked by parked or stationot a pedestrianry vehicle,pedestrian_movement_crossing from offside - masked by parked or stationot a pedestrianry vehicle,"pedestrian_movement_in carriageway, stationot a pedestrianry - not crossing (standing or playing)","pedestrian_movement_in carriageway, stationot a pedestrianry - not crossing (standing or playing) - masked by parked or stationot a pedestrianry vehicle",pedestrian_movement_not a pedestrian,pedestrian_movement_unknown or other,"pedestrian_movement_walking along in carriageway, back to traffic","pedestrian_movement_walking along in carriageway, facing traffic",cause_of_accident_changing lane to the left,cause_of_accident_changing lane to the right,cause_of_accident_driving at high speed,cause_of_accident_driving carelessly,cause_of_accident_driving to the left,cause_of_accident_driving under the influence of drugs,cause_of_accident_drunk driving,cause_of_accident_getting off the vehicle improperly,cause_of_accident_improper parking,cause_of_accident_moving backward,cause_of_accident_no distancing,cause_of_accident_no priority to pedestrian,cause_of_accident_no priority to vehicle,cause_of_accident_other,cause_of_accident_overloading,cause_of_accident_overspeed,cause_of_accident_overtaking,cause_of_accident_overturning,cause_of_accident_turnover,vehicle_driver_relation_employee,vehicle_driver_relation_other,vehicle_driver_relation_owner
0,1,2.0,6.0,3.0,5.0,0,2,2,,,1,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False
1,1,3.0,4.0,6.0,4.0,0,2,2,,,1,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False
2,1,2.0,4.0,3.0,,0,2,2,3.0,3.0,2,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
3,7,2.0,4.0,5.0,,0,2,2,2.0,3.0,1,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
4,7,2.0,4.0,4.0,4.0,0,2,2,,,1,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,True,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False


In [87]:
# Get df with only boolean columns into a list
bool_cols = [col for col in dump_df.columns if dump_df[col].dtype == 'bool']
# Convert boolean columns to int
dump_df[bool_cols] = dump_df[bool_cols].astype(int)

In [88]:
dump_df.head()

Unnamed: 0,day_of_week,age_band_of_driver,educational_level,driving_experience,service_year_of_vehicle,defect_of_vehicle,number_of_vehicles_involved,number_of_casualties,age_band_of_casualty,casualty_severity,accident_severity,sex_of_driver_female,sex_of_driver_male,type_of_vehicle_automobile,type_of_vehicle_bajaj,type_of_vehicle_bicycle,type_of_vehicle_long lorry,type_of_vehicle_lorry (11?40q),type_of_vehicle_lorry (41?100q),type_of_vehicle_motorcycle,type_of_vehicle_other,type_of_vehicle_pick up upto 10q,type_of_vehicle_public (12 seats),type_of_vehicle_public (13?45 seats),type_of_vehicle_public (> 45 seats),type_of_vehicle_ridden horse,type_of_vehicle_special vehicle,type_of_vehicle_stationwagen,type_of_vehicle_taxi,type_of_vehicle_turbo,owner_of_vehicle_governmental,owner_of_vehicle_organization,owner_of_vehicle_other,owner_of_vehicle_owner,area_accident_occured_church areas,area_accident_occured_hospital areas,area_accident_occured_industrial areas,area_accident_occured_market areas,area_accident_occured_office areas,area_accident_occured_other,area_accident_occured_outside rural areas,area_accident_occured_recreational areas,area_accident_occured_residential areas,area_accident_occured_rural village areas,area_accident_occured_school areas,lanes_or_medians_divided two-way,lanes_or_medians_double carriageway (median),lanes_or_medians_one way,lanes_or_medians_other,lanes_or_medians_undivided two way,road_allignment_escarpments,road_allignment_gentle horizontal curve,road_allignment_sharp reverse curve,road_allignment_steep grade downward with mountainous terrain,road_allignment_steep grade upward with mountainous terrain,road_allignment_tangent road with flat terrain,road_allignment_tangent road with mild grade and flat terrain,road_allignment_tangent road with mountainous terrain and,road_allignment_tangent road with rolling terrain,types_of_junction_crossing,types_of_junction_no junction,types_of_junction_o shape,types_of_junction_other,types_of_junction_t shape,types_of_junction_x shape,types_of_junction_y shape,road_surface_type_aspahlt roads,road_surface_type_asphalt roads,road_surface_type_earth roads,road_surface_type_gravel roads,road_surface_type_other,road_surface_conditions_dry,road_surface_conditions_flood over 3cm. deep,road_surface_conditions_snow,road_surface_conditions_wet or damp,light_conditions_darkness,light_conditions_daylight,weather_conditions_cloudy,weather_conditions_fog or mist,weather_conditions_normal,weather_conditions_other,weather_conditions_raining,weather_conditions_raining and windy,weather_conditions_snow,weather_conditions_windy,type_of_collision_collision with animals,type_of_collision_collision with pedestrians,type_of_collision_collision with roadside objects,type_of_collision_collision with roadside-parked vehicles,type_of_collision_fall from vehicles,type_of_collision_other,type_of_collision_rollover,type_of_collision_vehicle with vehicle collision,type_of_collision_with train,vehicle_movement_entering a junction,vehicle_movement_getting off,vehicle_movement_going straight,vehicle_movement_moving backward,vehicle_movement_other,vehicle_movement_overtaking,vehicle_movement_parked,vehicle_movement_reversing,vehicle_movement_stopping,vehicle_movement_turnover,vehicle_movement_u-turn,vehicle_movement_waiting to go,casualty_class_driver or rider,casualty_class_passenger,casualty_class_pedestrian,sex_of_casualty_female,sex_of_casualty_male,work_of_casuality_driver,work_of_casuality_employee,work_of_casuality_other,work_of_casuality_self-employed,work_of_casuality_student,work_of_casuality_unemployed,fitness_of_casuality_blind,fitness_of_casuality_deaf,fitness_of_casuality_normal,fitness_of_casuality_other,pedestrian_movement_crossing from driver's nearside,pedestrian_movement_crossing from nearside - masked by parked or stationot a pedestrianry vehicle,pedestrian_movement_crossing from offside - masked by parked or stationot a pedestrianry vehicle,"pedestrian_movement_in carriageway, stationot a pedestrianry - not crossing (standing or playing)","pedestrian_movement_in carriageway, stationot a pedestrianry - not crossing (standing or playing) - masked by parked or stationot a pedestrianry vehicle",pedestrian_movement_not a pedestrian,pedestrian_movement_unknown or other,"pedestrian_movement_walking along in carriageway, back to traffic","pedestrian_movement_walking along in carriageway, facing traffic",cause_of_accident_changing lane to the left,cause_of_accident_changing lane to the right,cause_of_accident_driving at high speed,cause_of_accident_driving carelessly,cause_of_accident_driving to the left,cause_of_accident_driving under the influence of drugs,cause_of_accident_drunk driving,cause_of_accident_getting off the vehicle improperly,cause_of_accident_improper parking,cause_of_accident_moving backward,cause_of_accident_no distancing,cause_of_accident_no priority to pedestrian,cause_of_accident_no priority to vehicle,cause_of_accident_other,cause_of_accident_overloading,cause_of_accident_overspeed,cause_of_accident_overtaking,cause_of_accident_overturning,cause_of_accident_turnover,vehicle_driver_relation_employee,vehicle_driver_relation_other,vehicle_driver_relation_owner
0,1,2.0,6.0,3.0,5.0,0,2,2,,,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
1,1,3.0,4.0,6.0,4.0,0,2,2,,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
2,1,2.0,4.0,3.0,,0,2,2,3.0,3.0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,7,2.0,4.0,5.0,,0,2,2,2.0,3.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,7,2.0,4.0,4.0,4.0,0,2,2,,,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0


In [89]:
dump_df.shape

(12316, 152)

In [92]:
# To csv
dump_df.to_csv(f'{DATA_PATH}/expanded_data.csv', index=False)