In [1]:
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

import csv
import pandas as pd
import numpy as np

In [2]:
def to_week_num(s):
    """Convert a three-letter weekday abbreviation to a number.

    Leading/trailing whitespace is stripped and the comparison is
    case-insensitive. "sun" maps to 1 through "sat" mapping to 7; any
    other text maps to 0.
    """
    weekday_numbers = {
        "sun": 1,
        "mon": 2,
        "tue": 3,
        "wed": 4,
        "thu": 5,
        "fri": 6,
        "sat": 7,
    }
    # Unrecognised abbreviations fall back to 0.
    return weekday_numbers.get(s.strip().lower(), 0)

In [3]:
def load_train_data(train_data_file):
    """Read the training CSV and split it into features and target.

    The 'Weekday_of_Collision' column is converted with ``to_week_num``
    while parsing, and every missing value is replaced by 0.

    Returns a ``(X_train, Y_train)`` pair: the selected feature columns as
    a DataFrame and the 'Collision_Severity' column as a Series.
    """
    # 'Policing_Area' is left out of the feature set for now.
    # NOTE(review): 'Collision_Ref_No' looks like a record identifier rather
    # than a predictive feature -- confirm it belongs here.
    feature_columns = [
        'Collision_Ref_No',
        'Weekday_of_Collision',
        'Day_of_Collision',
        'Month_of_Collision',
        'Hour_of_Collision',
        'Carriageway_Type',
        'Speed_Limit',
        'Junction_Detail',
        'Junction_Control',
        'Ped_Crossing_HC',
        'Ped_Crossing_PC',
        'Light_Conditions',
        'Weather_Conditions',
        'Road_Surface_Conditions',
        'Special_Conditions_at_Site',
    ]

    frame = pd.read_csv(
        train_data_file,
        converters={"Weekday_of_Collision": to_week_num},
    ).fillna(0)

    labels = frame['Collision_Severity']
    features = frame[feature_columns]
    return features, labels

In [4]:
def train_the_model(X_train, Y_train, algo = 'DT'):
    """Fit and return the classifier selected by ``algo``.

    Parameters
    ----------
    X_train
        Training features, passed straight to the estimator's ``fit``.
    Y_train
        Training labels, passed straight to the estimator's ``fit``.
    algo : str, default 'DT'
        Which estimator to build:
        'DT'  -> ``tree.DecisionTreeClassifier``
        'GBM' -> ``GradientBoostingClassifier``
        'ADA' -> ``AdaBoostClassifier``

    Returns
    -------
    The fitted estimator.

    Raises
    ------
    ValueError
        If ``algo`` is not one of the supported keys. Previously this case
        fell through every branch and failed with an opaque
        ``UnboundLocalError`` on ``clf``.
    """
    # Lambdas defer construction so only the requested estimator is built.
    # All three are seeded so repeated runs give the same model; originally
    # only the GBM branch fixed its random_state.
    builders = {
        'DT': lambda: tree.DecisionTreeClassifier(random_state=0),
        'GBM': lambda: GradientBoostingClassifier(
            n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0
        ),
        'ADA': lambda: AdaBoostClassifier(n_estimators=100, random_state=0),
    }

    build = builders.get(algo) if isinstance(algo, str) else None
    if build is None:
        raise ValueError(
            "Unknown algo %r; expected one of %s" % (algo, sorted(builders))
        )

    # fit() returns the estimator itself, which is what callers receive.
    return build().fit(X_train, Y_train)

In [5]:
def load_test_data(test_data_file):
    """Read the test CSV and return its feature columns as a DataFrame.

    The 'Weekday_of_Collision' column is converted with ``to_week_num``
    while parsing, and every missing value is replaced by 0.
    """
    feature_columns = [
        'Collision_Ref_No',
        'Weekday_of_Collision',
        'Day_of_Collision',
        'Month_of_Collision',
        'Hour_of_Collision',
        'Carriageway_Type',
        'Speed_Limit',
        'Junction_Detail',
        'Junction_Control',
        'Ped_Crossing_HC',
        'Ped_Crossing_PC',
        'Light_Conditions',
        'Weather_Conditions',
        'Road_Surface_Conditions',
        'Special_Conditions_at_Site',
    ]

    frame = pd.read_csv(
        test_data_file,
        converters={"Weekday_of_Collision": to_week_num},
    ).fillna(0)

    return frame[feature_columns]


In [6]:
def predict_model(X_test, output_file, model=None):
    """Predict on ``X_test`` and write the results to a CSV file.

    Parameters
    ----------
    X_test
        Features passed to the model's ``predict``.
    output_file
        Path of the CSV file to create (overwritten if it exists).
    model : optional
        Fitted estimator exposing ``predict``. When omitted, the
        notebook-level variable ``clf`` is used, which preserves the
        original two-argument call style.

    The file has a header row ``S.No., Collision_Severity`` followed by one
    row per prediction, numbered from 1.
    """
    if model is None:
        # Backward-compatible fallback to the global set by the training cell.
        model = clf

    predicted = model.predict(X_test)

    # The with-block closes the file; no explicit close() is needed.
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['S.No.', 'Collision_Severity'])
        writer.writerows(enumerate(predicted, start=1))

In [7]:
# Load the training features/labels and the test features. Both CSV paths
# are relative, so they are resolved against the current working directory.
X_train, Y_train = load_train_data('Accident_train.csv')
X_test = load_test_data('Accident_test.csv')

In [8]:
# Train each supported algorithm in turn and write its test-set predictions
# to 'using_<algo>_prediction.csv'.
for algo in ['DT', 'GBM', 'ADA']:
    # `clf` must remain a notebook-level name: predict_model reads it as a
    # global rather than receiving the model from this call.
    clf = train_the_model(X_train, Y_train, algo)
    out_file = 'using_' + algo + '_prediction.csv' 
    predict_model(X_test,out_file)