# AI FOR SEA

## Accepted Challenge: SAFETY

### PROBLEM STATEMENT:
Given the telematics data for each trip and the label if the trip is tagged as dangerous driving, derive a model that can detect dangerous driving trips.

Project Start time: 16 June 2019  21:24:00 


In [170]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import LogisticRegression
import os
import pickle

### Please change the path before running the code

In [4]:
Df_Safety=pd.read_csv("features/part-00000-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv")
Df_label=pd.read_csv("labels/part-00000-e9445087-aa0a-433b-a7f6-7f4c19d78ad6-c000.csv")

In [126]:
def read_directory():
    feature=[file for file in os.listdir(r"features") if file.endswith(".csv")]
    labels=[file for file in os.listdir(r"labels") if file.endswith(".csv")]
    return feature,labels

In [148]:
feature,label=read_directory()
Final_Data=pd.DataFrame(columns=['bookingID', 'Accuracy', 'Bearing', 'acceleration_x', 'acceleration_y',
                                'acceleration_z', 'gyro_x', 'gyro_y', 'gyro_z', 'second', 'Speed'])
for file in feature:
    df1=pd.read_csv(r"features/"+file)
    Final_Data=pd.concat([Final_Data,df1],axis=0)
label=pd.read_csv(r"labels/part-00000-e9445087-aa0a-433b-a7f6-7f4c19d78ad6-c000.csv")

In [149]:
Final_Data.shape

(16135561, 11)

### summarized the variance,mean and sum for each trips.

In [185]:
def Data_Preprocessing(Data,labels):
    #Feature engineering by variance mean method
    Data["Tot_Acceleration"]=np.sqrt( Data.acceleration_x**2+Data.acceleration_y**2+Data.acceleration_z**2)
    Data["Tot_Gyro"]=np.sqrt( Data.gyro_x**2+Data.gyro_y**2+Data.gyro_z**2)

    df_fe=pd.DataFrame(columns=['bookingID','Speed_Sum','Speed_Variance',
                                'Acceleration_Variance',"Accuracy_Variance",
                                "Bearing_Sum","Accuracy_Mean","Gyro_Variance",
                                "Seconds_Duration","Speed_Mean"])
    for i in list(set(Data.bookingID)):
        print("booking id:",i)
        df=Data[Data.bookingID==i]
        df=df.sort_values('second').reset_index(drop=True)
        #print(df.columns)
        df["speed1"]=np.abs(df['Speed'] - df['Speed'].shift(1))
        #df=df.sort_values('speed1',ascending=False).reset_index(drop=True)
        #df=df.head(30).reset_index(drop=True)
        second=max(df["second"])-min(df["second"])
        df_fe=df_fe.append({ 'bookingID':int(i),
                            'Speed_Sum':np.sum(df["speed1"]),
                            "Speed_Variance":np.var(df["Speed"]),
                           "Acceleration_Variance":np.var(df["Tot_Acceleration"]),
                            "Gyro_Variance":np.var(df["Tot_Gyro"]),
                            "Accuracy_Variance":np.var(df["Accuracy"]),
                            "Bearing_Sum":np.sum(df["Bearing"]),
                            "Accuracy_Mean":np.mean(df["Accuracy"]),
                            "Seconds_Duration":second,
                            "Speed_Mean":np.mean(df["speed1"]),
                            }
                           , ignore_index=True)
    DF_merged=pd.merge(df_fe, labels, on='bookingID').reset_index(drop=True)
    #Adding Total accelaration
    #DF_merged
    return DF_merged[['Speed_Sum','Speed_Variance',
            'Acceleration_Variance',"Accuracy_Variance",
            "Bearing_Sum","Accuracy_Mean","Gyro_Variance",
            "Seconds_Duration","Speed_Mean"]]

### This Function do the required preprocessing. Data Required: 1.Actual data with features 2.Label Data

In [183]:
Df_Final=Data_Preprocessing(Final_Data,label)

booking id: 824633720832
booking id: 111669149697
booking id: 601295421442
booking id: 627065225216
booking id: 489626271746
booking id: 1460288880645
booking id: 1185410973702
booking id: 987842478086
booking id: 1709396983813


KeyboardInterrupt: 

In [162]:
Df_Final.head()

Unnamed: 0,bookingID,Speed_Sum,Speed_Variance,Acceleration_Variance,Accuracy_Variance,Bearing_Sum,Accuracy_Mean,Gyro_Variance,Seconds_Duration,Speed_Mean,label
0,824633700000.0,548.951339,28.785262,0.226853,24.921631,217018.292925,9.046875,0.007925,1243.0,0.536609,0
1,111669100000.0,847.915858,83.405238,0.48204,4101.066504,265124.65284,30.631874,0.234568,2085.0,0.47609,1
2,601295400000.0,183.449241,50.424273,0.427532,0.544009,62797.0,4.085253,0.01572,508.0,0.361121,0
3,627065200000.0,375.959997,37.266488,0.279923,47.032516,72170.019192,12.665742,0.007239,783.0,0.522167,0
4,489626300000.0,489.29,74.190452,0.406267,935.204543,113606.0,5.768676,0.006996,959.0,0.654131,0


In [163]:
X=Df_Final[['Speed_Sum','Speed_Variance',
            'Acceleration_Variance',"Accuracy_Variance",
            "Bearing_Sum","Accuracy_Mean","Gyro_Variance",
            "Seconds_Duration","Speed_Mean"]]
y=Df_Final["label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 100)

In [164]:
def run_model(model, alg_name):
    # build the model on training data
    model.fit(X_train, y_train)
    # make predictions for test data
    y_pred = model.predict(X_test)
    # calculate the accuracy score
    accuracy =  accuracy_score(y_test, y_pred) * 100
    print(alg_name)
    print("Accuracy:",accuracy)
    print("---------------------------")

### Iterated Several model, still Decision Tree Works better

In [178]:
model = tree.DecisionTreeClassifier(criterion='entropy', max_depth=10)
run_model(model, "Decision Tree")


Decision Tree
Accuracy: 75.12437810945273
---------------------------


In [179]:
# save the model to disk
filename = 'Final_Model.sav'
pickle.dump(model, open(filename, 'wb'))

### Please Follow This For Prediction

In [181]:
#Lood the Model
#Please Make sure the path is correct
# save the model to disk
loaded_model = pickle.load(open("Final_Model.sav", 'rb'))
#LOAD YOUR DATA form PAth
test_data=pd.read_csv("")   #Add your path here
labels=pd.read_csv("")      #Add your path here
#RUM THE PRE PROCESSING 
Prediction_Data=Data_Preprocessing(test_data,labels)
predicted_Data = loaded_model.predict(Prediction_Data)

FileNotFoundError: [Errno 2] File b'' does not exist: b''