This notebook processes the raw data into its final shape: it encodes identifiers, derives per-trip features, and writes the train/validation/test splits.

In [38]:
%autosave 60

Autosaving every 60 seconds


In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
from datetime import datetime
from sklearn.preprocessing import LabelEncoder

import google.datalab.storage as storage
from io import BytesIO

In [39]:
# Storage location of the raw input CSV: the bucket, the folder inside it and
# the object name. The object path is assembled as FOLDER_NAME + "/" + FILE_NAME.
BUCKET_NAME = "msil_raw"
FOLDER_NAME = "training_data"
FILE_NAME = "msil_data.csv"

In [40]:
# setting up the parameters: figure size, table display limits, float precision, plot theme
plt.rcParams["figure.figsize"] = (10, 10)
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)
# Use the fully qualified option name, like the two options above. The bare
# "precision" pattern is ambiguous on pandas releases that also define
# "styler.format.precision", and set_option rejects patterns matching several keys.
pd.set_option("display.precision", 15)
sns.set_style("darkgrid")

In [5]:
# Locate the raw CSV object inside the bucket.
mybucket = storage.Bucket(BUCKET_NAME)
data_csv = mybucket.object(FOLDER_NAME + "/" + FILE_NAME)

uri = data_csv.uri
# The %gcs magic stores the object's contents in the Python variable `data`.
%gcs read --object $uri --variable data

# `data` is rebound here: from the downloaded contents to the parsed DataFrame.
data = pd.read_csv(BytesIO(data))
data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVODO,EVODOH,EVSMA_EWMA,EVSMA_delta,Driver name
0,0,0,352891066262722.0,2018,11,17,8,39,18,1,0.0,0.0,10,19.0,16,17,-7.0,213,16,20.0,21.5,0.0,688,0.0,93.0,0.0,OMKANWAR
1,1,1,352891066262722.0,2018,11,17,8,39,19,1,1000.0,0.0,10,19.0,17,17,-5.0,17,17,20.0,21.5,0.0,688,0.0,93.0,0.0,OMKANWAR
2,2,2,352891066262722.0,2018,11,17,8,39,20,1,2000.0,0.0,10,19.0,17,17,-8.0,17,17,20.0,21.5,0.0,688,0.0,93.0,0.0,OMKANWAR
3,3,3,352891066262722.0,2018,11,17,8,39,21,1,3000.0,0.0,10,19.0,17,17,-9.0,17,17,20.0,21.5,0.0,688,0.0,93.0,0.0,OMKANWAR
4,4,4,352891066262722.0,2018,11,17,8,39,22,1,4000.0,0.0,10,19.0,17,17,-4.0,17,17,20.0,21.5,0.0,688,0.0,93.0,0.0,OMKANWAR


In [6]:
# Collect the distinct IMEI values present in the dataset (order of first appearance).
# NOTE(review): imei_list is not referenced again in the cells shown here.
imei_list = list(data["IMEI"].unique())

In [7]:
# encoding the IMEI number: every known device gets an integer code, numbered
# from 1 in the order listed below
known_imeis = [
    352891066262722.0,
    352891066263282.0,
    358272088699007.0,
    358272088699072.0,
    358272088709954.0,
    358272088715043.0,
    358272088715191.0,
    358272088716215.0,
    358272088718575.0,
]
imei_codes = {imei: code for code, imei in enumerate(known_imeis, start=1)}
# An IMEI that is missing from the table comes out of .map() as NaN.
data["IMEI"] = data["IMEI"].map(imei_codes)

In [8]:
# Collect the distinct driver names (order of first appearance) and display them.
driver_list = list(data["Driver name"].unique())
driver_list

['OMKANWAR',
 'NONAME',
 'GOVIND',
 'NAVDEEP',
 'MAHINDER',
 'DEEPAK',
 'KAMAL',
 'NAVEEN',
 'AMITKUMAR',
 'MOHINDER',
 'MANOJ',
 'MOHIT',
 'MANISH',
 'GAJRAJ',
 'KULDEEP',
 'JAGDISH',
 'HIMMATSINGH',
 'MAHIPAL',
 'ASHISH',
 'NITIN',
 'VISHAL',
 'PRIYANK',
 'BHARAT',
 'SHASHANK',
 'RAJ',
 'SRAINA',
 'ABHIJIT',
 'TARUN',
 'PAVAN']

In [9]:
# encoding the Driver Name
# 'NONAME' keeps code 0; every other driver is numbered from 1 in the order listed.
named_drivers = [
    'OMKANWAR', 'GOVIND', 'NAVDEEP', 'MAHINDER', 'DEEPAK', 'KAMAL', 'NAVEEN',
    'AMITKUMAR', 'MOHINDER', 'MANOJ', 'MOHIT', 'MANISH', 'GAJRAJ', 'KULDEEP',
    'JAGDISH', 'HIMMATSINGH', 'MAHIPAL', 'ASHISH', 'NITIN', 'VISHAL', 'PRIYANK',
    'BHARAT', 'SHASHANK', 'RAJ', 'SRAINA', 'ABHIJIT', 'TARUN', 'PAVAN',
]
driver_codes = {'NONAME': 0}
driver_codes.update(
    (name, code) for code, name in enumerate(named_drivers, start=1)
)
# A name that is missing from the table comes out of .map() as NaN.
data["Driver name"] = data["Driver name"].map(driver_codes)

In [10]:
# encoding the trip number universally
# The per-row trip key is the string concatenation of IMEI, Year, Month, Day and the
# original tp value, so that the same tp value on another device or date gets its own key.
# NOTE(review): the parts are joined without a separator, so e.g. Day 1 + tp 12 and
# Day 11 + tp 2 yield the same key - confirm such pairs cannot occur in the data.
# The literal trip ids used further down (trip exclusions, split boundaries) depend on
# the labels produced here, so changing the key format would shift them.
data["tp"] = data["IMEI"].astype("str") + data["Year"].astype("str") + data["Month"].astype("str") + data["Day"].astype("str") + data["tp"].astype("str")

# Replace the string keys with integer labels.
encoder = LabelEncoder()
data["tp"] = encoder.fit_transform(data["tp"])

In [11]:
data.sample(10)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVODO,EVODOH,EVSMA_EWMA,EVSMA_delta,Driver name
1528461,1528550,1528550,2,2019,1,17,18,45,47,637,3878000.0,0.0,10,20.0,23,33,-80.0,28,32,32.5,35.0,0.0,8478,21.849188055555555,66.05937043663289,0.002012557173998,3
941620,941679,941679,2,2018,12,17,22,27,26,403,1359000.0,25.9141,11,14.0,26,30,-106.0,24,28,34.0,38.0,11.263055555555557,4143,9.299236791666669,82.10492130947094,0.006946485066806,3
4163840,4164109,4212709,7,2018,12,6,15,18,29,1822,20222000.0,0.0,10,24.5,34,28,36.0,32,27,30.0,31.5,0.0,1317,0.0,26.881436573162727,0.024358082265667,23
1990646,1990735,1990735,3,2018,12,14,11,15,13,876,164000.0,23.8516,11,19.5,23,31,-156.0,29,28,22.5,24.0,-0.043333333333331,4068,0.615307083333334,17.0729672694881,0.002473466762311,11
805956,806015,806015,2,2018,12,4,19,14,16,534,6395000.0,45.1562,11,21.5,28,36,-127.0,30,34,34.0,35.0,-1.150277777777791,3363,19.662119722222226,70.63601072421821,0.004610533024348,5
210490,210528,210528,1,2018,12,11,10,43,14,67,948000.0,1.5156,11,18.5,26,38,84.0,30,30,21.5,23.0,4.21,2457,10.002315861111107,59.75210578236406,0.001766297707256,2
2616959,2617092,2617092,4,2018,12,7,15,53,31,1335,591000.0,30.0625,11,23.5,32,48,407.0,36,36,32.5,34.0,-4.361944444444443,3631,5.183782708333332,74.85060442422555,0.001715404211026,8
446435,446484,446484,1,2019,1,11,15,19,23,201,2244000.0,5.2969,11,25.0,31,38,108.0,39,35,21.5,23.5,1.649444444444444,4471,16.79235676388889,84.53639325063078,0.004623500021381,2
2206868,2206957,2206957,3,2019,1,12,15,11,26,964,436000.0,24.1406,11,21.5,31,39,-176.0,40,41,20.0,22.5,0.824444444444437,6534,1.614173055555556,89.26663936069895,0.002258961379624,11
1963521,1963610,1963610,3,2018,12,12,16,33,3,868,579000.0,36.8125,11,20.0,25,42,-104.0,33,30,29.0,30.0,-11.545277777777786,3771,8.200991847222221,47.65967754244559,0.005412798049008,11


In [12]:
data["tp"].max()

2122

In [13]:
def impute_temp(trip_list):
    """Return a list as long as trip_list whose every entry is trip_list[0].

    An empty input gives an empty list.
    """
    return [trip_list[0] for _ in range(len(trip_list))]

def impute_temp_df(data, colname):
    """Broadcast each trip's first `colname` value to every row of that trip.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "tp" column (trip id) and the column named by `colname`.
    colname : str
        Column whose first value per trip is propagated.

    Returns
    -------
    list
        One value per row of `data`, in row order, so the result can be written
        back positionally (e.g. with DataFrame.insert). The earlier version
        returned the values grouped by trip, which only matched the row order
        when every trip's rows were contiguous.
    """
    # First row of every trip, indexed by trip id. drop=False keeps "tp"
    # selectable as a column in case colname is "tp" itself.
    first_rows = data.drop_duplicates(subset="tp", keep="first").set_index("tp", drop=False)
    first_value_by_trip = first_rows[colname]
    # Look up each row's trip id; one pass instead of one full-frame mask per trip.
    return data["tp"].map(first_value_by_trip).tolist()
  
def replace_df(dataframe, attribute_name, list_name):
    """
    Swap the contents of one column while keeping it at the same position.

    inputs - dataframe, name of the column to be replaced, replacement values
             (one per row of the dataframe)
    output - new dataframe with the column replaced; the input dataframe is not modified
    """
    position = list(dataframe.columns).index(attribute_name)
    result = dataframe.drop(columns = attribute_name)
    result.insert(position, attribute_name, list_name)
    return result

In [14]:
# rate of change in the dataset
def change_rate(feat_list, window=60):
    """Average per-step change of a sequence over a trailing window.

    Parameters
    ----------
    feat_list : sequence of numbers
        Values in their original order.
    window : int, optional
        Distance (in positions) between the two samples being compared.
        Defaults to 60, the value previously hard-coded.

    Returns
    -------
    list
        Same length as `feat_list`. Position i holds
        (feat_list[i] - feat_list[i - window]) / window for i >= window, and 0
        for the leading positions that have no reference sample.

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    n_values = len(feat_list)
    # Cap the zero padding at the input length: previously an input shorter than
    # the window still produced `window` zeros, i.e. more values than rows.
    cr_list = [0] * min(window, n_values)
    for i in range(window, n_values):
        cr_list.append((feat_list[i] - feat_list[i - window]) / window)
    return cr_list

def change_rate_df(dataframe, feature_name):
    """Per-trip first and second change rates of one column.

    For every trip (rows sharing the same "tp" value) the column's values are
    passed through change_rate(), and that result is passed through
    change_rate() once more.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a "tp" column and the column named by `feature_name`.
    feature_name : str
        Column to differentiate.

    Returns
    -------
    (list, list)
        Two lists with one entry per row of `dataframe`, in row order: the change
        rate, and the change rate of the change rate. Returning them in row order
        keeps them aligned when they are written back positionally, even if a
        trip's rows are not contiguous.
    """
    trip_ids = dataframe["tp"].tolist()
    values = dataframe[feature_name].tolist()

    # Row positions of every trip, gathered in a single pass over the frame
    # (instead of one full-frame boolean mask per trip).
    rows_by_trip = {}
    for position, trip_id in enumerate(trip_ids):
        rows_by_trip.setdefault(trip_id, []).append(position)

    first_rate = [0] * len(values)
    second_rate = [0] * len(values)
    for positions in rows_by_trip.values():
        trip_rate = change_rate([values[p] for p in positions])
        trip_rate_2 = change_rate(trip_rate)
        # zip stops at the number of rows in the trip, so any surplus entries
        # returned by change_rate() are ignored rather than shifting later trips.
        for position, rate, rate_2 in zip(positions, trip_rate, trip_rate_2):
            first_rate[position] = rate
            second_rate[position] = rate_2
    return first_rate, second_rate

In [15]:
# Within each trip, overwrite these columns with the trip's first reading.
first_reading_columns = [
    "EVIRT_Min",
    "EVDI2",
    "EVIGM_Latest",
    "EVCOM_Latest",
    "EVBMI_Latest",
    "EVBMA_Latest",
]
for column in first_reading_columns:
    data = replace_df(data, column, impute_temp_df(data, column))

# EVSMA_EWMA itself is kept; its per-trip first reading goes into a new
# "EVSMA_in" column placed directly in front of it (looked up by name rather
# than by a hard-coded column position).
data.insert(
    data.columns.get_loc("EVSMA_EWMA"),
    "EVSMA_in",
    impute_temp_df(data, "EVSMA_EWMA"),
)

In [16]:
# Drop the two "Unnamed" columns.
# NOTE(review): presumably row-index columns left over from earlier CSV exports - confirm.
data = data.drop(columns = ["Unnamed: 0", "Unnamed: 0.1"])

In [17]:
data.head()

Unnamed: 0,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVODO,EVODOH,EVSMA_in,EVSMA_EWMA,EVSMA_delta,Driver name
0,1,2018,11,17,8,39,18,0,0.0,0.0,10,19.0,16,17,-7.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
1,1,2018,11,17,8,39,19,0,1000.0,0.0,10,19.0,16,17,-5.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
2,1,2018,11,17,8,39,20,0,2000.0,0.0,10,19.0,16,17,-8.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
3,1,2018,11,17,8,39,21,0,3000.0,0.0,10,19.0,16,17,-9.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
4,1,2018,11,17,8,39,22,0,4000.0,0.0,10,19.0,16,17,-4.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1


In [18]:
# Exclude four trips, identified by their encoded tp id.
# NOTE(review): the notebook does not record why these trips are dropped.
excluded_trips = [1261, 1172, 1043, 266]
data = data[~data["tp"].isin(excluded_trips)]

In [19]:
data.head()

Unnamed: 0,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVODO,EVODOH,EVSMA_in,EVSMA_EWMA,EVSMA_delta,Driver name
0,1,2018,11,17,8,39,18,0,0.0,0.0,10,19.0,16,17,-7.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
1,1,2018,11,17,8,39,19,0,1000.0,0.0,10,19.0,16,17,-5.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
2,1,2018,11,17,8,39,20,0,2000.0,0.0,10,19.0,16,17,-8.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
3,1,2018,11,17,8,39,21,0,3000.0,0.0,10,19.0,16,17,-9.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1
4,1,2018,11,17,8,39,22,0,4000.0,0.0,10,19.0,16,17,-4.0,213,16,20.0,21.5,0.0,688,0.0,93.0,93.0,0.0,1


In [20]:
data.shape

(4764039, 26)

In [21]:
# Adding columns with EVVSP_change, EVVSP_delta, EVVAC_change and EVVAC_delta
# change_rate_df returns two lists per column: the per-trip change rate, and the
# change rate of that change rate.
# Mind the naming: the first list (variable *_change) is stored in the "<col>_delta"
# column, the second (variable *_change_rate) in the "<col>_change" column.
EVVSP_change, EVVSP_change_rate = change_rate_df(data, "EVVSP")
EVVAC_change, EVVAC_change_rate = change_rate_df(data, "EVVAC")
# Each insert shifts the columns after it, so the positions below refer to the
# frame as it looks at the moment of that particular call.
data.insert(9, "EVVSP_delta", EVVSP_change)
data.insert(10, "EVVSP_change", EVVSP_change_rate)
data.insert(20, "EVVAC_delta", EVVAC_change)
data.insert(21, "EVVAC_change", EVVAC_change_rate)

In [22]:
# selecting columns to be used in model training (this list also fixes the column order)
model_columns = [
    'IMEI', 'Year', 'Month', 'Day', 'Hour', 'Minute', 'Seconds',
    "tp", 'sp',
    'EVVSP', 'EVVSP_delta', 'EVVSP_change',
    'EVGPO', 'EVOAS', 'EVIGM_Latest', 'EVCOM_Latest', 'EVACO_Z',
    'EVIRT_Min', 'EVDI2', 'EVBMI_Latest', 'EVBMA_Latest',
    'EVVAC', 'EVVAC_delta', 'EVVAC_change',
    'EVODO', 'EVODOH',
    "Driver name",
    'EVSMA_in', "EVSMA_EWMA", "EVSMA_delta",
]
data = data[model_columns]

In [23]:
data.head()

Unnamed: 0,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVVSP_delta,EVVSP_change,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVVAC_delta,EVVAC_change,EVODO,EVODOH,Driver name,EVSMA_in,EVSMA_EWMA,EVSMA_delta
0,1,2018,11,17,8,39,18,0,0.0,0.0,0.0,0.0,10,19.0,16,17,-7.0,213,16,20.0,21.5,0.0,0.0,0.0,688,0.0,1,93.0,93.0,0.0
1,1,2018,11,17,8,39,19,0,1000.0,0.0,0.0,0.0,10,19.0,16,17,-5.0,213,16,20.0,21.5,0.0,0.0,0.0,688,0.0,1,93.0,93.0,0.0
2,1,2018,11,17,8,39,20,0,2000.0,0.0,0.0,0.0,10,19.0,16,17,-8.0,213,16,20.0,21.5,0.0,0.0,0.0,688,0.0,1,93.0,93.0,0.0
3,1,2018,11,17,8,39,21,0,3000.0,0.0,0.0,0.0,10,19.0,16,17,-9.0,213,16,20.0,21.5,0.0,0.0,0.0,688,0.0,1,93.0,93.0,0.0
4,1,2018,11,17,8,39,22,0,4000.0,0.0,0.0,0.0,10,19.0,16,17,-4.0,213,16,20.0,21.5,0.0,0.0,0.0,688,0.0,1,93.0,93.0,0.0


In [25]:
# replacing and encoding the EVGPO values
# Step 1: raw code -> letter; step 2: anything without a letter counts as "E";
# step 3: letter -> integer code.
gpo_letters = {10: "E", 8: "E", 9: "E", -1: "E", 1: "B"}
gpo_letter_codes = {"E": 0, "B": 1}
data["EVGPO"] = (
    data["EVGPO"]
    .map(gpo_letters)
    .fillna("E")
    .map(gpo_letter_codes)
)

In [24]:
# final shape of data
data.shape

(4764039, 30)

In [27]:
data.sample(10)

Unnamed: 0,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVVSP_delta,EVVSP_change,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVVAC_delta,EVVAC_change,EVODO,EVODOH,Driver name,EVSMA_in,EVSMA_EWMA,EVSMA_delta
1108580,2,2018,12,25,14,50,57,492,788000.0,13.6875,0.228125,0.003802083333333,0,20.5,26,27,-128.0,-38,24,30.0,31.5,-1.801111111111113,-0.030018518518519,-0.000500308641975,5754,1.360880708333333,3,88.9,86.98451426703016,0.002864890407807
2395132,4,2018,11,17,16,54,26,1031,84000.0,0.0,0.0,0.0,0,28.0,39,42,-1004.0,215,43,30.0,31.5,0.0,0.0,0.0,1891,0.0,13,72.09999999999998,72.09999999999998,0.0
3732533,5,2018,12,21,18,43,18,1579,2673000.0,51.3594,0.059766666666667,-0.00475475,0,16.0,21,23,-30.0,213,22,18.0,19.0,3.515555555555553,0.077041666666667,0.003418055555556,2603,14.765663333333334,18,93.2,81.78582101763601,0.009688848055461
1950630,3,2018,12,12,9,1,25,862,648000.0,17.4688,0.291146666666667,0.004852444444444,0,19.0,15,16,-4.0,-38,15,16.5,17.5,0.217222222222229,0.00362037037037,6.0339506173e-05,3613,1.358151055555555,11,91.9,88.56737661158726,0.002283952935173
699355,2,2018,11,30,7,12,3,360,1913000.0,53.8906,0.75013,0.012875444444444,0,17.0,17,17,-108.0,-38,20,21.5,24.0,1.041666666666667,0.089699074074074,0.002061651234568,2778,13.167723305555556,3,92.0,79.50433518967105,0.003536786090535
3127675,4,2018,12,26,16,56,45,1289,4574000.0,0.0,0.0,0.0,0,25.5,25,28,436.0,215,27,30.0,31.5,-3.754444444444443,-0.062574074074074,-0.001042901234568,6771,11.74033677777778,16,92.6,74.58768782903903,0.002972468780996
2081976,3,2018,12,24,9,55,21,915,2084000.0,5.2578,0.00651,0.008511277777778,0,14.5,11,11,-156.0,-38,11,13.0,14.0,0.086666666666665,-0.202185185185185,-0.005865354938272,5256,15.369914388888883,11,93.0,77.23000212115586,0.001017021056128
183515,1,2018,12,8,11,1,47,189,2652000.0,14.6875,0.244791666666667,0.006371527777778,0,22.5,26,30,60.0,213,25,19.0,20.0,-2.539166666666665,-0.042319444444444,-0.001693904320988,2251,25.701540458333326,2,73.2,58.97553416068986,0.002560480023384
2315261,3,2019,1,23,11,13,18,989,2029000.0,19.7578,0.329296666666667,0.005488277777778,0,13.5,14,22,-170.0,-39,16,16.5,17.5,1.822777777777773,0.03037962962963,0.000506327160494,7617,25.324211083333328,11,59.3,41.35740128118094,0.001945806141727
1680099,2,2019,1,24,15,50,29,700,3596000.0,45.4688,0.441146666666667,0.002074666666667,0,20.0,27,31,-184.0,-37,29,35.0,36.5,-0.933055555555542,0.111041666666667,0.003960570987654,9411,22.13545830555556,7,91.5,71.57913471354533,0.002682532662561


In [28]:
# Refit the label encoder on the remaining trips so that, after the trip
# exclusions above, the tp ids run consecutively from 0 again.
data["tp"] = encoder.fit_transform(data["tp"])

In [29]:
data["tp"].max()

2118

In [30]:
# Split by encoded trip id, so every trip falls entirely into one split:
# train = tp < 1643, validation = 1643..1743 (both ends included), test = tp > 1743.
trainset_final = data[data["tp"] < 1643]
# Series.between includes both endpoints by default. The boolean `inclusive = True`
# spelling is rejected by newer pandas releases, so rely on the default instead.
validset_final = data[data["tp"].between(1643, 1743)]
testset_final = data[data["tp"] > 1743]

In [33]:
# Report the number of rows in each split.
split_reports = [
    ("Trainset = {}", trainset_final),
    ("Validset = {}", validset_final),
    ("Testset = {}", testset_final),
]
for template, split in split_reports:
    print(template.format(len(split)))

Trainset = 3871645
Validset = 224878
Testset = 667516


In [34]:
# saving the trainset to Bucket
trainset_final.to_csv('trainset_final.csv', index = False)
!gsutil cp 'trainset_final.csv' 'gs://msil_raw/training_data/trainset_final.csv'
%gcs read --object gs://msil_raw/training_data/trainset_final.csv --variable trainset_final
df = pd.read_csv(BytesIO(trainset_final))

Copying file://trainset_final.csv [Content-Type=text/csv]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

- [1 files][889.8 MiB/889.8 MiB]   26.5 MiB/s                                   
Operation completed over 1 objects/889.8 MiB.                                    


In [35]:
# saving the validset to Bucket
validset_final.to_csv('validset_final.csv', index = False)
!gsutil cp 'validset_final.csv' 'gs://msil_raw/training_data/validset_final.csv'
%gcs read --object gs://msil_raw/training_data/validset_final.csv --variable validset_final
df3 = pd.read_csv(BytesIO(validset_final))

Copying file://validset_final.csv [Content-Type=text/csv]...
\ [1 files][ 49.5 MiB/ 49.5 MiB]                                                
Operation completed over 1 objects/49.5 MiB.                                     


In [36]:
# saving the testset to Bucket
testset_final.to_csv('testset_final.csv', index = False)
!gsutil cp 'testset_final.csv' 'gs://msil_raw/training_data/testset_final.csv'
%gcs read --object gs://msil_raw/training_data/testset_final.csv --variable testset_final
df3 = pd.read_csv(BytesIO(testset_final))

Copying file://testset_final.csv [Content-Type=text/csv]...
/ [1 files][141.7 MiB/141.7 MiB]                                                
Operation completed over 1 objects/141.7 MiB.                                    


---
## Selecting trips from the test set to use as benchmarks for model performance

In [42]:
mybucket = storage.Bucket(BUCKET_NAME)
data_csv = mybucket.object(FOLDER_NAME + "/" + "testset_final.csv")

uri = data_csv.uri
%gcs read --object $uri --variable data

testset = pd.read_csv(BytesIO(data))
testset.head()

Unnamed: 0,IMEI,Year,Month,Day,Hour,Minute,Seconds,tp,sp,EVVSP,EVVSP_delta,EVVSP_change,EVGPO,EVOAS,EVIGM_Latest,EVCOM_Latest,EVACO_Z,EVIRT_Min,EVDI2,EVBMI_Latest,EVBMA_Latest,EVVAC,EVVAC_delta,EVVAC_change,EVODO,EVODOH,Driver name,EVSMA_in,EVSMA_EWMA,EVSMA_delta
0,7,2018,11,26,18,53,20,1744,0.0,0.0,0.0,0.0,0,21.5,29,34,100.0,213,34,22.5,23.0,0.0,0.0,0.0,1110,0.0,23,82.29999999999998,82.29999999999998,0.0
1,7,2018,11,26,18,53,21,1744,1000.0,0.0,0.0,0.0,0,21.5,29,34,95.0,213,34,22.5,23.0,0.0,0.0,0.0,1110,0.0,23,82.29999999999998,82.29999999999998,0.0
2,7,2018,11,26,18,53,22,1744,2000.0,0.0,0.0,0.0,0,21.5,29,34,92.0,213,34,22.5,23.0,0.0,0.0,0.0,1110,0.0,23,82.29999999999998,82.29999999999998,0.0
3,7,2018,11,26,18,53,23,1744,3000.0,0.0,0.0,0.0,0,21.5,29,34,103.0,213,34,22.5,23.0,0.0,0.0,0.0,1110,0.0,23,82.29999999999998,82.29999999999998,0.0
4,7,2018,11,26,18,53,24,1744,4000.0,0.0,0.0,0.0,0,21.5,29,34,96.0,213,34,22.5,23.0,0.0,0.0,0.0,1110,0.0,23,82.29999999999998,82.29999999999998,0.0


In [45]:
# One summary row per test trip: mean EVVSP, mean EVVAC, mean EVOAS and the
# last EVODOH reading of the trip.
trip_records = []
# A single groupby pass replaces one full-frame boolean mask per trip.
# sort=False keeps the trips in order of first appearance, like testset["tp"].unique().
for trip_id, trip_rows in testset.groupby("tp", sort=False):
    trip_records.append({
        "Trip": trip_id,
        "VSP": trip_rows["EVVSP"].mean(),
        "VAC": trip_rows["EVVAC"].mean(),
        "OAS": trip_rows["EVOAS"].mean(),
        "ODOH": trip_rows["EVODOH"].iloc[-1],
    })

# Passing `columns` fixes the column order at construction time.
perf_df = pd.DataFrame(trip_records, columns=["Trip", "VSP", "VAC", "OAS", "ODOH"])
perf_df.sample(5)

Unnamed: 0,Trip,VSP,VAC,OAS,ODOH
229,2018,25.122161666666667,0.0,19.132777777777775,12.551648979027776
14,1758,0.0,0.0,23.394736842105264,0.0
348,2118,24.395757549019606,6.501698235713334e-17,16.41176470588235,20.736393916666664
125,1869,0.0,0.0,28.5,0.0
251,1989,0.0,0.0,16.5,0.0


In [46]:
# Keep only the trips whose last EVODOH reading is greater than 5.
# NOTE(review): the meaning and units of this threshold are not documented here - confirm.
perf_df = perf_df[perf_df["ODOH"] > 5]

In [48]:
# For each summary column, print the Trip entries that hold its maximum and its minimum.
extreme_reports = [
    ("Max velocity = {}", "VSP", "max"),
    ("Min velocity = {}", "VSP", "min"),
    ("Max acc = {}", "VAC", "max"),
    ("Min acc = {}", "VAC", "min"),
    ("Max OAS = {}", "OAS", "max"),
    ("Min OAS = {}", "OAS", "min"),
]
for template, column, which in extreme_reports:
    extreme_value = getattr(perf_df[column], which)()
    print(template.format(perf_df[perf_df[column] == extreme_value]["Trip"]))

Max velocity = 57    1780
Name: Trip, dtype: int64
Min velocity = 22    1814
Name: Trip, dtype: int64
Max acc = 217    1936
Name: Trip, dtype: int64
Min acc = 235    1973
Name: Trip, dtype: int64
Max OAS = 13    1757
Name: Trip, dtype: int64
Min OAS = 218    1937
Name: Trip, dtype: int64


### Trips on which model performance needs to be checked

| Specification | Trip Number | ODOH |
|---------------|-------------|------|
| VSP (max)     |    1780     | 60.5 |
| VSP (min)     |    1814     | 22.5 |
| VAC (max)     |    1936     | 6.32 |
| VAC (min)     |    1973     | 13.9 |
| OAS (max)     |    1757     | 27.7 |
| OAS (min)     |    1937     | 13.9 |
| Random 01     |    1889     | 8.4  |
| Random 02     |    2018     | 12.5 |
| Random 03     |    2011     | 10.1 |
| Random 04     |    1947     | 7.56 |
| Random 05     |    1860     | 19.8 |