Extracting the Trips

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Load the cleaned readings (the first CSV column becomes the index), parse the
# timestamps into datetimes and put the rows in chronological order.
df = pd.read_csv('cleaned_bulk_data.csv', index_col=0)
df = df.assign(time_stamp=lambda frame: pd.to_datetime(frame['time_stamp']))
df = df.sort_values('time_stamp')

# Split the chronologically sorted readings into trips.
# A trip starts at the first reading where engine_rpm is above zero (engine
# started) and ends at the next reading where engine_rpm is exactly zero
# (engine turned off). Rows whose engine_rpm is neither > 0 nor == 0
# (e.g. NaN) are ignored and do not end a trip.
trips = []        # one DataFrame per completed trip
trip = []         # rows (Series) of the trip currently being collected
in_trip = False   # True while the engine is running
for idx, row in df.iterrows():
    rpm = row['engine_rpm']
    if rpm > 0:
        if not in_trip:
            # Engine just started: open a fresh trip.
            in_trip = True
            trip = []
        trip.append(row)
    elif rpm == 0 and in_trip:
        # First zero reading after running closes the trip; the many
        # following zero readings are skipped because in_trip is now False.
        in_trip = False
        if trip:
            trips.append(pd.DataFrame(trip))

# If the log ends while the engine is still running there is no closing
# zero-rpm row, so the last trip must be stored explicitly or it is lost.
if in_trip and trip:
    trips.append(pd.DataFrame(trip))
    in_trip = False


Preparing the Data

In [3]:
# Constants used for the per-reading kinematics and fuel estimate.
SAMPLE_INTERVAL_S = 2          # each reading is treated as lasting 2 seconds
FUEL_DENSITY_KG_PER_L = 0.75   # density used to convert fuel mass (kg) to litres

# Add derived columns to every trip frame in place:
#   duration (mins), time_till_speed_continued, final_speed, acceleration,
#   distance (km), fuel_mass_flow_rate and fuel_consumption (liters).
for trip_df in trips:
    # Trip duration: time between the first and the last reading of the trip.
    stamps = trip_df['time_stamp']
    duration = (stamps.iloc[-1] - stamps.iloc[0]).total_seconds()
    trip_df['duration (mins)'] = duration / 60

    # A fixed interval is assumed for every reading instead of the measured
    # difference between consecutive timestamps.
    trip_df['time_till_speed_continued'] = SAMPLE_INTERVAL_S
    dt = trip_df['time_till_speed_continued']

    # Speed at the next reading; NaN on the last row (patched below).
    trip_df['final_speed'] = trip_df['vehicle_speed'].shift(-1)

    # "* 1000 / 3600" converts km/h to m/s, so acceleration is in m/s^2.
    trip_df['acceleration'] = ((trip_df['final_speed'] - trip_df['vehicle_speed']) * 1000 / 3600) / dt

    # s = u*t + 0.5*a*t^2 in metres (rounded to 2 decimals), then converted to km.
    metres = (trip_df['vehicle_speed'] * 1000 / 3600) * dt + 0.5 * trip_df['acceleration'] * dt ** 2
    trip_df['distance'] = metres.round(2) / 1000

    # The last reading has no successor: assume the vehicle decelerates to a
    # stop over one interval. The write is positional so it always touches
    # exactly the last row, and the distance is rounded to 2 decimals like
    # every other row (it used to be rounded to whole metres, and raised on a
    # NaN speed).
    last_speed_ms = trip_df['vehicle_speed'].iloc[-1] * 1000 / 3600
    last_accel = (0 - last_speed_ms) / SAMPLE_INTERVAL_S
    last_metres = last_speed_ms * SAMPLE_INTERVAL_S + 0.5 * last_accel * SAMPLE_INTERVAL_S ** 2
    trip_df.iloc[-1, trip_df.columns.get_loc('acceleration')] = last_accel
    trip_df.iloc[-1, trip_df.columns.get_loc('distance')] = round(last_metres, 2) / 1000

    # Fuel consumed during each reading's interval (this replaced an older
    # whole-trip average estimate).
    # NOTE(review): mass_air_flow_rate / 1000 presumably converts g/s to kg/s,
    # and the ratio column is used directly as the divisor - confirm units.
    # A ratio of exactly 0 would give an infinite flow rate and poison every
    # later sum, so such readings are treated as missing instead.
    ratio = trip_df['o_s1_b1_fuel_air_equivalence_ratio'].replace(0, np.nan)
    trip_df['fuel_mass_flow_rate'] = (trip_df['mass_air_flow_rate'] / 1000) / ratio
    # flow rate * interval = fuel mass for the interval; / density = litres.
    trip_df['fuel_consumption (liters)'] = (trip_df['fuel_mass_flow_rate'] * SAMPLE_INTERVAL_S) / FUEL_DENSITY_KG_PER_L

# Keep only trips in which the vehicle actually moved (mean speed != 0);
# engine runs where the car never left standstill are discarded.
lst = [candidate for candidate in trips if candidate['vehicle_speed'].mean() != 0]

if not lst:
    # Without this check the failure would surface later as an opaque
    # KeyError from set_index('trip') on an empty frame.
    raise ValueError("No trips with non-zero vehicle speed were found")

# Number the remaining trips 1..N (the column is added to each trip frame).
for trip_number, moving_trip in enumerate(lst, start=1):
    moving_trip['trip'] = trip_number

# Concatenate once instead of growing a DataFrame inside the loop, then use
# the trip number as the index of the combined frame.
df = pd.concat(lst).set_index('trip')


Extracting Summary Data

In [8]:
# Calendar keys used for grouping.
# NOTE(review): month and ISO week numbers carry no year, so data spanning
# more than one year would be merged into the same group - confirm the range.
df['month'] = df['time_stamp'].dt.month
df['week'] = df['time_stamp'].dt.isocalendar().week

# Normalise the answer so that e.g. "Month " or "WEEK" is accepted, and fail
# early with a clear message instead of a KeyError inside groupby.
ch = input("Enter the Time Frame for Summary Data (month or week) :").strip().lower()
if ch not in ('month', 'week'):
    raise ValueError(f"Unsupported time frame {ch!r}; expected 'month' or 'week'")

summary = { 'timeframe':[], 'average_speed':[], 'max_speed':[], 'total_drive_time':[], 'distance':[],
                'fuel':[], 'mileage':[], 'coolant_temp':[] }

data = df.groupby(ch)
for period, group in data:
    summary['timeframe'].append(period)

    # Per-trip totals inside this period: fuel is summed over the readings,
    # duration is constant within a trip so its mean is the trip duration.
    # NOTE(review): a trip that crosses a period boundary contributes its
    # full duration to both periods.
    stats = group.groupby('trip').agg({'fuel_consumption (liters)':'sum','duration (mins)':'mean'}).reset_index()
    fuel = stats['fuel_consumption (liters)'].sum()
    dist = group['distance'].sum()

    # Zero readings are excluded from the coolant and speed averages.
    coolant_temp = group.loc[group['engine_coolant_temperature']!=0,'engine_coolant_temperature'].mean()
    moving_speed = group.loc[group['vehicle_speed']!=0,'vehicle_speed'].mean()

    summary['average_speed'].append(round(moving_speed,2))
    summary['max_speed'].append(group['vehicle_speed'].max())
    summary['total_drive_time'].append(round(stats['duration (mins)'].sum(),2))
    summary['distance'].append(round(dist,2))
    summary['fuel'].append(round(fuel,2))
    # Mileage is undefined when no fuel was recorded; avoid dividing by zero.
    summary['mileage'].append(round(dist/fuel,2) if fuel > 0 else np.nan)
    # round() without ndigits raises on NaN, which happens when a period has
    # no non-zero coolant readings.
    summary['coolant_temp'].append(round(coolant_temp) if pd.notna(coolant_temp) else np.nan)


summary_df = pd.DataFrame(summary)
print(summary_df)

    timeframe  average_speed  max_speed  total_drive_time  distance   fuel  \
0           5          27.82       77.0            133.22     49.75   4.85   
1           6          25.02       67.0             14.33      4.90   0.56   
2           7          25.21       76.0             42.13     20.07   2.21   
3           8          24.37       72.0            339.78    103.69   9.62   
4           9          23.08       93.0            369.35    123.69  10.51   
5          10          24.76      100.0            611.52    218.14  18.72   
6          11          23.64       93.0            311.55    107.92   9.80   
7          12          32.99      102.0            585.33    280.38  21.06   
8          13          27.49       89.0            297.63    116.33   9.28   
9          14          24.72       89.0            511.33    306.70  28.19   
10         15          25.09       90.0            329.27    111.39  10.38   
11         16          26.87       85.0            258.77    101

Plotting the Historical Data

In [9]:
import plotly.express as px
import plotly.graph_objects as go

# Overlay every summary metric on one figure. Each metric gets its own
# y-axis so the lines are scaled independently; only the first axis
# (mileage) shows tick labels.
fig = go.Figure()

# (summary_df column, legend label, y-axis id, opacity - None keeps the default)
trace_specs = [
    ('mileage', 'Mileage', 'y1', None),
    ('fuel', 'Fuel Consumption', 'y2', 0.5),
    ('distance', 'Distance Travelled', 'y3', 0.5),
    ('coolant_temp', 'Average Engine Coolant Temperature', 'y4', 0.5),
    ('total_drive_time', 'Total Driving Time', 'y5', None),
    ('max_speed', 'Maximum speed', 'y6', None),
]

for column, label, axis_id, opacity in trace_specs:
    fig.add_trace(go.Scatter(x=summary_df['timeframe'], y=summary_df[column],
                             mode='lines', name=label, yaxis=axis_id, opacity=opacity))

# Axes 2..6 are drawn on top of the primary axis with their tick labels hidden.
hidden_axes = {
    f'yaxis{axis_number}': dict(overlaying="y", showticklabels=False)
    for axis_number in range(2, 7)
}

fig.update_layout(
    title=f'Summary Data by {ch.capitalize()}',
    xaxis=dict(title=ch.capitalize()),

    yaxis=dict(
        # `titlefont` is a deprecated axis attribute; the title font is set
        # through title.font, which also works on older plotly versions.
        title=dict(text="Mileage", font=dict(color="blue")),
        tickfont=dict(color="blue"),
    ),

    legend_title_text='Parameters',
    **hidden_axes
)

fig.show()
