Extracting the Trips

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from datetime import datetime

# Load the cleaned 2021 readings; the first CSV column becomes the index.
df = pd.read_csv('cleaned_data_2021.csv', index_col=0)
df['time_stamp'] = pd.to_datetime(df['time_stamp'])
# Trip detection below walks the rows in order, so they must be chronological.
df = df.sort_values(by='time_stamp', ascending=True)

# Split the readings into trips: a trip opens on the first reading with
# engine_rpm > 0 (engine started) and closes on the next reading with
# engine_rpm == 0 (engine turned off).
trips = []        # one DataFrame per trip
trip = []         # rows of the trip currently being collected
in_trip = False   # True while the engine is considered running
for idx, row in df.iterrows():
    rpm = row['engine_rpm']
    if rpm > 0:
        if not in_trip:
            # Engine has just started: begin collecting a new trip.
            in_trip = True
            trip = []
        trip.append(row)
    elif rpm == 0 and in_trip:
        # The first zero after a running engine closes the trip; the zeros
        # that follow are skipped because in_trip is already False.
        in_trip = False
        if trip:
            trips.append(pd.DataFrame(trip))
    # Readings that are neither > 0 nor == 0 (negative or NaN) are ignored:
    # they are not stored and they do not close an open trip.

# The log may end while the engine is still running. Without this flush the
# readings of that last, unterminated trip would be silently discarded.
if in_trip and trip:
    trips.append(pd.DataFrame(trip))
    in_trip = False


Preparing the Data

In [2]:
# Constants for the per-reading physics below.
SAMPLE_INTERVAL_S = 2          # every reading is treated as covering 2 seconds
FUEL_DENSITY_KG_PER_L = 0.75   # fuel density used to convert kg of fuel to liters

# Add derived columns (duration, acceleration, distance, fuel) to every trip in place.
for i in trips:
    # Trip duration: last timestamp minus first timestamp, stored on every row in minutes.
    duration = (i['time_stamp'].iloc[-1] - i['time_stamp'].iloc[0]).total_seconds()
    i['duration (mins)'] = duration / 60

    # A fixed interval is used instead of the real gap between timestamps
    # (an earlier diff()/shift(-1) based variant was abandoned).
    i['time_till_speed_continued'] = SAMPLE_INTERVAL_S
    # Speed at the next reading; NaN on the last row, which is handled separately below.
    i['final_speed'] = i['vehicle_speed'].shift(-1)
    # km/h -> m/s via *1000/3600, then divide by the interval: acceleration in m/s^2.
    i['acceleration'] = ((i['final_speed'] - i['vehicle_speed'])*1000/3600) / i['time_till_speed_continued']
    # s = u*t + 0.5*a*t^2 in meters, rounded to 2 decimals, then converted to km.
    meters = ((i['vehicle_speed']*1000/3600)*i['time_till_speed_continued']) + (0.5*i['acceleration']*(i['time_till_speed_continued']**2))
    i['distance (km)'] = meters.round(2) / 1000

    # Last reading of the trip: there is no next speed, so assume the vehicle
    # decelerates to 0 within one interval.
    temp = i['vehicle_speed'].iloc[-1]*1000/3600
    last_acceleration = (0 - temp) / SAMPLE_INTERVAL_S
    last_meters = temp*SAMPLE_INTERVAL_S + 0.5*last_acceleration*SAMPLE_INTERVAL_S**2
    # Positional assignment targets exactly the final row even if an index
    # label repeats, and the distance is rounded to 2 decimals like every
    # other row (it was previously rounded to whole meters).
    i.iloc[-1, i.columns.get_loc('acceleration')] = last_acceleration
    i.iloc[-1, i.columns.get_loc('distance (km)')] = round(last_meters, 2) / 1000

    # Fuel used per reading. (An older approach estimated one figure for the
    # whole trip from the mean air flow and the mean equivalence ratio.)
    # NOTE(review): assumes mass_air_flow_rate is in g/s so that /1000 yields kg/s - TODO confirm.
    # NOTE(review): air flow is divided by the equivalence ratio column as-is;
    # confirm this is the intended air-to-fuel conversion and that the ratio is never 0.
    i['fuel_mass_flow_rate'] = (i['mass_air_flow_rate']/1000) / i['o_s1_b1_fuel_air_equivalence_ratio']
    # flow rate * interval = fuel mass for the interval; / density = liters.
    i['fuel_consumption (liters)'] = (i['fuel_mass_flow_rate'] * SAMPLE_INTERVAL_S) / FUEL_DENSITY_KG_PER_L

# Keep only the trips whose mean vehicle_speed is non-zero, i.e. drop trips
# in which the engine ran but the vehicle never moved.
lst = [trip_df for trip_df in trips if trip_df['vehicle_speed'].mean() != 0]
if not lst:
    raise ValueError("No trips with a non-zero mean vehicle_speed were found")

# Number the remaining trips 1..N in chronological order (the column is
# written onto the trip frames themselves).
for trip_no, trip_df in enumerate(lst, start=1):
    trip_df['trip'] = trip_no

# Concatenate once instead of growing the frame inside a loop, then index
# every reading by its trip number.
df = pd.concat(lst).set_index('trip')
# Convert acceleration from m/s^2 to km/h per second.
df['acceleration'] = df['acceleration']*3600/1000


Extracting Summary Data

In [3]:
# Calendar buckets derived from each reading's timestamp.
df['month'] = df['time_stamp'].dt.to_period('M').dt.to_timestamp()   # first day of the month
df['week'] = df['time_stamp'].dt.isocalendar().week                  # ISO week number
df['day'] = df['time_stamp'].dt.date

# Normalise the answer and fail early with a clear message; an unknown value
# would otherwise surface as a KeyError from groupby.
ch = input("Enter the Time Frame for Summary Data (month or week or day) :").strip().lower()
if ch not in ('month', 'week', 'day'):
    raise ValueError(f"Unsupported time frame {ch!r}; expected 'month', 'week' or 'day'")

# One list per summary column; each time bucket appends one entry to every list.
summary = { 'timeframe':[], 'average_speed':[], 'max_speed':[], 'total_drive_time':[], 'distance (km)':[],
                'fuel':[], 'mileage':[], 'coolant_temp':[], 'engine_overheat':[] }
week = ch == 'week'

data = df.groupby(ch)
for time, group in data:
    if ch=="month":
        print("*****",time.strftime("%B %Y"),"*****")
    elif ch=='week':
        print('***** Week No.',time,"*****")
    else:
        print(time)

    if week:
        # Use the Monday of the week as the x-axis value. It is derived from
        # the group's own timestamps, so no year is hard-coded and the ISO
        # week key is not re-parsed with the non-ISO '%W' directive.
        first_reading = group['time_stamp'].min()
        time = (first_reading - pd.Timedelta(days=first_reading.weekday())).normalize()

    summary['timeframe'].append(time)

    # Per-trip totals inside this bucket. 'duration (mins)' is constant within
    # a trip, so its mean is the trip duration; a trip that spans two buckets
    # therefore contributes its full duration to both.
    stats = group.groupby('trip').agg({'fuel_consumption (liters)':'sum','duration (mins)':'mean'}).reset_index()
    fuel = stats['fuel_consumption (liters)'].sum()
    dist = group['distance (km)'].sum()

    # Mean coolant temperature ignores zero readings; "overheat" is the share
    # of readings at or above 95.
    coolant_temp = group.loc[group['engine_coolant_temperature']!=0,'engine_coolant_temperature'].mean()
    overheat = group[(group['engine_coolant_temperature'] >= 95)]
    overheat_percent = (len(overheat) / len(group)) * 100

    # Compute every reported figure once and reuse it for the table and the printout.
    average_speed = round(group.loc[group['vehicle_speed']!=0,'vehicle_speed'].mean(),2)  # stationary readings excluded
    max_speed = group['vehicle_speed'].max()
    total_drive_time = round(stats['duration (mins)'].sum(),2)
    distance_km = round(dist,2)
    fuel_liters = round(fuel,2)
    mileage = round(dist/fuel,2)
    coolant_avg = round(coolant_temp,2)
    overheat_share = round(overheat_percent,2)

    summary['average_speed'].append(average_speed)
    summary['max_speed'].append(max_speed)
    summary['total_drive_time'].append(total_drive_time)
    summary['distance (km)'].append(distance_km)
    summary['fuel'].append(fuel_liters)
    summary['mileage'].append(mileage)
    summary['coolant_temp'].append(coolant_avg)
    summary['engine_overheat'].append(overheat_share)

    print('Average Speed:',average_speed,'kmph')
    print('Maximum Speed:',max_speed)
    print('Total Driving Time:',total_drive_time,'minutes')
    print('Distance Travelled:',distance_km,'kms')
    print('Fuel Consumed:',fuel_liters,'liters')
    print('Mileage for the duration:',mileage,'km/L')
    print('Engine Coolant Temperature:',coolant_avg,'°C')
    print('Engine Overheat Percentage:',overheat_share,'%')
    print()

summary_df = pd.DataFrame(summary)

***** February 2021 *****
Average Speed: 25.48 kmph
Maximum Speed: 77.0
Total Driving Time: 529.47 minutes
Distance Travelled: 174.29 kms
Fuel Consumed: 16.84 liters
Mileage for the duration: 10.35 km/L
Engine Coolant Temperature: 84.68 °C
Engine Overheat Percentage: 0.4 %

***** March 2021 *****
Average Speed: 26.76 kmph
Maximum Speed: 102.0
Total Driving Time: 2000.02 minutes
Distance Travelled: 780.8 kms
Fuel Consumed: 64.25 liters
Mileage for the duration: 12.15 km/L
Engine Coolant Temperature: 85.81 °C
Engine Overheat Percentage: 0.61 %

***** April 2021 *****
Average Speed: 27.0 kmph
Maximum Speed: 101.0
Total Driving Time: 1427.13 minutes
Distance Travelled: 570.8 kms
Fuel Consumed: 49.33 liters
Mileage for the duration: 11.57 km/L
Engine Coolant Temperature: 86.13 °C
Engine Overheat Percentage: 1.01 %

***** May 2021 *****
Average Speed: 28.09 kmph
Maximum Speed: 100.0
Total Driving Time: 1328.3 minutes
Distance Travelled: 548.91 kms
Fuel Consumed: 48.89 liters
Mileage for the 

Plotting the Historical Data

In [4]:
import plotly.express as px
import plotly.graph_objects as go

# Overlay every summary metric on one time axis. Each metric gets its own
# y-axis so that series with very different magnitudes stay readable; only
# the mileage axis shows tick labels.
fig = go.Figure()

# (summary_df column, legend label, y-axis id, extra Scatter options)
trace_specs = [
    ('mileage', 'Mileage', 'y1', {}),
    ('fuel', 'Fuel Consumption', 'y2', {'opacity': 0.5}),
    ('distance (km)', 'Distance Travelled', 'y3', {'opacity': 0.5}),
    ('coolant_temp', 'Average Engine Coolant Temperature', 'y4', {'opacity': 0.5}),
    ('total_drive_time', 'Total Driving Time', 'y5', {}),
    ('max_speed', 'Maximum speed', 'y6', {}),
]
for column, label, axis_id, extra in trace_specs:
    fig.add_trace(go.Scatter(x=summary_df['timeframe'], y=summary_df[column],
                             mode='lines', name=label, yaxis=axis_id, **extra))

# Secondary axes 2-6 are drawn on top of the primary axis with hidden tick labels.
hidden_overlay_axes = {
    f'yaxis{n}': dict(overlaying="y", showticklabels=False) for n in range(2, 7)
}

fig.update_layout(
    title=f'Summary Data by {ch.capitalize()}',
    xaxis=dict(title=ch.capitalize()),

    # `titlefont` is a deprecated axis attribute that newer Plotly releases
    # reject; the nested title.font form works on old and new versions alike.
    yaxis=dict(
        title=dict(text="Mileage", font=dict(color="blue")),
        tickfont=dict(color="blue"),
    ),

    legend_title_text='Parameters',
    **hidden_overlay_axes,
)

print(summary_df)
fig.show()


    timeframe  average_speed  max_speed  total_drive_time  distance (km)  \
0  2021-02-01          25.48       77.0            529.47         174.29   
1  2021-03-01          26.76      102.0           2000.02         780.80   
2  2021-04-01          27.00      101.0           1427.13         570.80   
3  2021-05-01          28.09      100.0           1328.30         548.91   
4  2021-06-01          28.49      119.0           1831.47         773.58   
5  2021-07-01          29.45      110.0           1684.00         731.60   
6  2021-08-01          27.13       98.0           1082.43         433.70   
7  2021-09-01          23.28      118.0           1239.65         307.20   
8  2021-10-01          35.30      142.0           3177.95        1609.87   
9  2021-11-01          23.99      108.0           2253.67         776.32   
10 2021-12-01          24.71      102.0           2069.25         749.81   

      fuel  mileage  coolant_temp  engine_overheat  
0    16.84    10.35         84.68 

Engine Overheat Plotting

In [5]:
import plotly.express as px
import plotly.graph_objects as go

# Plot the share of overheated readings per time frame against the distance
# driven in the same time frame (distance on a secondary right-hand axis).
fig = go.Figure()

fig.add_trace(go.Scatter(x=summary_df['timeframe'], y=summary_df['engine_overheat'],
                         mode='lines', name='Overheat', yaxis='y1'))

fig.add_trace(go.Scatter(x=summary_df['timeframe'], y=summary_df['distance (km)'],
                         mode='lines', name='Distance', yaxis='y2', opacity=0.5))

fig.update_layout(
    title=f'Engine Overheat Percentage by {ch.capitalize()}',
    xaxis=dict(title=ch.capitalize()),

    # `titlefont` is a deprecated axis attribute that newer Plotly releases
    # reject; the nested title.font form works on old and new versions alike.
    yaxis=dict(
        title=dict(text="Overheat", font=dict(color="blue")),
        tickfont=dict(color="blue"),
    ),

    yaxis2=dict(
        title="Distance",
        overlaying="y",
        side="right"
    ),
    # Horizontal legend centred below the plot area.
    legend=dict(
        x=0.5,
        y=-0.2,
        orientation="h",
        xanchor="center",
        yanchor="top",
    ),
)

fig.show()

Troubleshooting the issue in the October data  
Don't run this cell


In [6]:
# start = '01-10-2021'
# end = '31-10-2021 23:59:59'

# start_date = datetime.strptime(start, "%d-%m-%Y")
# end_date = datetime.strptime(end, "%d-%m-%Y %H:%M:%S")

# df_10 = df[(df['time_stamp'] >= start_date) & (df['time_stamp'] <= end_date)]
# print(len(df_10))


# #checking for duplicate entries
# print(df_10.duplicated(subset=['bulk_id']).sum())  #it shows zero

# print(df_10.duplicated(subset=['time_stamp']).sum())


# #deleting the duplicated entries keeping the first occurrence
# df_10 = df_10.drop_duplicates(subset=['time_stamp'], keep='first')
# print(len(df_10))

# #checking if there are other duplicate entries in the dataframe
# print('All Year Dataframe')
# print(len(df))
# print(df.duplicated(subset=['bulk_id']).sum())

# print(df.duplicated(subset=['time_stamp']).sum())

# print(df_10['distance (km)'].describe())
# print(df_10['fuel_consumption (liters)'].describe())


# 10 October 2021 has an unusually large number of readings, so that single
# day is isolated and its readings are counted hour by hour.
start = '10-10-2021'
end = '10-10-2021 23:59:59'

start_date = datetime.strptime(start, "%d-%m-%Y")
end_date = datetime.strptime(end, "%d-%m-%Y %H:%M:%S")

# Readings whose timestamp lies between the two bounds (both inclusive).
after_start = df['time_stamp'] >= start_date
before_end = df['time_stamp'] <= end_date
df_10_10_2021 = df[after_start & before_end].copy()
print(len(df_10_10_2021))

# Number of repeated timestamps within the day.
print(df_10_10_2021.duplicated(subset=['time_stamp']).sum())

# Readings per hour of the day.
df_10_10_2021['hour'] = df_10_10_2021['time_stamp'].dt.hour
for hour, readings in df_10_10_2021.groupby('hour'):
    print(hour, len(readings))


# No pattern could be identified from the hourly counts.

23550
0
6 1026
7 1800
8 1629
9 1219
10 1234
11 1459
12 1800
13 1794
14 978
15 1071
16 1730
17 1747
18 1575
19 884
20 560
21 1800
22 1244
