In [14]:
import pandas as pd
import numpy as np
import sqlalchemy
import oursql
import matplotlib.pyplot as plt
import seaborn as sns


In [15]:
def load_from_mysql(url=None):
    '''Read the whole ``raw_logs`` table into a DataFrame.

    @param url: SQLAlchemy connection URL. When omitted, the value of the
                TORQUE_DB_URL environment variable is used if it is set,
                otherwise the original local default below.
    @return the table contents as a pandas DataFrame
    '''
    import os

    if url is None:
        # NOTE(review): the fallback URL embeds a username/password in source;
        # prefer passing `url` or setting TORQUE_DB_URL and drop the literal.
        url = os.environ.get('TORQUE_DB_URL',
                             "mysql+oursql://steve:zissou@localhost/torque")
    conn = sqlalchemy.create_engine(url)
    try:
        return pd.read_sql('raw_logs', conn)
    finally:
        # Release pooled connections instead of leaving them open for the
        # lifetime of the kernel.
        conn.dispose()

def load_torque_keys(keyf='/home/aahu/chinar/ryancompton.net/assets/torque/torque_keys.csv'):
    '''Build a mapping from the first to the second comma-separated field of each line.

    @param keyf: path of the comma-separated key file
    @return dict mapping field 0 -> field 1 (newline removed); any further
            fields on a line are ignored

    Lines that do not contain at least two fields (for example a blank
    trailing line) are skipped rather than raising IndexError.
    '''
    keys = {}
    with open(keyf, 'r') as fin:
        for line in fin:
            fields = line.replace('\n', '').split(',')
            if len(fields) < 2:
                # blank or malformed line: nothing to map
                continue
            keys[fields[0]] = fields[1]
    return keys
        
def load_from_file(path='/home/aahu/Desktop/torque_data.tsv'):
    '''Read a tab-separated log export and rename its columns.

    @param path: location of the TSV file; defaults to the path originally
                 hard-coded here, so existing calls behave as before
    @return DataFrame whose columns have been renamed with the mapping
            returned by load_torque_keys(); columns without an entry in
            that mapping keep their original name
    '''
    df = pd.read_csv(path, sep='\t')
    return df.rename(columns=load_torque_keys())

In [17]:
# Load the tab-separated log export into `df`; load_from_file() also renames
# the columns using the mapping from load_torque_keys().
df = load_from_file()

In [32]:
# Advertised fuel economy for reference: 24–26 city / 28–32 hwy

# Keep only readings strictly between 0 and 200 mpg.
# (earlier variant: mpg = df[df['Speed (OBD)'] < 90]['Miles Per Gallon(Instant)'])
dfmpg = df[(df['Miles Per Gallon(Instant)'] > 0)
           & (df['Miles Per Gallon(Instant)'] < 200)]

mpg = dfmpg['Miles Per Gallon(Instant)']
mpg.hist(bins=100, label='observed mpg')

# Optional markers for the median / mean, currently disabled:
#plt.vlines(mpg.median(),ymin=0,ymax=plt.ylim()[1])
#plt.vlines(mpg.mean(),ymin=0,ymax=plt.ylim()[1])

# Shade the advertised city and highway ranges on top of the histogram.
plt.axvspan(24, 26, alpha=0.5, color='red', label='24–26 (advertised city mpg)')
plt.axvspan(28, 32, alpha=0.5, color="orange", label='28-32 (advertised hwy mpg)')

plt.xlabel('Miles per gallon')
plt.ylabel('Frequency')
plt.title(
    'Histogram of instantaneous mpg readouts\n'
    '2003 Suzuki Aerio SX 5sp manual\n'
    '573 miles traveled. Data collected in Los Angeles using Open Torque Viewer.'
)
plt.legend()

# Write the figure to disk, then close it so later cells start from a clean figure.
plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/mpg_hist.png')
plt.close()

In [13]:
# Distribution of the absolute difference between the GPS speed and the OBD
# speed, using only rows where the GPS reports a positive speed.
dfgps = df[df['Speed (GPS)'] > 0]
spd_discrep = abs(dfgps['Speed (GPS)'] - dfgps['Speed (OBD)'])
sns.distplot(spd_discrep,bins=200)

plt.xlim([0,10])
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
mu = spd_discrep.mean()
median = spd_discrep.median()
sigma = spd_discrep.std()
# The mathtext backslashes are escaped explicitly: '\m' and '\s' are not valid
# Python escape sequences, while '\n' must stay a real newline. The resulting
# string is the same as before, without the invalid-escape warnings.
textstr = '$\\mu=%.2f$\n$\\mathrm{median}=%.2f$\n$\\sigma=%.2f$'%(mu, median, sigma)
# Summary box placed in data coordinates near the right edge of the x-range.
plt.text(8.12,.29,textstr, bbox=props)

plt.xlabel('Discrepancy (mph)')
plt.ylabel('Normalized frequency')
plt.title('Discrepancies between GPS-measured speed and my speedometer')
plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/speed_hist.png')
plt.close()

In [20]:
# Latitude/longitude bounding box used to select the rows plotted below
# (the figure title identifies the stretch as the Conejo Grade).
lat0 = 34.209165
lng0 = -118.99
lat1 = 34.195597
lng1 = -118.950455

# Rows strictly inside the box, restricted to positive instantaneous mpg.
df_conejo = df[(lng0 < df['GPS Longitude']) &( df['GPS Longitude'] < lng1) & (lat1 < df['GPS Latitude']) & (df['GPS Latitude'] < lat0)]
df_conejo = df_conejo[df_conejo['Miles Per Gallon(Instant)'] > 0]

# x / y / data are passed by keyword: newer seaborn releases no longer accept
# them positionally, and the keyword form is also valid on older releases.
g = sns.JointGrid(x='Miles Per Gallon(Instant)', y='Throttle Position(Manifold)', data=df_conejo, space=0)
g.plot_marginals(sns.distplot, bins=20)#, shade=True)
# Joint panel: filled density contours with the raw points drawn on top.
g.plot_joint(sns.kdeplot, shade=True, n_levels=20, alpha=.8)
g.plot_joint(plt.scatter, alpha=.5)

plt.xlim([0,190])
plt.ylim([0,70])

# Text annotations placed at fixed data coordinates.
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
textstr = 'Downhill'
plt.text(130,20, textstr, bbox=props)
textstr = 'Uphill'
plt.text(40,35, textstr, bbox=props)

# Title typo fixed ("posistion" -> "position").
g.fig.suptitle('Throttle position vs. mpg while driving the Conejo Grade in Ventura', y=.995)

plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/conejo_joint.png')
plt.close()



In [22]:
# Histogram of instantaneous mpg for the rows in df_conejo, keeping only
# readings strictly between 0 and 200 mpg.
# (earlier variant: mpg = df[df['Speed (OBD)'] < 90]['Miles Per Gallon(Instant)'])
dfmpg = df_conejo[(df_conejo['Miles Per Gallon(Instant)'] > 0)
                  & (df_conejo['Miles Per Gallon(Instant)'] < 200)]

mpg = dfmpg['Miles Per Gallon(Instant)']
mpg.hist(bins=100, label='observed mpg')

# Optional markers for the median / mean, currently disabled:
#plt.vlines(mpg.median(),ymin=0,ymax=plt.ylim()[1])
#plt.vlines(mpg.mean(),ymin=0,ymax=plt.ylim()[1])

# Shade the advertised city and highway ranges on top of the histogram.
plt.axvspan(24, 26, alpha=0.5, color='red', label='24–26 (advertised city mpg)')
plt.axvspan(28, 32, alpha=0.5, color="orange", label='28-32 (advertised hwy mpg)')

plt.xlabel('Miles per gallon')
plt.ylabel('Frequency')
plt.title('Histogram of instantaneous mpg readouts while driving the Conejo Grade')
plt.legend()

# Write the figure to disk, then close it so later cells start from a clean figure.
plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/mpg_hist_conejo.png')
plt.close()

In [77]:
from scipy import integrate
import pandas as pd
import numpy as np

def integrate_method(self, how='trapz', unit='s'):
    '''Numerically integrate the time series over its index.

    @param how: the method to use (trapz by default)
    @param unit: time unit of the integration variable when the index is a
                 DatetimeIndex ('s' by default, i.e. the values are treated
                 as rates per second)
    @return the value returned by the selected SciPy rule: a scalar for
            trapz / simps / romb, an array of running totals for cumtrapz
    @raise AttributeError: if `how` is not one of the supported rules
    @raise ValueError: if `how` is 'romb' and the samples are not evenly spaced

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberg's rule (needs evenly spaced samples)

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    or the source code
    https://github.com/scipy/scipy/blob/master/scipy/integrate/quadrature.py
    '''
    # Legacy rule name -> name to fall back to when the installed SciPy no
    # longer provides the legacy alias.
    modern_names = {
        'trapz': 'trapezoid',
        'cumtrapz': 'cumulative_trapezoid',
        'simps': 'simpson',
        'romb': 'romb',
    }
    if how not in modern_names:
        raise AttributeError(
            'Unsupported integration rule: %s. '
            'Expecting one of these sample-based integration rules: %s'
            % (how, sorted(modern_names)))

    rule = getattr(integrate, how, None)
    if rule is None:
        rule = getattr(integrate, modern_names[how])

    if isinstance(self.index, pd.DatetimeIndex) and len(self.index):
        # Elapsed time since the first sample, expressed in `unit`. Working
        # with offsets keeps the result independent of the index's stored
        # resolution; the rules only depend on differences between x values.
        elapsed = self.index - self.index[0]
        x = np.asarray(elapsed / pd.Timedelta(1, unit=unit), dtype=float)
    else:
        # Original conversion: integer index values scaled by 1e-9.
        x = np.asarray(self.index.astype(np.int64)) / 10**9

    if how == 'romb':
        # romb() takes a constant sample spacing `dx`, not an x array, so
        # the spacing is derived from the index and checked for uniformity.
        steps = np.diff(x)
        if steps.size == 0 or not np.allclose(steps, steps[0]):
            raise ValueError('romb requires at least two evenly spaced samples')
        return rule(self.values, dx=steps[0])

    # x is passed by keyword: accepted by both the legacy and the renamed
    # functions, whereas newer simpson() rejects it positionally.
    return rule(self.values, x=x)

# Attach the integrator as a Series method. pd.TimeSeries was only an alias of
# pd.Series and is absent from current pandas, so patch pd.Series directly.
pd.Series.integrate = integrate_method

In [78]:
# Select the timestamp and OBD speed columns. `ts` is rebuilt with a
# Timestamp index by a later cell before it is integrated.
ts = df[['Timestamp','Speed (OBD)']]

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex

In [11]:
# Build a per-second speed series that can be integrated to get total distance.
import datetime

# Take an explicit copy so the conversion below writes to an independent frame
# rather than to a slice of `df` (which triggers SettingWithCopyWarning).
df2 = df[['Timestamp', 'Speed (OBD)']].copy()
# Timestamps are floor-divided by 1000 before conversion, i.e. handled as
# epoch milliseconds; fromtimestamp() yields naive datetimes in local time.
df2['Timestamp'] = df2['Timestamp'].map(lambda x: datetime.datetime.fromtimestamp(x//1000))


# Average the readings within each second; seconds without any reading are
# filled with 0. The aggregation is spelled out with .mean() because
# resample() no longer aggregates implicitly.
ts = df2.set_index('Timestamp')['Speed (OBD)'].resample('s').mean().fillna(0)
# Divide by 3600 so each value is distance per second
# (assumes the speed column is in distance-per-hour units — TODO confirm).
ts = ts/(60*60)

In [130]:
# Total distance traveled, integrating the per-second series with Simpson's rule.
# NOTE(review): the recorded output (923.19) is about 573 once converted from
# kilometres to miles, matching the "573 miles traveled" figure title, so this
# value is presumably kilometres (speed logged in km/h) — confirm.
ts.integrate('simps')

923.18648148148168

In [12]:
# Plot the one-minute mean of the series. resample() returns a deferred
# Resampler, so the aggregation has to be requested explicitly before plotting.
ts.resample('1min').mean().plot()
plt.show()

  GLib.source_remove(self._idle_event_id)


In [86]:
# Show every column name as a plain Python list.
list(df.columns)

['Unnamed: 0',
 'v',
 'Session ID',
 'Device ID',
 'Timestamp',
 'GPS Longitude',
 'GPS Latitude',
 'Speed (GPS)',
 'kff1007',
 'Engine Load',
 'Fuel Level (From Engine ECU)',
 'Throttle Position(Manifold)',
 'Engine Coolant Temperature',
 'Engine RPM',
 'Speed (OBD)',
 'Intake Air Temperature',
 'Horsepower (At the wheels)',
 'Acceleration Sensor(X axis)',
 'Acceleration Sensor(Y axis)',
 'Ambient air temp',
 'eml',
 'Cost per mile/km (Instant)',
 'Turbo Boost & Vacuum Gauge',
 'Average trip speed(whilst stopped or moving)',
 'GPS Accuracy',
 'Miles Per Gallon(Instant)',
 'Miles Per Gallon(Long Term Average)',
 'GPS Altitude',
 'GPS Satellites',
 'Timing Advance',
 'Acceleration Sensor(Total)',
 'Average trip speed(whilst moving only)',
 'Fuel cost (trip)',
 'Cost per mile/km (Trip)',
 'Torque',
 'Engine kW (At the wheels)',
 '0-30mph Time',
 '0-60mph Time']

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2
