# Drive characteristics using `strym`

In this notebook, we will generate characteristics of a particular drive. Please ensure strym is installed correctly

## Importing packages
Import required packages

In [1]:
from strym import strymread
import strym
import math
import time
import matplotlib.pyplot as plt
import numpy as np
import scipy.integrate as integrate
import sys
import os

  import pandas.util.testing as tm


## Specify Data Location

In [2]:
import glob
parentfolder = "../../PandaData/"
folderlist = glob.glob(parentfolder+"*")
csvlist = []
for datafolder in folderlist:
    csvlisttmp = glob.glob(datafolder+"/*CAN*.csv")
    for f in csvlisttmp:
        csvlist.append(f)

In [3]:
num_of_files = len(csvlist)
print("Total number of datafiles in {} is {}.".format(parentfolder, num_of_files))

Total number of datafiles in ../../PandaData/ is 615.


## Analysis
### 1. CSV file containing all messages
In this section, we will extract a subset of the CSV-formatted CAN Data in order to produce summary information of a specific drive.

In [4]:
dbcfile = '../examples/newToyotacode.dbc'
metadata = []
for csvfile in csvlist:
    try:
        
        # Skip the Honda Pilot drives
        if "5FNYF6H05HB089022" in csvfile:
            continue
            
        print(f'Processing {csvfile}')
        r0 = strymread(csvfile=csvfile, dbcfile=dbcfile)
        
        # Success flag tells if strymread successfully read the CSV file. If CSV file is empty, 
        # has unacceptable format or some error occur, then success attribute is set to False
        if not r0.success:
            continue
        duration1 = r0.dataframe['Time'][len(r0.dataframe)-1] - r0.dataframe['Time'][0]
        duration_str = f'  Duration of this drive is {duration1} seconds ({math.trunc(int(duration1*1000)/(1000*60))} minutes {math.trunc(duration1 % 60)} seconds).'
        start_str = f'  Starting date/time of the drive is {time.ctime(r0.dataframe["Time"][0])}'

        # get the speed timeseries information from the data file
        speed_ts = r0.speed()
        # turn the timeseries into a python array for integration
        # transform from km/hr by multiplying 1000m/1km and 1 hr/3600s to get m/s
        speed_ar = np.array(speed_ts['Message'])*1000/3600
    #     speed_ar[0:-1]*1000/3600
        # find the difference of the time values
    #     dt = np.diff(np.array(speed_ts['Time']))
        dt = np.diff(speed_ts['Time'])
        # trapezoidal integration, divide by 1000 to get total km (rather than m)
        # Commented out: this produces negative values that don't make sense
    #     km_ts = np.trapz(y=speed_ar[0:-1],x=dt)/1000
        km_ts = np.trapz(y=speed_ar,x=np.array(speed_ts['Time']))/1000
        # commented out: this produces incorrect values that seem to be off by a factor of around 2, depending
        km_dx = np.trapz(y=speed_ar,dx=0.02)/1000
        # need to convert km/hr to km/s to get km later when integrating
        distance_str = f'  The trip was {km_ts} km ({km_ts*3.1/5} miles)'
    #     distance_str_dx f'  The dx version is {km_dx} km ({km_dx*3.1/5} miles)'

        #speed_ts['Time']
        #f'Total miles driven is {km_ts}'
    #     print(duration_str)
    #     print(start_str)
    #     print(distance_str)
    #     print('')
        # reproduce something like Fri Mar 13 06:53:09 2020 GMT-07
        time_str = (time.strftime('%a %b %d %H:%M:%S %Y %Z',time.gmtime(r0.dataframe['Time'][0])))
        # put it all together now
        drive = { 'filepath': csvfile, 'filename': os.path.basename(csvfile), 'distance_km': km_ts, 'distance_miles': km_ts*3.1/5, 'duration_s': duration1, 
                 'date': time_str }
        metadata.append( drive )
    except:
        print(f'Unable to process ', csvfile, ', exception=', sys.exc_info() )
print(metadata)

Processing ../../PandaData/2020_02_13/2020-02-13-08-29-09-380270__CAN_Message.csv
Processing ../../PandaData/2020_02_13/2020-02-13-15-37-19-262923__CAN_Message.csv
Processing ../../PandaData/2020_05_29/2020-05-29-11-07-33_2T3Y1RFV8KC014025_CAN_Messages.csv
No data was present in the csvfile or pandas dataframe supplied is empty. Unable to perform further operation
Processing ../../PandaData/2020_05_29/2020-05-29-18-25-00_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_05_29/2020-05-29-20-46-30_2T3Y1RFV8KC014025_CAN_Messages.csv
No data was present in the csvfile or pandas dataframe supplied is empty. Unable to perform further operation
Processing ../../PandaData/2020_05_29/2020-05-29-16-38-45_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_05_29/2020-05-29-10-52-06_2T3Y1RFV8KC014025_CAN_Messages.csv
No data was present in the csvfile or pandas dataframe supplied is empty. Unable to perform further operation
Processing ../../PandaData/2020_05_29/202

Processing ../../PandaData/2020_09_03/2020-09-03-13-38-09_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-13-02-12_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-13-27-40_2T3Y1RFV8KC014025_CAN_Messages.csv
CSVfile is empty.
Processing ../../PandaData/2020_09_03/2020-09-03-12-56-41_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-13-28-35_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-11-45-48_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-12-37-55_2T3Y1RFV8KC014025_CAN_Messages.csv
Processing ../../PandaData/2020_09_03/2020-09-03-13-18-58_2T3Y1RFV8KC014025_CAN_Messages.csv
CSVfile is empty.
Processing ../../PandaData/2020_09_03/2020-09-03-13-17-57_2T3Y1RFV8KC014025_CAN_Messages.csv
CSVfile is empty.
Processing ../../PandaData/2020_09_03/2020-09-03-13-19-54_2T3Y1RFV8KC014025_CAN_Messages.csv
CSVfile is empty

Processing ../../PandaData/2020_02_03/2020-02-03-14-35-36-256408__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-40-32-243617__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-25-28-181662__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-39-15-467899__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-34-57-536765__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-29-51-498988__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-57-01-431634__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-58-27-053566__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-31-11-510096__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-37-52-099866__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-33-39-543079__CAN_Message_.csv
Processing ../../PandaData/2020_02_03/2020-02-03-14-40-56-288660__CAN_Message_.csv
Proc

[{'filepath': '../../PandaData/2020_02_13/2020-02-13-08-29-09-380270__CAN_Message.csv', 'filename': '2020-02-13-08-29-09-380270__CAN_Message.csv', 'distance_km': 44.53907478853597, 'distance_miles': 27.614226368892304, 'duration_s': 3586.3246154785156, 'date': 'Thu Feb 13 15:29:09 2020 GMT'}, {'filepath': '../../PandaData/2020_02_13/2020-02-13-15-37-19-262923__CAN_Message.csv', 'filename': '2020-02-13-15-37-19-262923__CAN_Message.csv', 'distance_km': 2.076620222184724, 'distance_miles': 1.2875045377545289, 'duration_s': 603.9240891933441, 'date': 'Thu Feb 13 22:37:19 2020 GMT'}, {'filepath': '../../PandaData/2020_05_29/2020-05-29-18-25-00_2T3Y1RFV8KC014025_CAN_Messages.csv', 'filename': '2020-05-29-18-25-00_2T3Y1RFV8KC014025_CAN_Messages.csv', 'distance_km': 0.6349699346578783, 'distance_miles': 0.3936813594878846, 'duration_s': 153.43460321426392, 'date': 'Sat May 30 01:25:00 2020 GMT'}, {'filepath': '../../PandaData/2020_05_29/2020-05-29-16-38-45_2T3Y1RFV8KC014025_CAN_Messages.csv', 

## Now lets look at the total miles drive 

In [5]:
# how did we do?
dist=0
for d in metadata:
    dist = dist + d['distance_miles']
print(dist)

1522.2737081602304


In [9]:
metadata

[{'filepath': '/Users/sprinkle/work/data/cyverse/rahulbhadani/JmscslgroupData/PandaData/2019_10_25/2019-10-25-13-51-36-833648_a4f27f0c-36b0-4abe-beed-3d548605c439_CAN_Message_Rav4.csv',
  'filename': '2019-10-25-13-51-36-833648_a4f27f0c-36b0-4abe-beed-3d548605c439_CAN_Message_Rav4.csv',
  'distance_km': 0.0,
  'distance_miles': 0.0,
  'duration_s': 1.810054063796997,
  'date': 'Fri Oct 25 20:51:36 2019 UTC'},
 {'filepath': '/Users/sprinkle/work/data/cyverse/rahulbhadani/JmscslgroupData/PandaData/2019_10_25/2019-10-25-13-57-22-031456_b118f877-5385-4205-894a-a37515307ee1_CAN_Message_Rav4.csv',
  'filename': '2019-10-25-13-57-22-031456_b118f877-5385-4205-894a-a37515307ee1_CAN_Message_Rav4.csv',
  'distance_km': 0.0,
  'distance_miles': 0.0,
  'duration_s': 0.9986419677734375,
  'date': 'Fri Oct 25 20:57:22 2019 UTC'},
 {'filepath': '/Users/sprinkle/work/data/cyverse/rahulbhadani/JmscslgroupData/PandaData/2019_10_25/2019-10-25-13-49-54-310111_cb40aa45-de04-4313-bb9b-1bcf10ebaf70_CAN_Messag

In [6]:
import json
import os
import datetime
# eventually do everything, but for now just one file
for drive_i in metadata:
    # drive_i = metadata[215]
    drive_i["metadata_date"] = datetime.datetime.now().astimezone().strftime('%a %b %d %H:%M:%S %Y %Z')
    # find the name attached to this filename
    file_path = os.path.splitext(drive_i["filepath"])[0]
    index = file_path.find("CAN")
    json_filename = f'{file_path[0:index]}_metadata.json'

    #json_filename = f'{}.json'
    print(json_filename)
    ## json_basename = os.path.basename(drive_i['filename'])
    with open(json_filename,'w') as outfile:
        json.dump(drive_i,outfile,indent=4)

../../PandaData/2020_02_13/2020-02-13-08-29-09-380270___metadata.json
../../PandaData/2020_02_13/2020-02-13-15-37-19-262923___metadata.json
../../PandaData/2020_05_29/2020-05-29-18-25-00_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_05_29/2020-05-29-16-38-45_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_05_29/2020-05-29-14-22-43_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_03_15/2020-03-15-17-32-00__metadata.json
../../PandaData/2020_03_15/2020-03-15-15-12-00__metadata.json
../../PandaData/2020_05_30/2020-05-30-17-24-58_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_05_30/2020-05-30-18-06-35_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2019_11_08/2019-11-08-09-58-45-252022___metadata.json
../../PandaData/2019_11_08/2019-11-08-10-01-55-076615___metadata.json
../../PandaData/2019_11_08/2019-11-08-10-00-18-782963___metadata.json
../../PandaData/2019_11_08/2019-11-08-10-04-43-800750___metadata.json
../../PandaData/2019_11_08/2019-11-08-10-10-00-818245___

../../PandaData/2020_08_15/2020-08-15-19-39-23_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_08_15/2020-08-15-08-38-15_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_08_15/2020-08-15-19-10-19_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_09_01/2020-09-01-15-36-31_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_09_01/2020-09-01-16-03-28_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_09_01/2020-09-01-13-15-33_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_02_17/2020-02-17-08-48-37-911165___metadata.json
../../PandaData/2020_02_17/2020-02-17-12-38-21-095572___metadata.json
../../PandaData/2020_02_17/2020-02-17-12-37-36-342603___metadata.json
../../PandaData/2020_02_17/2020-02-17-11-50-57-809412___metadata.json
../../PandaData/2020_04_15/2020-04-15-05-12-00_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_04_15/2020-04-15-09-08-30_2T3Y1RFV8KC014025__metadata.json
../../PandaData/2020_03_06/2020-03-06-11-07-04__metadata.json
../../PandaData/20