# University of Rhode Island
# Final Project CSC 593, Programming for Scientists, Fall 2019
## Tim Jonas

### Load Packages

In [5]:
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta

### Remove or Replace unwanted characters
#### The function defined below removes or replaces unwanted characters in the column names. The files as downloaded from the FleetCarma website contain brackets, parentheses, slashes and percent signs in their column headers, which are difficult to work with in code. The function is called `renamer` and can be used for every driving log.

In [6]:
# Remove unwanted characters from Columns
def renamer(df):
    """Return a copy of ``df`` with awkward characters stripped from its column names.

    The following literal substitutions are applied, in order, to every
    column name:

    ``[`` -> ``_``, ``]`` -> removed, ``(`` -> ``_``, ``)`` -> removed,
    ``/`` -> removed, ``%`` -> ``Pctg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column names should be cleaned. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the renamed columns.
    """
    replacements = (
        ("[", "_"),
        ("]", ""),
        ("(", "_"),
        (")", ""),
        ("/", ""),
        ("%", "Pctg"),
    )

    drive = df.copy()
    for old, new in replacements:
        # regex=False: "[", "(" and ")" are regex metacharacters, and the
        # default of the ``regex`` argument differs between pandas versions.
        # Being explicit keeps the replacement literal everywhere.
        drive.columns = drive.columns.str.replace(old, new, regex=False)

    return drive

### Create a list with all files in the input folder
#### All driving log files are in the folder "InputDataFinalProject". The following code creates a list of all .csv files in that directory. This list is later used by a loop that processes every file on the list.

In [9]:
# Print the name of every .csv file contained in the folder. This is only a sanity check.
for entry in os.listdir('../homework/InputDataFinalProject'):
    # Skip anything that is not a driving log.
    if not entry.endswith(".csv"):
        continue
    print(entry)
# This function adds the name of every file in the directory that ends with ".csv" to the list "fileList". Initially the list is empty and will be filled with the filenames after.
def getFiles(directory='../homework/InputDataFinalProject'):
    """Return the names of all ``.csv`` files in ``directory``, sorted by name.

    Parameters
    ----------
    directory : str, optional
        Folder to scan. Defaults to the project's input data folder, so
        existing calls without arguments keep working.

    Returns
    -------
    list of str
        File names (not full paths) ending in ``.csv``. The match is
        case-sensitive. The list is sorted because ``os.listdir`` returns
        entries in arbitrary order, which would otherwise make the order of
        any derived results differ between machines.
    """
    return sorted(
        filename
        for filename in os.listdir(directory)
        if filename.endswith(".csv")
    )


2018-11-29T20-34-18.csv
2018-11-30T15-03-05.csv
2018-11-14T12-52-06.csv
2018-11-27T15-53-11.csv
2018-11-05T21-05-26.csv
2018-11-19T13-01-20_run2.csv
2018-11-27T16-49-43_run2.csv
2018-11-14T19-54-58.csv
2018-11-19T13-01-20_run1.csv
2018-11-27T16-49-43_run1.csv
2018-11-08T15-16-59.csv
2018-11-15T16-47-06.csv
2018-12-04T12-46-28_run1.csv
2018-12-04T12-46-28_run2.csv
2018-11-15T12-47-44.csv
2018-11-08T13-19-45.csv
2018-11-15T13-37-51.csv
2018-11-28T12-26-14.csv
2018-11-14T20-49-28.csv
2018-11-07T19-48-33.csv
2018-11-28T13-17-30.csv
2018-11-07T13-22-03.csv
2018-11-21T15-57-44.csv
2018-11-26T12-34-09.csv
2018-11-29T12-35-21.csv
2018-11-30T15-54-22.csv
2018-11-14T13-37-37.csv
2018-11-21T13-08-24.csv
2018-11-26T19-23-11.csv
2018-11-29T19-33-55.csv


### Create file with summary data

In [39]:
# Column layout of the summary table; one row is added per driving log.
columns = [
    'Workbookname',
    'Date',
    'Time',
    'Type',
    'Total Consumption (kWh)',
    'Distance',
    'Mean Consumption',
    'Total Time (ms)',
    'Average Speed',
    'StdDev Speed',
    'Average Acceleration',
    'StdDev Acceleration',
    'Average Jerk',
    'StdDev Jerk',
    'Initial SOC',
    'Temperature',
]
# Start with an empty table that already carries the final column order.
TotalDrive = pd.DataFrame(columns=columns)

### Loop through all files in the input folder and do all calculations

In [40]:
fileList = getFiles()

# Folder holding the raw driving logs.
input_dir = '../homework/InputDataFinalProject'

# Rush-hour window used to label a drive as "Traffic" (both bounds inclusive).
# Hoisted out of the loop because it is the same for every file.
InicioTraffic = datetime.strptime("07:00:00", "%H:%M:%S")
FinTraffic = datetime.strptime("09:00:00", "%H:%M:%S")

# One summary dict per driving log. The rows are turned into a DataFrame once
# after the loop: DataFrame.append was removed in pandas 2.0 and re-copies the
# whole table on every call.
summary_rows = []

for filename in fileList:
    # The first 5 lines of each log are skipped; the 6th is the header row.
    drive = pd.read_csv(os.path.join(input_dir, filename), skiprows=5, header=0)
    drive = renamer(drive)
    # Drop rows whose longitude is exactly 0 (presumably samples without a GPS
    # fix -- TODO confirm). .copy() so the column assignments below operate on
    # an independent frame instead of a slice of the raw data.
    drive = drive[drive.Longitude_deg != 0].copy()
    if drive.empty:
        # Nothing to summarise; idxmin()/iloc below would raise on an empty frame.
        print('Skipping ' + filename + ': no rows with a non-zero longitude')
        continue

    # Calculations
    # NOTE(review): the difference-based quantities below treat consecutive
    # rows as one second apart -- confirm the logging rate is 1 Hz.
    # Speed in m/s
    drive['Vehicle Speed_m_s'] = drive['Vehicle Speed_kmh'] / 3.6
    # Acceleration: change in speed between consecutive rows
    drive['Vehicle Acceleration'] = drive['Vehicle Speed_m_s'].diff()
    # Jerk: change in acceleration between consecutive rows
    drive['Vehicle Jerk'] = drive['Vehicle Acceleration'].diff()
    # Distance travelled per row (metres divided by 1609.344, i.e. miles)
    drive['Dist_in_s'] = drive['Vehicle Speed_m_s'] / 1609.344
    # Energy per row: current * voltage with the sign flipped, /1000 and /3600
    drive['kwh'] = (drive['HV Battery Current_A'] * drive['HV Battery Voltage_V'] * -1) / 1000 / 3600

    # Planar distance (in degrees) of every sample to the route start point
    drive['Start_Cut'] = np.sqrt(
        (drive['Latitude_deg'] - 41.489207) ** 2
        + (drive['Longitude_deg'] + 71.521673) ** 2
    )
    # Planar distance (in degrees) of every sample to the route end point
    drive['End_Cut'] = np.sqrt(
        (drive['Latitude_deg'] - 41.480337) ** 2
        + (drive['Longitude_deg'] + 71.525495) ** 2
    )
    # Keep only the rows between the sample closest to the start point and the
    # sample closest to the end point. truncate() returns a new frame, so the
    # result must be assigned; previously it was discarded and the cut had no
    # effect.
    drive = drive.truncate(drive['Start_Cut'].idxmin(), drive['End_Cut'].idxmin())
    # Cumulative distance is computed after the cut so that 'Distance' covers
    # the same rows as the other summary statistics.
    drive['Cumsum_Dist'] = drive['Dist_in_s'].cumsum()

    # Date and time of the drive, parsed from the first 19 characters of the
    # file name (e.g. "2018-11-29T20-34-18").
    DateHour = datetime.strptime(filename[:19], "%Y-%m-%dT%H-%M-%S")
    # Shift by a fixed 5 hours. NOTE(review): presumably converts UTC file
    # names to US Eastern Standard Time; a fixed offset ignores daylight
    # saving time -- confirm for logs recorded outside winter.
    CorrectHour = DateHour - timedelta(hours=5)
    # Date and time are both taken from the shifted timestamp so they cannot
    # disagree when the shift crosses midnight.
    Date = CorrectHour.strftime('%Y-%m-%d')
    Hour = CorrectHour.strftime('%H:%M:%S')

    # Compare the time of day only (re-parsing drops the date part).
    hour = datetime.strptime(Hour, '%H:%M:%S')
    if InicioTraffic <= hour <= FinTraffic:
        Type = "Traffic"
    else:
        Type = "Non Traffic"

    summary_rows.append({
        'Workbookname': filename,
        'Date': Date,
        'Time': Hour,
        'Type': Type,
        'Total Consumption (kWh)': drive['kwh'].sum(),
        'Distance': drive.iloc[-1]['Cumsum_Dist'],
        'Mean Consumption': drive['kwh'].mean(),
        'Average Speed': drive['Vehicle Speed_kmh'].mean(),
        'StdDev Speed': drive['Vehicle Speed_kmh'].std(),
        'Average Acceleration': drive['Vehicle Acceleration'].mean(),
        'StdDev Acceleration': drive['Vehicle Acceleration'].std(),
        'Average Jerk': drive['Vehicle Jerk'].mean(),
        'StdDev Jerk': drive['Vehicle Jerk'].std(),
        'Total Time (ms)': drive.iloc[-1]['Timestamp_ms'] - drive.iloc[0]['Timestamp_ms'],
        'Initial SOC': drive.iloc[0]['HV Battery SOC_Pctg'],
        'Temperature': drive['OAT_degC'].mean(),
    })
    # drive.to_csv(os.path.join('../homework/OutputDataFinalProject', filename))

# Add the new rows to the summary table, keeping its existing column order.
new_rows = pd.DataFrame(summary_rows, columns=TotalDrive.columns)
if TotalDrive.empty:
    TotalDrive = new_rows
else:
    TotalDrive = pd.concat([TotalDrive, new_rows], ignore_index=True)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
