In [1]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
from raw_data_processing import get_table, remove_nan_from_table

In [2]:
# Directory that is scanned for behavior data files.
BEHAVIOR_DATA_DIR = '../data/behavior'
# File names that are excluded from processing.
BAD_DATA = ['localwithgps.json', 'driving.json']
# listdir() returns entries in arbitrary order, so sort the names to keep the
# processing order (and the row order of anything derived from it) stable
# across runs and machines. Only regular files not listed in BAD_DATA are kept.
BEHAVIOR_DATA_FILES = sorted(
    f for f in listdir(BEHAVIOR_DATA_DIR)
    if isfile(join(BEHAVIOR_DATA_DIR, f)) and f not in BAD_DATA
)

In [3]:
def get_accumulation_without_jumps(values, direction):
    """Sum the step-to-step changes in ``values`` that move in ``direction``.

    Consecutive differences are computed with ``np.diff``. A difference is
    kept only when ``difference * direction >= 0``, i.e. when it is zero or
    has the same sign as ``direction``. Differences of the opposite sign (for
    example a reading that drops back to a lower value) are discarded rather
    than subtracted from the total.

    Args:
        values: One-dimensional sequence of numeric readings (anything
            ``np.diff`` accepts, such as a list, ndarray or pandas Series).
        direction: Positive number to accumulate increases, negative number
            to accumulate decreases.

    Returns:
        The sum of the retained differences; zero when ``values`` has fewer
        than two elements or when no difference qualifies.
    """
    increments = np.diff(values)
    # NaN differences compare False here, so they are dropped as well.
    kept = increments[increments * direction >= 0]
    # Vectorized reduction instead of the builtin sum(), which would iterate
    # over the array element by element in Python.
    return kept.sum()

In [4]:
# Unit conversion factors applied to the accumulated readings.
MILES_PER_KILOMETER = 0.621371
GALLONS_PER_LITER = 0.264172


def get_miles_per_gallon(df, columns):
    """Compute fuel economy in miles per gallon from accumulated readings.

    The distance and fuel totals are obtained by summing only the
    non-negative step-to-step changes of each column (via
    ``get_accumulation_without_jumps`` with direction ``1``). The distance
    total is treated as kilometers and the fuel total as liters; both are
    converted to miles and gallons before dividing.

    Args:
        df: Table indexable by column name (``df[name]``).
        columns: Sequence whose first item names the odometer column and
            whose second item names the fuel-consumed column.

    Returns:
        Miles divided by gallons, or NaN when no fuel consumption was
        accumulated (the ratio is undefined in that case).
    """
    odometer_column, fuel_column = columns[0], columns[1]
    kilometers = get_accumulation_without_jumps(df[odometer_column], 1)
    liters = get_accumulation_without_jumps(df[fuel_column], 1)
    miles = kilometers * MILES_PER_KILOMETER
    gallons = liters * GALLONS_PER_LITER
    # Guard the division: depending on the operand types a zero denominator
    # would otherwise raise ZeroDivisionError or silently yield inf/nan.
    if gallons == 0:
        return float('nan')
    return miles / gallons

In [5]:
def capitalize(name):
    """Return ``name`` with its first character uppercased and the rest lowercased.

    Slicing (``name[:1]``) is used instead of indexing (``name[0]``) so that
    an empty string is returned unchanged rather than raising ``IndexError``.
    """
    return name[:1].upper() + name[1:].lower()
    
def get_uppercase_name(json_name):
    """Turn a dash-separated file name into a space-separated display name.

    Everything from the first ``.`` onward is dropped, the remainder is split
    on ``-``, every piece is passed through ``capitalize`` and the pieces are
    joined with single spaces.
    """
    base_name, _, _ = json_name.partition('.')
    words = []
    for piece in base_name.split('-'):
        words.append(capitalize(piece))
    return ' '.join(words)

In [6]:
def get_desired_columns(df, column_groups):
    """Pick, for every group, the first candidate column that exists in ``df``.

    Args:
        df: Object exposing a ``columns`` collection that supports ``in``.
        column_groups: Iterable of candidate-name sequences, each listed in
            order of preference.

    Returns:
        A list with one entry per group: the first candidate found in
        ``df.columns``, or ``None`` when none of the group's candidates exist.
    """
    def first_available(candidates):
        # Walk the candidates in preference order and stop at the first hit.
        for candidate in candidates:
            if candidate in df.columns:
                return candidate
        return None

    return [first_available(candidates) for candidates in column_groups]
            

In [13]:
# Build one {'Behavior', 'MPG'} record per behavior file.
# Each inner list names acceptable columns for one quantity, in order of
# preference: first the odometer reading, then the fuel consumed.
column_groups = [
    ['odometer', 'fine_odometer_since_restart'],
    ['fuel_consumed_since_restart'],
]
data = []
for file_name in BEHAVIOR_DATA_FILES:
    behavior_table = get_table(join(BEHAVIOR_DATA_DIR, file_name))
    relevant_columns = get_desired_columns(behavior_table, column_groups)
    # get_desired_columns yields None for a group without a match; fail here
    # with a message naming the file instead of an obscure lookup failure
    # further down.
    if None in relevant_columns:
        raise ValueError(
            '{} has none of the expected columns for one of: {}'.format(file_name, column_groups)
        )
    # remove_nan_from_table comes from raw_data_processing; presumably it drops
    # rows with NaN in the given columns -- TODO confirm against that module.
    behavior_table = remove_nan_from_table(behavior_table, relevant_columns)
    behavior_table = behavior_table[relevant_columns]
    mpg = get_miles_per_gallon(behavior_table, relevant_columns)
    display_name = get_uppercase_name(file_name)
    data.append({'Behavior': display_name, 'MPG': mpg})

In [14]:
# Pin the column set so the frame has the expected schema even when `data`
# is empty (a bare pd.DataFrame([]) would have no columns at all).
df = pd.DataFrame(data, columns=['Behavior', 'MPG'])

In [15]:
# Write the table to disk. The row index is emitted as the first, unnamed
# column (pandas' default, spelled out here for the reader).
df.to_csv('mpg_data.csv', index=True)