In [47]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
from raw_data_processing import get_table, remove_nan_from_table

In [3]:
# Relative path of the directory that holds the behavior recordings.
BEHAVIOR_DATA_DIR = '../data/behavior'

# Names of the regular files found directly inside that directory
# (sub-directories are excluded), in whatever order listdir() returns them.
BEHAVIOR_DATA_FILES = [
    entry
    for entry in listdir(BEHAVIOR_DATA_DIR)
    if isfile(join(BEHAVIOR_DATA_DIR, entry))
]

In [144]:
def get_accumulation_without_jumps(values, direction, max_jump=None):
    """Sum the step-to-step changes of ``values`` that move in ``direction``.

    Consecutive differences are computed with ``np.diff``. A difference is
    kept only when ``difference * direction >= 0``, so with ``direction=1``
    every decrease (for example a counter resetting to zero) is ignored and
    with ``direction=-1`` every increase is ignored. Differences that are
    NaN never satisfy the comparison and are therefore dropped as well.

    Parameters
    ----------
    values : array-like
        One-dimensional sequence of readings (list, ndarray, pandas Series).
    direction : int or float
        Sign of the changes to keep: positive keeps increases, negative
        keeps decreases.
    max_jump : float, optional
        When given, differences whose absolute value exceeds ``max_jump``
        are discarded too. This lets callers also reject a large step in
        the *kept* direction (e.g. a counter snapping back up after a
        glitch). The default ``None`` applies no magnitude limit.

    Returns
    -------
    numpy scalar
        Sum of the kept differences; zero when there are fewer than two
        readings or nothing is kept.
    """
    increments = np.diff(np.asarray(values))

    # Keep only the steps that go the requested way (zero steps included).
    keep = increments * direction >= 0
    if max_jump is not None:
        keep &= np.abs(increments) <= max_jump

    # Vectorised sum instead of the Python builtin, which iterates element
    # by element and yields an int 0 for an empty selection.
    return increments[keep].sum()

In [145]:
def get_miles_per_gallon(df, columns):
    """Compute distance travelled per unit of fuel, in miles per gallon.

    The distance and fuel totals are obtained by passing the two selected
    columns to ``get_accumulation_without_jumps`` with direction ``1``, so
    only non-decreasing steps contribute. The code treats the distance
    total as kilometres and the fuel total as litres and converts them with
    the factors below.

    Parameters
    ----------
    df : table supporting ``df[column_name]``
        Readings to evaluate.
    columns : sequence
        ``columns[0]`` names the odometer column and ``columns[1]`` the
        fuel-consumed column.

    Returns
    -------
    float
        Miles per gallon, or NaN when no fuel consumption was accumulated
        (the ratio is undefined in that case).
    """
    miles_per_kilometer = 0.621371
    gallons_per_liter = 0.264172

    odometer_column, fuel_column = columns[0], columns[1]
    kilometers = get_accumulation_without_jumps(df[odometer_column], 1)
    liters = get_accumulation_without_jumps(df[fuel_column], 1)

    miles = kilometers * miles_per_kilometer
    gallons = liters * gallons_per_liter

    # Without any accumulated fuel the division would raise
    # ZeroDivisionError or yield inf depending on the scalar type.
    if gallons == 0:
        return float('nan')
    return miles / gallons

In [5]:
def capitalize(name):
    """Return ``name`` with its first character upper-cased and the rest lower-cased.

    An empty string is returned as an empty string; slicing with ``[:1]``
    instead of indexing with ``[0]`` avoids an IndexError in that case.
    """
    return name[:1].upper() + name[1:].lower()
    
def get_uppercase_name(json_name):
    """Turn a file name such as ``'some-trip.json'`` into ``'Some Trip'``.

    Everything from the first ``'.'`` onwards is discarded, the remainder is
    split on ``'-'``, each piece is passed through ``capitalize`` and the
    pieces are joined with single spaces. Empty pieces (produced by leading,
    trailing or doubled dashes, or by a name that starts with ``'.'``) are
    skipped so they can neither break ``capitalize`` nor add stray spaces.
    """
    stem = json_name.split('.')[0]
    words = [piece for piece in stem.split('-') if piece]
    return ' '.join(capitalize(word) for word in words)

In [22]:
def get_desired_columns(df, column_groups):
    """Choose one column name per group of alternatives.

    For every group in ``column_groups`` the first name that is present in
    ``df.columns`` is selected. A group with no match contributes ``None``,
    so the result always has one entry per group, in group order.
    """
    def first_available(candidates):
        # Walk the alternatives in the order given and stop at the first hit.
        for candidate in candidates:
            if candidate in df.columns:
                return candidate
        return None

    return [first_available(candidates) for candidates in column_groups]
            

In [146]:
# Maps each processed file's display name to its computed miles-per-gallon value.
data = {}

# NOTE(review): the [4:5] slice limits the run to a single directory entry, and
# BEHAVIOR_DATA_FILES comes from listdir(), whose order is arbitrary — presumably
# a debugging leftover; confirm before treating `data` as covering every file.
# `df`, `relevant_columns` and `file_name` stay in the notebook namespace after
# the loop and are read again by later cells.
for file_name in BEHAVIOR_DATA_FILES[4:5]:
    df = get_table(join(BEHAVIOR_DATA_DIR, file_name))
    # One column per group: the first listed alternative that exists in the table.
    # NOTE(review): an entry is None when a group has no match; nothing below
    # guards against that.
    relevant_columns = get_desired_columns(df, [['odometer', 'fine_odometer_since_restart'], ['fuel_consumed_since_restart']])
    # Presumably drops rows with NaN in the selected columns — verify in raw_data_processing.
    df = remove_nan_from_table(df, relevant_columns)
    # Keep only the odometer and fuel columns.
    df = df[relevant_columns]
    mpg = get_miles_per_gallon(df, relevant_columns)
    display_name = get_uppercase_name(file_name)
    data[display_name] = mpg

43590.984375 to 0.0
3.365573 to 3.365494
3.385604 to 3.385524
3.425748 to 3.425669
3.525908 to 3.525748
3.545938 to 3.545779
3.565968 to 3.565809
3.606027 to 3.605868
3.646091 to 3.646012
3.686151 to 3.686072
3.726216 to 3.726137
3.766198 to 3.766119
3.786231 to 3.786152
3.806345 to 3.806186
3.826375 to 3.826296
3.866438 to 3.866359
3.886391 to 3.886312
3.906424 to 3.906345
3.926458 to 3.926378
3.966604 to 3.966524
3.986557 to 3.986478
4.066694 to 4.066615
4.106767 to 4.106688
4.146834 to 4.146755
4.166863 to 4.166784
4.227046 to 4.226966
4.307178 to 4.307098
4.327123 to 4.327043
4.347235 to 4.347155
4.367259 to 4.367179
4.407322 to 4.407243
4.44738 to 4.447221
4.467414 to 4.467255
4.567573 to 4.567493
4.647716 to 4.647557
4.707816 to 4.707736
4.72786 to 4.727702
4.747898 to 4.747739
4.767964 to 4.767806
4.769159 to 0.003259
0.020191 to 0.020112
0.080289 to 0.080209
0.100321 to 0.100162
0.120353 to 0.120194
0.200483 to 0.200324
0.220515 to 0.220356
0.280612 to 0.280453
0.300644 to 0.30

In [143]:
# Re-evaluate the ratio for the `df` / `relevant_columns` left in the kernel by
# the loop cell; this cell fails on a fresh kernel unless that loop has run.
# NOTE(review): the recorded result (about 51,679 mpg) is implausibly high —
# possibly large forward jumps in the odometer are being counted; investigate.
get_miles_per_gallon(df, relevant_columns)

51678.79617089712

In [59]:
# Show the last value of the loop variable, i.e. the file the loop cell processed last.
file_name

'localwithgps.json'