# Develop the NTE 

In [14]:
import pickle
import pandas as pd
import os

# Re-execute the nte module so that edits made to it since the last import are
# picked up without restarting the kernel. `reload` is used as a builtin here
# (Python 2). NOTE(review): on Python 3 it would have to come from importlib.
import nte
reload(nte)
# The star import has to follow reload() so the notebook-level names are
# rebound to the freshly loaded definitions.
# NOTE(review): the helpers called unqualified below (read_csv,
# get_nte_proportion, get_percent_NTE_valid) are not defined in this notebook,
# so they presumably come from this import -- confirm against nte.py.
from nte import *

# set up your pool
from multiprocessing import Pool
# This cell gets re-run (for example after reloading nte), so shut down any
# pool left over from an earlier run first; otherwise its twelve worker
# processes stay alive and idle until the kernel exits.
if 'pool' in globals():
    pool.terminate()
    pool.join()
pool = Pool(processes=12)

## Set Up the Paths and Pool

In [2]:
# Load the path table; it is indexed by vehicle id further down (paths[v_id]).
# Pickle data is bytes, so the file is opened in binary mode: text mode can
# corrupt binary pickle protocols on Windows and fails outright on Python 3.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load pickles that come from a trusted source.
with open('./Data/NOxPaths.pickle', 'rb') as handle:
    paths = pickle.load(handle)

In [3]:
def mac(x):
    """Return x with every "E:" replaced by "/Volumes/Fleet Storage"."""
    return x.replace("E:", "/Volumes/Fleet Storage")


def wnd(x):
    """Return x with every forward slash replaced by a backslash."""
    return x.replace("/", "\\")

In [4]:
# set up your pool
# The import cell at the top of the notebook already creates `pool`. Rebinding
# the name without shutting that pool down would leave its worker processes
# running, so stop any existing pool before starting the replacement.
if 'pool' in globals():
    pool.terminate()
    pool.join()
pool = Pool(processes=12)

## Read in data from csvs

In [5]:
# Vehicle whose files are loaded below.
v_id = 12105
# Translate every stored path for that vehicle with mac(), then keep the first ten.
vehicle_paths = list(map(mac, paths[v_id]))[:10]

In [6]:
%%time
# have your pool map the file names to dataframes
dfs = pool.map(read_csv, vehicle_paths)

# filter the dfs if longer than 500 rows (optional)
dfs = [df for df in dfs if len(df) > 500]

CPU times: user 42.3 ms, sys: 47.8 ms, total: 90 ms
Wall time: 4.3 s


In [7]:
# lets concat the entire vehicles data (optional)
# ignore_index=True gives the combined frame a fresh 0..n-1 index in one step.
# It replaces the earlier reset_index()/drop('index') round trip, which broke
# when a frame's index was named or a column was already called 'index'.
df = pd.concat(dfs, ignore_index=True)

In [8]:
from sys import getsizeof
# Size in GB (bytes / 1e9) that sys.getsizeof reports for the frames in dfs.
# Only the list members are counted; the concatenated df is not included.
sum(getsizeof(frame) for frame in dfs) / 1e9

0.030536352

## Results and Intermediate Results

Once we run `get_nte_proportion`, `df` will have a True/False column for each criterion, plus the gNOx for each moment, the work for each moment, and the window.

- ```criteria='torq_criteria'```: only the torque criterion must be met
- ```criteria='criterion'```: all criteria must be met

### Results

In [16]:
# Columns computed per row alongside the criteria flags.
other_computed_cols = ['work', 'gNOx']

# One boolean column per individual criterion, for exploration.
criteria_cols = [
    'engine_speed_criteria',
    'torq_criteria',
    'engine_air_temp_criteria',
    'coolant_temp_criteria',
    'scr_temp_criteria',
]

# Everything above plus the combined 'criterion' column, in display order.
all_nox_cols = other_computed_cols + criteria_cols + ['criterion']

In [55]:
%%time
# get the proportion
prop_in_nte, prop_nte_testable = get_nte_proportion(df, 
                                 cutoff=.3, 
                                 criteria='criterion')

CPU times: user 822 ms, sys: 64.7 ms, total: 887 ms
Wall time: 861 ms


In [56]:
# Single-argument print(...) parses on both Python 2 and Python 3 and emits
# exactly the text the old print statements did ("label value").
# NOTE(review): the labels say "percent", but the values printed below look
# like fractions (e.g. 0.0027), not percentages -- confirm before quoting them.
print("percent in nte: %s" % (prop_in_nte,))
print("percent in nte testable %s" % (prop_nte_testable,))

percent in nte: 0.00269008690405
percent in nte testable 0.159622078568


In [32]:
# The computed columns are present on df now that the computation has run;
# show them for the first few rows.
df.head()[all_nox_cols]

Unnamed: 0,work,gNOx,engine_speed_criteria,torq_criteria,engine_air_temp_criteria,coolant_temp_criteria,scr_temp_criteria,criterion
0,0.0,0.315958,False,False,False,False,False,False
1,-0.0,0.284152,False,False,False,False,False,False
2,0.000874,0.289916,False,False,False,False,False,False
3,-0.000884,0.293036,False,False,False,False,False,False
4,-0.001771,0.292426,False,False,False,False,False,False


In [33]:
# Column totals: work and gNOx are summed, and each boolean criteria column
# sums to its number of True rows. Use DataFrame.sum() directly, because
# passing the builtin `sum` to .agg() is deprecated in recent pandas.
df[all_nox_cols].sum()

work                          4959.294403
gNOx                         17560.266101
engine_speed_criteria       147902.000000
torq_criteria                43040.000000
engine_air_temp_criteria    136294.000000
coolant_temp_criteria       229756.000000
scr_temp_criteria           138949.000000
criterion                    14380.000000
dtype: float64

### Intermediate Results
If you want to get the ratio of emissions to positive work for each window and its corresponding length:

In [32]:
%%time
# Per-window intermediate result, evaluated against the 'torq_criteria' column
# only. As the output below shows, lt is indexed by window and holds one
# (ratio, window length) tuple per window.
lt = get_percent_NTE_valid(df, 'torq_criteria')

CPU times: user 2.34 s, sys: 152 ms, total: 2.49 s
Wall time: 2.44 s


In [33]:
# The index is the window id, matching the 'window' column of df.
# Each value is a tuple: (ratio, length of window).
lt.head(5)

window
62    (0.359840281147, 33)
68    (0.414192976466, 81)
70    (0.391215903535, 55)
82    (0.442820135961, 80)
92    (0.556141478303, 32)
dtype: object