In [42]:
import json
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from path import Path
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import PolynomialFeatures

sns.set_style('whitegrid')
warnings.filterwarnings('ignore')

In [43]:
# Root data directory; read_board loads DATA_DIR/csv/B<board_id>.csv from it.
# Set the METASENSE_DATA_DIR environment variable to point the notebook at a
# different location; the original hard-coded path is kept as the fallback so
# existing setups keep working unchanged.
DATA_DIR = Path(os.environ.get("METASENSE_DATA_DIR", "/home/sharad/data/metasense/la/"))

In [44]:
# Number of leading rows to discard from each board's CSV, keyed by board id.
# read_board slices these rows off with .iloc right after loading the file.
INITIAL_REMOVE = {4: 40, 8: 100, 9: 110, 10: 150}

In [45]:
# Skeleton of the emitted C source, filled in by generate_code via str.format
# (literal C braces are therefore doubled).
#   {weights} - JSON dump of the fitted coefficients/intercepts per gas.
#               NOTE(review): this is emitted as raw JSON above the typedef,
#               which is not C syntax - confirm it is meant to be cut out
#               before the file is compiled.
#   {body}    - one feature-vector declaration plus one dot-product snippet
#               per gas.
CODE_TEMPLATE = """

{weights}

typedef struct gas_reading {{
    float CO;
    float O3;
    float NO2;
}} gas_reading;

void calibrate_values(float coA, float coW, float o3A, float o3W, float no2A, float no2W, float temperature, float humidity, float* CO_WEIGHTS, float CO_INTERCEPT, float* O3_WEIGHTS, float O3_INTERCEPT, float* NO2_WEIGHTS, float NO2_INTERCEPT, gas_reading* reading) {{
    int i = 0;
{body}
}}
"""
# C snippet computing one gas value as dot(vector, weights) + intercept.
# The output field is zeroed first: the loop accumulates with `+=`, so without
# the reset the result would include whatever the caller left in the struct
# (uninitialised memory, or the previous call's value when it is reused).
# Starting from zero gives the same result as before for callers that already
# cleared the struct.
DOT_TEMPLATE = """
    {reading} = 0.0f;
    for (i = 0; i < {length}; i++) {{
        {reading} += {vector}[i] * {weights}[i];
    }}
    {reading} += {intercept};
"""

In [46]:
def read_board(board_id, train_end="2016-08-18"):
    """Load one board's CSV and split it into train and test frames.

    Reads ``DATA_DIR/csv/B<board_id>.csv`` with the ``datetime`` column parsed
    as the index, drops the first ``INITIAL_REMOVE[board_id]`` rows and adds a
    ``temperature-F`` column computed from ``temperature`` as ``9/5 * t + 32``.

    Parameters
    ----------
    board_id : int
        Board number; must be a key of ``INITIAL_REMOVE``.
    train_end : str, optional
        Last date (inclusive) of the training period. Defaults to the date
        that was previously hard-coded.

    Returns
    -------
    (train_data, test_data)
        ``train_data`` holds the rows up to and including ``train_end``;
        ``test_data`` holds the rows that follow them, so the two never
        share a row. Board 9 is not split: every row is returned as
        ``train_data`` and ``test_data`` is ``None``.

    Raises
    ------
    KeyError
        If ``board_id`` has no entry in ``INITIAL_REMOVE``.
    """
    # Build the file name before joining: `/` and `%` have equal precedence,
    # so without the parentheses `%` is applied to the joined path object.
    csv_path = DATA_DIR / "csv" / ("B%u.csv" % board_id)
    data = pd.read_csv(csv_path, index_col='datetime', parse_dates=True)
    # Copy the slice so the column added below is written to an independent
    # frame rather than to a view of the freshly read one.
    data = data.iloc[INITIAL_REMOVE[board_id]:].copy()
    data['temperature-F'] = 9.0 / 5.0 * data['temperature'] + 32
    if board_id == 9:
        return data, None
    train_data = data.loc[:train_end]
    # Label slices include both endpoints, so slicing the test set as
    # `train_end:` would put every row of that day in both sets. The train
    # slice is a prefix of `data`, so the remainder is taken by position.
    test_data = data.iloc[len(train_data):]
    return train_data, test_data

# One regression pipeline per gas: a polynomial feature expansion (step
# 'poly') feeding a linear model (step 'cf'). generate_vector looks both steps
# up by exactly these names, so they must not be renamed.
# CO and NO2 use a degree-2 expansion with Lasso; O3 uses a degree-1 expansion
# with ordinary least squares.
# These are shared module-level instances: generate_code calls .fit() on them
# directly, so each call overwrites the previous board's fit.
CLASSIFIERS = {
    "CO": Pipeline([('poly', PolynomialFeatures(2)), ('cf', Lasso())]),
    "O3": Pipeline([('poly', PolynomialFeatures(1)), ('cf', LinearRegression())]),
    "NO2": Pipeline([('poly', PolynomialFeatures(2)), ('cf', Lasso())]),
}

# Gases to calibrate, in the order generate_code emits their C snippets.
GASES = ["CO", "O3", "NO2"]

# Column each gas model is regressed against.
TARGET = dict(
    CO="epa-co",
    O3="epa-o3",
    NO2="epa-no2",
)

# Input columns per gas model. The list order is the column order handed to
# the pipeline, and therefore the order generate_vector uses when it maps
# polynomial exponents back to names.
FEATURES = dict(
    CO=["co-A", "co-W", "temperature", "humidity"],
    O3=["o3-A", "o3-W", "co-A", "co-W", "temperature", "humidity"],
    NO2=["o3-A", "o3-W", "no2-A", "no2-W", "co-A", "co-W", "temperature", "humidity"],
)

# CSV column name -> parameter name used in the generated C function
# (the same name with the hyphen removed).
NAME_MAP = {
    column: column.replace("-", "")
    for column in (
        "co-A", "co-W",
        "no2-A", "no2-W",
        "o3-A", "o3-W",
        "temperature", "humidity",
    )
}

In [47]:
def generate_code(board_id):
    """Fit every gas model on one board and render the C calibration source.

    For each gas in GASES the matching pipeline from CLASSIFIERS is fitted
    (in place) on the board's training rows, its polynomial terms are turned
    into a C array initialiser, and a DOT_TEMPLATE snippet is emitted for it.

    Parameters
    ----------
    board_id : int
        Board whose data is loaded through read_board.

    Returns
    -------
    str
        CODE_TEMPLATE filled with the JSON dump of the fitted weights and
        intercepts and with the per-gas C snippets.
    """
    # Only the training half is needed here; the test half is ignored.
    train_data, _ = read_board(board_id)

    snippets = []
    fitted = {}
    for gas in GASES:
        model = CLASSIFIERS[gas]
        columns = FEATURES[gas]
        label = TARGET[gas]
        model.fit(train_data[columns], train_data[label])

        terms, intercept, coefficients = generate_vector(model, columns)
        fitted[gas] = {
            'weights': coefficients.tolist(),
            'intercept': intercept
        }

        # C array holding the value of every polynomial term for one reading.
        initializer = "{%s}" % (", ".join(terms))
        declaration = "    float %s_VECTOR[%u]  = %s;" % (gas, len(terms), initializer)
        dot_product = DOT_TEMPLATE.format(
            length=len(terms),
            reading="reading->%s" % gas,
            weights="%s_WEIGHTS" % gas,
            vector="%s_VECTOR" % gas,
            intercept="%s_INTERCEPT" % gas,
        )
        snippets.extend([declaration, dot_product])

    body = '\n\n'.join(snippets)
    return CODE_TEMPLATE.format(weights=json.dumps(fitted), body=body)
    
def generate_vector(cf, features):
    """Describe a fitted pipeline's polynomial terms as C expressions.

    Parameters
    ----------
    cf : pipeline
        Fitted pipeline with a 'poly' step exposing ``powers_`` and a 'cf'
        step exposing ``coef_`` and ``intercept_``.
    features : list of str
        Input column names, in the order the pipeline was fitted on; each
        one used by a term must be a key of NAME_MAP.

    Returns
    -------
    (terms, intercept, coefficients)
        ``terms`` has one C expression per row of ``powers_`` - a product of
        NAME_MAP names, each repeated as many times as its exponent, or
        ``'1'`` for the all-zero row. ``intercept`` and ``coefficients`` are
        the regressor's ``intercept_`` and ``coef_``, returned untouched.
    """
    expansion = cf.named_steps['poly']
    regressor = cf.named_steps['cf']
    coefficients = regressor.coef_
    intercept = regressor.intercept_

    expressions = []
    for exponents in expansion.powers_:
        factors = []
        for column, exponent in enumerate(exponents):
            if exponent > 0:
                name = NAME_MAP[features[column]]
                # x**k is spelled out as x*x*...*x.
                factors.append("*".join(name for _ in range(exponent)))
        if factors:
            expressions.append("*".join(factors))
        else:
            # No feature takes part: this is the constant (bias) term.
            expressions.append('1')
    return expressions, intercept, coefficients

In [48]:
# Load board 4 into notebook-level names for the manual fit in the next cells.
train_data, test_data = read_board(4)

In [49]:
# `cf` is the shared pipeline object stored in CLASSIFIERS['CO'], not a copy.
cf, features, target = CLASSIFIERS['CO'], FEATURES['CO'], TARGET['CO']

In [50]:
# Fit the CO pipeline in place on board 4's training rows.
cf.fit(train_data[features], train_data[target])

Pipeline(steps=[('poly', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('cf', Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False))])

In [51]:
# Intercept learned by the pipeline's final regression step ('cf').
cf.named_steps['cf'].intercept_

-0.028363721106486328

In [52]:
# Emit the calibration source for board 4. generate_code refits the shared
# CLASSIFIERS pipelines, so `cf` from the cells above is refitted as well.
print(generate_code(4))



{"O3": {"intercept": -186.41827760344097, "weights": [0.0, 0.07230986418652653, 0.4160721301285209, 0.18579584421943768, -0.1643969201067098, 1.4324469991422648, -0.12546586464600012]}, "NO2": {"intercept": -15.116433013118854, "weights": [0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.000860361445119428, 1.4103861512856326e-05, -7.285684231575184e-05, -0.00010504628804771831, -0.0005577372965488009, 0.00030074302236256207, 0.0001358903534236543, -0.00033488851573922464, -0.00012026336884163385, -5.734607438455616e-05, -7.658389153104979e-05, -0.0017058139025766337, 3.39026286174402e-05, 0.0011525399770816068, -0.0002680356334287563, -0.0008941111476869357, -0.00012858214114180736, -4.42014149122897e-05, -1.855884378895224e-05, -0.0003002812405121397, -0.00039001091852163737, -9.277183606044659e-05, 0.0037242866272783454, -0.0003546177050616209, -0.0008689149792320277, 8.465202639161321e-05, 0.00014718066030555113, 0.0008011101570634587, -0.009397348543152724, -0.0010362001034199

In [53]:
# Emit the calibration source for board 10 (refits the shared pipelines).
print(generate_code(10))



{"O3": {"intercept": -123.45516642225701, "weights": [0.0, -0.49724879992587856, 0.5855773419868818, 0.4967864030055257, -0.01554562959355077, 0.5730359415103649, -0.7091768422218219]}, "NO2": {"intercept": -196.83031935674765, "weights": [0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0012424501186810803, -0.0002971363425903894, 0.0034782652921080378, 0.0003021199500810038, 0.0014294971569094011, 0.00036841497745968726, 0.003008867879112484, -0.0006975757532595027, -0.0009069856567289048, 0.0009162769629255814, 0.0030536391409059817, 4.035391864083345e-05, -0.0006529280931063168, -0.0022003135463553544, -0.00034660559871284795, -0.0022444715305006816, -0.003885467372960112, 0.0009303915587633484, 0.0027711252491321886, 0.00014795654390615745, -0.0009503204686086048, 0.0022191179894949626, -0.002633551522581687, -0.001911562273075625, -0.002435687914276378, 0.00011574916542995312, -0.0003157362588341309, 9.22829193065235e-07, -0.001804434864335245, 0.0009356368348090252, -2.3209

In [54]:
# Emit the calibration source for board 8 (refits the shared pipelines).
print(generate_code(8))



{"O3": {"intercept": 254.0218731350004, "weights": [0.0, -1.1563595176534505, 0.5768605369552319, 0.27977488564128233, -0.0777923014663584, 0.0, -0.6729144109954817]}, "NO2": {"intercept": 210.72601925961888, "weights": [0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.002538978262132088, -0.003121428967740682, -0.0004184132124288028, 0.00017222453882580245, 0.001885141335840755, 0.0004108135055299739, 0.0006849197673423578, -0.0025720597239259437, 0.0007242897640302858, -0.0028027322105233575, -0.0010758848990381736, -0.0003439817500832972, -0.0005213416903659827, 0.004236300211599229, -0.0010320268639009873, -0.0006349832111110764, 0.004806053855476336, 0.0014644998522670108, 0.0006281693676303309, -0.010726794340331981, 0.0010797091432756304, 0.0016447130829139614, -0.0009181264558453305, 2.7166017812942863e-05, 0.0006505160371262054, 9.309793421338685e-05, -0.0009765826473761864, -0.00014653678847105465, -0.0006468820384312948, -0.0009415788543265367, 1.110911519025407e-05, -2.15

In [55]:
# Emit the calibration source for board 9. read_board does not split this
# board, so the models are fitted on all of its rows.
print(generate_code(9))



{"O3": {"intercept": 70.5482564617913, "weights": [0.0, -0.4594531588585707, 0.36000714328918854, -0.11631553931546695, -0.11957281983039345, 2.303909580976486, 0.14656248158502222]}, "NO2": {"intercept": 42.50796591770853, "weights": [0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.3133779301058561, 8.475240216329029e-05, -0.0012231485020407814, 0.0017287611753339196, 0.004602631600136268, 0.0005698146113430367, -0.000794664956956683, -0.01267311021059106, -0.0002124811526529831, 0.0013247302779178757, -0.005373270904751683, -0.0016339644492835786, -0.00042201039456401325, 0.0002840120080081645, 0.0056501133227505745, 0.0001571587912401297, 0.00010375195464927867, 0.003129666554642594, -0.0012701304502413906, -0.0004283466217280731, -0.0092466755141929, -0.0004620785780105832, -0.0003588359399135106, -1.4771269789589606e-05, -0.00012586679445688028, -0.005736153886028624, -0.00027089280634710303, 0.0006360798409883186, 1.834326903869435e-05, 0.0009377243377931876, 0.0001914247337511761