In [1]:
import pandas as pd
from path import Path
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
DATA_DIR = Path("/home/sharad/data/metasense/la/")

In [3]:
INITIAL_REMOVE = {
    4: 40,
    10: 150,
    8: 100,
    9: 110
}

In [4]:
CODE_TEMPLATE = """
typedef struct gas_reading {{
    float CO;
    float O3;
    float NO2;
}} gas_reading;

{weights}

void calibrate_values(float coA, float coW, float o3A, float o3W, float no2A, float no2W, float temperature, float humidity, float* CO_WEIGHTS, float* CO_INTERCEPT, float* O3_WEIGHTS, float* O3_INTERCEPT, float* NO2_WEIGHTS, float* NO2_INTERCEPT, gas_reading* reading) {{
    int i = 0;
{body}
}}
"""
DOT_TEMPLATE = """
    for (i = 0; i < {length}; i++) {{
        {reading} += {vector}[i] * {weights}[i]
    }}
    {reading} += {intercept}
"""

In [5]:
def read_board(board_id):
    data = pd.read_csv(DATA_DIR / "csv"/ "B%u.csv" % board_id, index_col='datetime', parse_dates=True)
    data = data.iloc[INITIAL_REMOVE[board_id]:]
    data['temperature-F'] = 9.0 / 5.0 * data['temperature'] + 32
    if board_id != 9:
        train_data, test_data = data.loc[:"2016-08-18"], data.loc["2016-08-18":]
    else:
        train_data, test_data = data, None
    return train_data, test_data

CLASSIFIERS = {
    "CO": Pipeline([('poly', PolynomialFeatures(2)), ('cf', Lasso())]),
    "O3": Pipeline([('poly', PolynomialFeatures(1)), ('cf', LinearRegression())]),
    "NO2": Pipeline([('poly', PolynomialFeatures(2)), ('cf', Lasso())]),
}

FEATURES = {
    "CO": ["co-A", "co-W", "temperature", "humidity"],
    "O3": ["o3-A", "o3-W", "co-A", "co-W", "temperature", "humidity"],
    "NO2": ["o3-A", "o3-W", "no2-A", "no2-W", "co-A", "co-W", "temperature", "humidity"],
}

NAME_MAP = {
    "co-A": "coA",
    "co-W": "coW",
    "no2-A": "no2A",
    "no2-W": "no2W",
    "o3-A": "coA",
    "o3-W": "o3W",
    "temperature": "temperature",
    "humidity": "humidity"
}

TARGET = {
    "CO": "epa-co",
    "O3": "epa-o3",
    "NO2": "epa-no2",
}

GASES = ["CO", "O3", "NO2"]

In [6]:
def generate_code(board_id):
    train_data, test_data = read_board(board_id)
    code = []
    weights = []
    for gas in GASES:
        cf, features, target = CLASSIFIERS[gas], FEATURES[gas], TARGET[gas]
        cf.fit(train_data[features], train_data[target])
        code_vector, intercept, func = generate_vector(cf, features)
        weights.append("float %s_WEIGHTS[%u]  = %s;" % (gas, len(func), "{%s}" % (", ".join(map(str, func)))))
        code.append("    float %s_VECTOR[%u]  = %s;" % (gas, len(code_vector), "{%s}" % (", ".join(code_vector))))
        code.append(DOT_TEMPLATE.format(
            length=len(code_vector),
            reading="reading->%s" % gas,
            weights="%s_WEIGHTS" % gas,
            vector="%s_VECTOR" % gas,
            intercept="%s_INTERCEPT" % gas,
        ))
    return CODE_TEMPLATE.format(
        weights='\n'.join(weights),
        body='\n\n'.join(code)
    )
    
def generate_vector(cf, features):
    poly = cf.named_steps['poly']
    func = cf.named_steps['cf'].coef_
    intercept = cf.named_steps['cf'].intercept_
    terms = []
    for term in poly.powers_:
        polyterms = ["*".join([NAME_MAP[features[i]] for _ in range(power)]) for i, power in enumerate(term) if power > 0]
        if len(polyterms) == 0:
            polyterms = ['1']
        terms.append("*".join(polyterms))
    return terms, intercept, func

In [7]:
print(generate_code(4))


typedef struct gas_reading {
    float CO;
    float O3;
    float NO2;
} gas_reading;

float CO_WEIGHTS[15]  = {0.0, -0.0, 0.0, 0.0, -0.0, -8.72860846677e-06, 1.43571798828e-05, -0.0, -3.96294520271e-05, -3.86720451822e-07, -2.06044449801e-05, 1.08301333851e-05, 0.0, -0.0, 4.69428221595e-05};
float O3_WEIGHTS[7]  = {0.0, 0.0723098641865, 0.416072130129, 0.185795844219, -0.164396920107, 1.43244699914, -0.125465864646};
float NO2_WEIGHTS[45]  = {0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.000860361445119, 1.41038615129e-05, -7.28568423158e-05, -0.000105046288048, -0.000557737296549, 0.000300743022363, 0.000135890353424, -0.000334888515739, -0.000120263368842, -5.73460743846e-05, -7.6583891531e-05, -0.00170581390258, 3.39026286174e-05, 0.00115253997708, -0.000268035633429, -0.000894111147687, -0.000128582141142, -4.42014149123e-05, -1.8558843789e-05, -0.000300281240512, -0.000390010918522, -9.27718360604e-05, 0.00372428662728, -0.000354617705062, -0.000868914979232, 8.46520263916

In [8]:
print(generate_code(10))


typedef struct gas_reading {
    float CO;
    float O3;
    float NO2;
} gas_reading;

float CO_WEIGHTS[15]  = {0.0, 0.0, 0.0, 0.0, -0.0, 3.57086721466e-06, 5.74430692509e-06, -0.0, -5.2752474955e-05, -9.80442824932e-07, 1.29521500218e-05, 2.61058100608e-05, 0.0, -0.0, 0.000135040094131};
float O3_WEIGHTS[7]  = {0.0, -0.497248799926, 0.585577341987, 0.496786403006, -0.0155456295936, 0.57303594151, -0.709176842222};
float NO2_WEIGHTS[45]  = {0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.00124245011868, -0.00029713634259, 0.00347826529211, 0.000302119950081, 0.00142949715691, 0.00036841497746, 0.00300886787911, -0.00069757575326, -0.000906985656729, 0.000916276962926, 0.00305363914091, 4.03539186408e-05, -0.000652928093106, -0.00220031354636, -0.000346605598713, -0.0022444715305, -0.00388546737296, 0.000930391558763, 0.00277112524913, 0.000147956543906, -0.000950320468609, 0.00221911798949, -0.00263355152258, -0.00191156227308, -0.00243568791428, 0.00011574916543, -0.000315736258

In [9]:
print(generate_code(8))


typedef struct gas_reading {
    float CO;
    float O3;
    float NO2;
} gas_reading;

float CO_WEIGHTS[15]  = {0.0, 0.0, 0.0, 0.0, -0.0, 3.14172494916e-06, -4.32462417645e-06, 0.0, -9.92116361028e-06, -7.02646860113e-07, 9.21857360543e-06, 5.78079270424e-05, 0.0, -6.34754738819e-05, 3.79428687918e-05};
float O3_WEIGHTS[7]  = {0.0, -1.15635951765, 0.576860536955, 0.279774885641, -0.0777923014664, 0.0, -0.672914410995};
float NO2_WEIGHTS[45]  = {0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00253897826213, -0.00312142896774, -0.000418413212429, 0.000172224538826, 0.00188514133584, 0.00041081350553, 0.000684919767342, -0.00257205972393, 0.00072428976403, -0.00280273221052, -0.00107588489904, -0.000343981750083, -0.000521341690366, 0.0042363002116, -0.0010320268639, -0.000634983211111, 0.00480605385548, 0.00146449985227, 0.00062816936763, -0.0107267943403, 0.00107970914328, 0.00164471308291, -0.000918126455845, 2.71660178129e-05, 0.000650516037126, 9.30979342134e-05, -0.0009765826473

In [10]:
print(generate_code(9))


typedef struct gas_reading {
    float CO;
    float O3;
    float NO2;
} gas_reading;

float CO_WEIGHTS[15]  = {0.0, 0.0, 0.0, -0.0, 0.0, 8.45736720659e-06, 1.25390477964e-06, -0.000150814461057, 6.80186705276e-06, 2.49059253318e-06, -1.5296020715e-05, -3.71265056155e-05, 0.0, -0.0, 0.0};
float O3_WEIGHTS[7]  = {0.0, -0.459453158859, 0.360007143289, -0.116315539315, -0.11957281983, 2.30390958098, 0.146562481585};
float NO2_WEIGHTS[45]  = {0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.313377930106, 8.47524021633e-05, -0.00122314850204, 0.00172876117533, 0.00460263160014, 0.000569814611343, -0.000794664956957, -0.0126731102106, -0.000212481152653, 0.00132473027792, -0.00537327090475, -0.00163396444928, -0.000422010394564, 0.000284012008008, 0.00565011332275, 0.00015715879124, 0.000103751954649, 0.00312966655464, -0.00127013045024, -0.000428346621728, -0.00924667551419, -0.000462078578011, -0.000358835939914, -1.47712697896e-05, -0.000125866794457, -0.00573615388603, -0.000270892806347, 