In [1]:
import pickle
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from labels import label_datetimes
from scipy.signal import butter, filtfilt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
# Low-pass filter settings. get_pair_net passes these to apply_lowpass_filter
# as (cutoff, fs, order), which forwards them to scipy.signal.butter.
CUTOFF = 5  # cutoff frequency, expressed in the same units as FS
FS = 50  # sampling frequency of the sensor signal -- presumably Hz; TODO confirm
ORDER = 1  # order of the Butterworth filter


def apply_lowpass_filter(data, cutoff, fs, order=5):
    """Smooth ``data`` with a zero-phase digital Butterworth low-pass filter.

    Parameters
    ----------
    data : array_like
        Signal to filter. ``filtfilt`` is applied with its default axis
        (the last one).
    cutoff : float
        Cutoff frequency, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    order : int, optional
        Order of the Butterworth design (default 5).

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``data``.
    """
    numerator, denominator = butter(
        order, cutoff, btype="low", analog=False, fs=fs
    )
    # Forward-backward filtering cancels the phase shift a single pass adds.
    return filtfilt(numerator, denominator, data)

In [3]:
# Load one day's interpolated recording and sanity-check its structure.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
DAY = "day_11"
with open(f"data/{DAY}/interp.pkl", "rb") as f:
    sensor_data = pickle.load(f)

# Top-level keys, per-sensor keys and the container type of the time axis.
print(sensor_data.keys())
air_first = sensor_data["air"][0]
print(air_first.keys())
print(type(air_first["t"]))

# Check that the time axis of air sensor 7 matches air sensor 0 and
# sample sensor 0, i.e. that these channels share one time base.
air_last_t = sensor_data["air"][7]["t"]
print(np.array_equal(air_first["t"], air_last_t))
print(np.array_equal(sensor_data["sample"][0]["t"], air_last_t))

dict_keys(['air', 'sample'])
dict_keys(['t', 'hps'])
<class 'numpy.ndarray'>
True
True


In [4]:
def get_pair_net(sensor_data, sensor_l, sensor_r, as_log=False):
    """Return the net (sample minus air) signal for a pair of sensors.

    For both sensors the ``"hps"`` readings of the ``"air"`` and ``"sample"``
    recordings are low-pass filtered with the module-level ``CUTOFF``, ``FS``
    and ``ORDER`` settings, optionally log-transformed, and subtracted
    (sample - air). The two differences are stacked along axis 0.

    Parameters
    ----------
    sensor_data : dict
        Mapping with ``"air"`` and ``"sample"`` entries, each indexable by
        sensor and holding ``"hps"`` and ``"t"`` values.
    sensor_l, sensor_r : int
        Indices of the two sensors forming the pair (e.g. 2 and 3).
    as_log : bool, optional
        If True, take the natural log of every filtered signal before
        subtracting.

    Returns
    -------
    t : the ``"t"`` entry of the left sensor's air recording.
    net : numpy.ndarray
        Left-sensor difference followed by right-sensor difference along
        axis 0. (Assumes ``"hps"`` is 2-D, since callers concatenate the
        result along axis 1 -- TODO confirm.)
    """
    # Channel order: air/left, air/right, sample/left, sample/right.
    raw = [
        np.array(sensor_data[phase][sensor]["hps"])
        for phase in ("air", "sample")
        for sensor in (sensor_l, sensor_r)
    ]
    channels = [
        apply_lowpass_filter(signal, CUTOFF, FS, ORDER) for signal in raw
    ]
    if as_log:
        channels = [np.log(signal) for signal in channels]

    air_left, air_right, sample_left, sample_right = channels
    stacked = np.concatenate(
        (sample_left - air_left, sample_right - air_right), axis=0
    )
    return sensor_data["air"][sensor_l]["t"], stacked

In [5]:
def get_daily_net_pair(days, sensor_l, sensor_r, as_log=True):
    """Load several days and join their net pair signals side by side.

    For every entry of ``days`` the file ``data/day_<day>/interp.pkl`` is
    unpickled and passed to ``get_pair_net``. The per-day results are
    concatenated along axis 1, and a synthetic x-axis is built so that each
    day occupies its own band when plotted.

    Parameters
    ----------
    days : iterable
        Day identifiers used to build the file paths.
    sensor_l, sensor_r : int
        Indices of the sensor pair, forwarded to ``get_pair_net``.
    as_log : bool, optional
        Forwarded to ``get_pair_net`` (default True).

    Returns
    -------
    ts : numpy.ndarray or None
        Synthetic x positions: ``1..n`` for each day, shifted by
        ``400 * position_of_day``, where ``n`` is the number of columns of
        that day's net signal.
    nets : numpy.ndarray or None
        The per-day net signals concatenated along axis 1.
        Both values are ``None`` when ``days`` is empty.
    """
    day_axes = []
    day_nets = []
    for i, day in enumerate(days):
        # NOTE: pickle.load can execute arbitrary code -- trusted files only.
        with open(f"data/day_{day}/interp.pkl", "rb") as f:
            sensor_data = pickle.load(f)

        # The recorded timestamps are intentionally not used: days are laid
        # out on an artificial axis instead.
        _, net = get_pair_net(sensor_data, sensor_l, sensor_r, as_log=as_log)

        # Derive the axis length from the data instead of assuming 100
        # samples, so x and y always have matching lengths. Days are spaced
        # 400 units apart (bands overlap only if a day exceeds 400 samples).
        n_samples = net.shape[1]
        day_axes.append(np.linspace(1, n_samples, n_samples) + (i * 400))
        day_nets.append(net)

    if not day_nets:
        return None, None

    # Concatenate once at the end rather than re-copying on every iteration.
    return np.concatenate(day_axes), np.concatenate(day_nets, axis=1)

In [6]:
def plot_days_net(t, ys, sl, sr, save=False):
    """Scatter-plot every row of ``ys`` against ``t`` in a single figure.

    Parameters
    ----------
    t : array_like
        Shared x values for all traces.
    ys : iterable of array_like
        One marker trace is drawn per element.
    sl, sr : int
        Sensor indices, used in the title and in the output file name.
    save : bool, optional
        If True, also write the figure to ``Net_<sl>_<sr>.pdf``.
    """
    fig = go.Figure()
    trace_style = dict(mode="markers", showlegend=False)
    for row in ys:
        fig.add_trace(go.Scatter(x=t, y=row, **trace_style))

    fig.update_yaxes(title_text="Resistance (Ohms)")
    fig.update_xaxes(title_text="Timestamp")

    heading = f"Net Sensor {sl}, {sr} Readings"
    no_padding = dict(t=0, r=0, b=0, l=0)
    layout_options = {
        "height": 500,
        "width": 800,
        "title_x": 0.5,
        "font_family": "Times New Roman",
        "title_font_family": "Times New Roman",
        "title": dict(text=heading, pad=no_padding),
        "margin": dict(t=50, r=10, b=0, l=0),
    }
    fig.update_layout(**layout_options)

    fig.show()
    if not save:
        return
    fig.write_image(f"Net_{sl}_{sr}.pdf")

In [7]:
# Plot the net readings of each adjacent sensor pair: (0, 1), (2, 3), (4, 5), (6, 7).
for s in range(4):
    sl = 2 * s
    sr = sl + 1
    ts, nets = get_daily_net_pair(
        days=[1, 2, 3, 5, 7, 9, 11], sensor_l=sl, sensor_r=sr
    )
    plot_days_net(t=ts, ys=nets, sl=sl, sr=sr, save=False)

In [7]:
def get_daily_net_pair_box(days, sensor_l, sensor_r):
    """Collect flattened log-net values per day, labelled for a box plot.

    For each day the file ``data/day_<day>/interp.pkl`` is unpickled, the
    log net signal of the sensor pair is computed with ``get_pair_net`` and
    flattened, and every value is tagged with the label ``"Day <day>"``.

    Returns
    -------
    dict
        ``{"t": [...], "y": [...]}`` -- two parallel lists holding the day
        label and the value of every sample.
    """
    labels, values = [], []
    for day in days:
        folder = f"day_{day}"
        # NOTE: pickle.load can execute arbitrary code -- trusted files only.
        with open(f"data/{folder}/interp.pkl", "rb") as handle:
            sensor_data = pickle.load(handle)

        # Only the net signal is needed; the timestamps are dropped.
        flat_net = get_pair_net(
            sensor_data, sensor_l, sensor_r, as_log=True
        )[1].flatten()
        labels += [f"Day {day}"] * len(flat_net)
        values += list(flat_net)
    return {"t": labels, "y": values}

In [11]:
def get_lab_scores(days, label_datetimes, as_log=False, scale=False):
    """Look up the lab target value of each requested day.

    Parameters
    ----------
    days : iterable
        Day identifiers to look up, in output order.
    label_datetimes : iterable of dict
        Records with at least a ``"day"`` and a ``"target"`` key. The first
        record whose ``"day"`` equals the requested day is used.
    as_log : bool, optional
        If True, apply the natural log to the targets.
    scale : bool, optional
        If True, standardise the targets with ``StandardScaler`` and print
        the fitted mean and standard deviation.

    Returns
    -------
    day_names : list of str
        ``"Day <day>"`` labels, one per requested day.
    targets : numpy.ndarray
        1-D array of targets in the same order as ``day_names``.

    Raises
    ------
    ValueError
        If a requested day has no record in ``label_datetimes``.
    """
    targets = []
    day_names = []
    for day in days:
        label = next(
            (entry for entry in label_datetimes if entry["day"] == day), None
        )
        if label is None:
            # Fail with a clear message instead of the opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ValueError(f"no label entry found for day {day!r}")
        day_names.append(f"Day {day}")
        targets.append(label["target"])

    targets = np.array(targets)
    if as_log:
        targets = np.log(targets)
    if scale:
        scaler = StandardScaler()
        targets = scaler.fit_transform(targets.reshape(-1, 1))
        print(scaler.mean_, np.sqrt(scaler.var_))
        # ravel (not squeeze) so a single day still yields a 1-D array that
        # callers can index per day.
        targets = targets.ravel()
    return day_names, targets

In [12]:
# Display the standardised lab targets for the analysed days; the fitted
# mean and standard deviation are printed by get_lab_scores itself.
get_lab_scores(
    days=[1, 2, 3, 5, 7, 9, 11],
    label_datetimes=label_datetimes,
    as_log=False,
    scale=True,
)

[86994792.85714285] [2.07410523e+08]


(['Day 1', 'Day 2', 'Day 3', 'Day 5', 'Day 7', 'Day 9', 'Day 11'],
 array([-0.41943134, -0.41942695, -0.41942324, -0.41456331, -0.39098688,
        -0.38544232,  2.44927403]))

In [18]:
def plot_boxes(days, sl, sr, save=False):
    """Box-plot the per-day net sensor values with the lab targets overlaid.

    The boxes (left y-axis) show the values returned by
    ``get_daily_net_pair_box`` for the sensor pair. The line (right y-axis,
    ``"y2"``) shows the negated standardised targets from ``get_lab_scores``.

    Parameters
    ----------
    days : iterable
        Day identifiers to include.
    sl, sr : int
        Sensor indices of the pair; also used in the title and file name.
    save : bool, optional
        If True, also write the figure to ``Box_<sl>_<sr>.pdf``.
    """
    box_data = get_daily_net_pair_box(days, sl, sr)
    sensor_boxes = go.Box(
        x=box_data["t"],
        y=box_data["y"],
        name="Sensors",
        marker={"color": "slateblue"},
    )
    fig = go.Figure(data=sensor_boxes)

    day_names, counts = get_lab_scores(
        days, label_datetimes, as_log=False, scale=True
    )
    # The standardised targets are plotted with their sign flipped.
    lab_line = go.Scatter(
        x=day_names,
        y=-counts,
        yaxis="y2",
        name="TAMB",
        marker={"color": "crimson"},
    )
    fig.add_trace(lab_line)

    fig.update_xaxes(title_text="Days")

    heading = f"Sensors {sl}, {sr} and TAMB Data"
    layout_options = {
        "height": 400,
        "width": 600,
        "title_x": 0.5,
        "font_family": "Times New Roman",
        "title_font_family": "Times New Roman",
        "title": {"text": heading, "pad": {"t": 0, "r": 0, "b": 0, "l": 0}},
        "margin": {"t": 50, "r": 10, "b": 0, "l": 0},
        "yaxis": {
            "title": {"text": "Resistance (Ohms)"},
            "side": "left",
        },
        # Secondary axis drawn on top of the primary one, ticks kept in sync.
        "yaxis2": {
            "title": {"text": "TAMB"},
            "side": "right",
            "overlaying": "y",
            "tickmode": "sync",
        },
    }
    fig.update_layout(**layout_options)

    fig.show()
    if not save:
        return
    fig.write_image(f"Box_{sl}_{sr}.pdf")

In [19]:
# Walk over the four sensor pairs but only render (and save) the box plot
# for the pair whose left sensor is 2, i.e. sensors 2 and 3.
for s in range(4):
    sl, sr = 2 * s, 2 * s + 1
    if sl != 2:
        continue
    plot_boxes([1, 2, 3, 5, 7, 9, 11], sl, sr, save=True)

In [12]:
def get_regression_data(days, labels, sl, sr, as_log=False):
    """Build the feature matrix and per-sample targets for regression.

    Features are the transposed output of ``get_daily_net_pair`` (one row per
    sample). Targets are the standardised lab values from ``get_lab_scores``
    (always taken without log, regardless of ``as_log``), each repeated for
    every sample of its day.

    Parameters
    ----------
    days : iterable
        Day identifiers to load.
    labels : iterable of dict
        Label records forwarded to ``get_lab_scores``.
    sl, sr : int
        Sensor indices of the pair.
    as_log : bool, optional
        Forwarded to ``get_daily_net_pair`` only.

    Returns
    -------
    nets : numpy.ndarray
        Feature matrix with one row per sample.
    targets : numpy.ndarray
        1-D array with one target per row of ``nets``.

    Raises
    ------
    ValueError
        If the number of samples is not an exact multiple of the number of
        days, so the targets cannot be aligned with the rows.
    """
    _, nets = get_daily_net_pair(days, sl, sr, as_log=as_log)
    nets = np.transpose(nets)
    _, lab_counts = get_lab_scores(days, labels, scale=True)
    lab_counts = np.atleast_1d(lab_counts)

    # Derive the number of samples per day from the data instead of
    # hard-coding 100, so X and y always stay aligned.
    samples_per_day, leftover = divmod(nets.shape[0], lab_counts.size)
    if leftover:
        raise ValueError(
            f"{nets.shape[0]} samples cannot be split evenly "
            f"across {lab_counts.size} days"
        )

    targets = np.repeat(lab_counts, samples_per_day)
    return nets, targets

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, ElasticNet, SGDRegressor, BayesianRidge

In [14]:
rs = 42

# Factories rather than instances, so every sensor pair is scored with a
# freshly initialised model. The seed is passed to the stochastic estimators
# (gradient boosting, SGD) so the exported scores are reproducible.
regressors = {
    "LR": lambda: LinearRegression(),
    "GBR": lambda: GradientBoostingRegressor(random_state=rs),
    "EN": lambda: ElasticNet(),
    "SGDR": lambda: SGDRegressor(random_state=rs),
    "SVR": lambda: SVR(),
    "BR": lambda: BayesianRidge(),
}

# One list of scores per regressor; entries follow the sensor-pair order
# (0 & 1, 2 & 3, 4 & 5, 6 & 7).
r2_dict = {reg: [] for reg in regressors}
mae_dict = {reg: [] for reg in regressors}


for s in range(0, 8, 2):
    s_l, s_r = s, s+1
    print(f"Sensors {s_l} & {s_r}")

    X, y = get_regression_data([1, 2, 3, 5, 7, 9, 11], label_datetimes, s_l, s_r, as_log=True)
    # NOTE(review): rows from the same day share one target and are split at
    # random, so train and test both contain samples of every day -- confirm
    # this is the intended evaluation protocol.
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=rs)
    # Fit the scaler on the training split only, then reuse it for the test
    # split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    for reg, make_model in regressors.items():
        model = make_model().fit(X_train, y_train)
        y_pred = model.predict(X_test)
        score_r2 = np.round(r2_score(y_test, y_pred), 3)
        score_mae = np.round(mean_absolute_error(y_test, y_pred), 3)
        r2_dict[reg].append(score_r2)
        mae_dict[reg].append(score_mae)

# Export one row per sensor pair and one column per regressor.
import pandas as pd
df = pd.DataFrame(r2_dict)
df.to_excel("regressor_scores_r2.xlsx")
df = pd.DataFrame(mae_dict)
df.to_excel("regressor_scores_mae.xlsx")

Sensors 0 & 1
Sensors 2 & 3
Sensors 4 & 5
Sensors 6 & 7


In [17]:
def get_clf_data(days, classes, sl, sr, as_log=False):
    """Build the feature matrix and per-sample class labels.

    Features are the transposed output of ``get_daily_net_pair`` (one row per
    sample). ``classes[i]`` is the label of ``days[i]`` and is repeated for
    every sample of that day.

    Parameters
    ----------
    days : sequence
        Day identifiers to load.
    classes : sequence
        One class label per entry of ``days``.
    sl, sr : int
        Sensor indices of the pair.
    as_log : bool, optional
        Forwarded to ``get_daily_net_pair``.

    Returns
    -------
    nets : numpy.ndarray
        Feature matrix with one row per sample.
    targets : numpy.ndarray
        1-D array with one class label per row of ``nets``.

    Raises
    ------
    ValueError
        If ``days`` and ``classes`` are empty or differ in length, or if the
        samples cannot be split evenly across the days.
    """
    if len(classes) == 0 or len(days) != len(classes):
        # A mismatch would otherwise silently produce misaligned X and y.
        raise ValueError(
            f"need one class per day, got {len(days)} days "
            f"and {len(classes)} classes"
        )

    _, nets = get_daily_net_pair(days, sl, sr, as_log=as_log)
    nets = np.transpose(nets)

    # Derive the number of samples per day from the data instead of
    # hard-coding 100.
    samples_per_day, leftover = divmod(nets.shape[0], len(classes))
    if leftover:
        raise ValueError(
            f"{nets.shape[0]} samples cannot be split evenly "
            f"across {len(classes)} days"
        )

    targets = np.repeat(classes, samples_per_day)
    return nets, targets

In [18]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score

from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

In [19]:
random_state = 42

# Mean cross-validated accuracy per classifier (one entry per sensor pair,
# in loop order) -- this is what gets exported to Excel afterwards.
df_dict = {
    "Decision Tree": [],
    "GaussianNB": [],
    "BaggingTree": [],
    "SVM": [],
    "XGB": [],
    "Logistic Reg.": [],
    "MLP": [],
    "VotingClassifier": []
}

# Same scores, grouped per sensor pair as {"clf": name, "accuracy": value}.
results = {"s_0_1": [], "s_2_3": [], "s_4_5": [], "s_6_7": []}


for s in range(0, 8, 2):
    s_l, s_r = s, s + 1
    print(f"Sensors {s_l} & {s_r}")

    X, y = get_clf_data(
        [1, 2, 3, 5, 7, 9, 11],
        [0, 0, 0, 1, 1, 2, 2],
        s_l, s_r, as_log=True)

    # Encoded labels are only used for XGBoost.
    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # Individual models.
    dt = DecisionTreeClassifier(criterion="entropy",
                                random_state=random_state)
    gnb = GaussianNB()
    extra_tree = ExtraTreeClassifier(random_state=random_state)
    bc = BaggingClassifier(extra_tree, random_state=random_state)
    svm = SVC(kernel="linear", random_state=random_state)
    xgb = XGBClassifier(objective="multi:softprob",
                        random_state=random_state)
    lr = LogisticRegression(max_iter=5000, random_state=random_state)
    mlp = MLPClassifier(
        hidden_layer_sizes=(256, ),
        activation="relu",
        learning_rate="adaptive",
        max_iter=10000,
        random_state=random_state)

    # Soft-voting ensemble over the naive Bayes, bagging and MLP members
    # only; the other models are scored individually but do not vote.
    classifiers = [("GaussianNB", gnb), ("BaggingTree", bc), ("MLP", mlp)]
    vclf = VotingClassifier(estimators=classifiers, voting="soft")

    # (display name, estimator, labels to score against), in report order.
    candidates = [
        ("Decision Tree", dt, y),
        ("GaussianNB", gnb, y),
        ("BaggingTree", bc, y),
        ("SVM", svm, y),
        ("XGB", xgb, y_enc),
        ("Logistic Reg.", lr, y),
        ("MLP", mlp, y),
        ("VotingClassifier", vclf, y),
    ]

    pair_key = f"s_{s_l}_{s_r}"
    for clf_name, estimator, labels in candidates:
        # cross_val_score clones the estimator for every fold, so the
        # instances shared with the ensemble are never fitted in place.
        accuracy = np.mean(cross_val_score(estimator, X, labels, cv=5))
        rounded = np.round(accuracy, 3)
        results[pair_key].append({"clf": clf_name, "accuracy": rounded})
        df_dict[clf_name].append(rounded)

Sensors 0 & 1
Sensors 2 & 3
Sensors 4 & 5
Sensors 6 & 7


In [20]:
# Export the cross-validated accuracies: one column per classifier, one row
# per sensor pair (in the order the pairs were evaluated).
df = pd.DataFrame(data=df_dict)
df.to_excel(excel_writer="model_accu_scores.xlsx")