In [1]:
# importing helper functions
from data import *

In [2]:
# importing river
import numbers
from river import compose
from river import datasets
from river import evaluate
from river import linear_model
from river import neighbors
from river import metrics
from river import preprocessing
from river import tree
from river import stream
import graphviz

In [3]:
from tqdm import tqdm

In [4]:
# import preprocessing tools
import numpy as np
import pandas as pd

In [5]:
# Load the two inputs shared by every sensor run.
# load_static is not defined in this notebook; presumably it comes from the
# star-import of `data` in the first cell.
# NOTE: read_pickle can execute arbitrary code -- only load trusted files.
WEATHER_PICKLE = 'data/weather/weather.pkl'

static_df = load_static()
weather_df = pd.read_pickle(WEATHER_PICKLE)

In [9]:
# Sensor ids to process; each one is passed to make_final() and
# get_all_candidates() by the evaluation loop below.
sensors = [
    1, 24, 35, 43, 68, 103, 105, 106, 108, 109,
    110, 116, 117, 122, 124, 144, 145, 157, 168, 177,
    179, 223, 225, 236, 239, 245, 249, 253, 257, 259,
    262, 285, 286, 290, 293, 301, 303, 312, 358, 359,
    362, 365, 367, 377, 378, 379, 382, 384, 385, 388,
    389, 391, 392, 394, 395,
]

def make_target_X(df):
    """Append look-ahead target columns to a time-indexed frame.

    For every horizon ``h`` (in hours) a source column is moved ``h`` hours
    back along the index, so the row stamped ``t`` carries the value the
    source column has at ``t + h``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supports ``shift(freq=...)`` (e.g. a
        ``DatetimeIndex``) and which contains the columns ``pc``,
        ``pc_ma_6H`` and ``pc_ma_1D``.

    Returns
    -------
    pandas.DataFrame
        All columns of ``df`` followed by ``pc_t{1,3,6,12,24,36}``,
        ``pc_ma6_t{6,12,18,24}`` and ``pc_ma24_t24``.  Columns are aligned
        with an inner join, so only index values present in ``df`` *and* in
        every shifted column survive.

    Raises
    ------
    KeyError
        If one of the required source columns is missing from ``df``.
    """
    # A Timedelta instead of the string alias '1H': the upper-case alias is
    # deprecated in recent pandas, while a Timedelta is accepted by old and
    # new versions alike.
    one_hour = pd.Timedelta(hours=1)

    # (source column, target-name template, horizons in hours)
    target_specs = [
        ("pc", "pc_t{}", [1, 3, 6, 12, 24, 36]),
        ("pc_ma_6H", "pc_ma6_t{}", [6, 12, 18, 24]),
        ("pc_ma_1D", "pc_ma24_t{}", [24]),
    ]

    pieces = [df]
    for source, template, horizons in target_specs:
        column = df[source]  # shift only the column that is needed
        for h in horizons:
            shifted = column.shift(periods=-h, freq=one_hour)
            pieces.append(shifted.rename(template.format(h)))

    # One inner-join concat instead of re-concatenating the growing frame
    # for every horizon; the resulting index is the same intersection.
    return pd.concat(pieces, axis=1, join="inner")

# A single progress bar, rewound and reused for every sensor.
pbar = tqdm(total=50)

# One dict of metric histories per sensor.
result = []

# Targets learned jointly by the multi-target tree (identical for all sensors).
target_features = ["pc15", "pc30", "pc45", "pc_t1", "pc_t3", "pc_t6", "pc_t12", "pc_t24", "pc_t36",
                   "pc_ma6_t6", "pc_ma6_t12", "pc_ma6_t18", "pc_ma6_t24", "pc_ma24_t24"]


def _record_scores(history, smape_all, smape_tail, r2_all, r2_tail):
    """Append the current value of each running metric to its history list."""
    history["smape"].append(smape_all.get())
    history["smape08"].append(smape_tail.get())
    history["r2"].append(r2_all.get())
    history["r208"].append(r2_tail.get())


for s in sensors:
    # Build the per-sensor frame (the sensor id is the last argument) and
    # attach the look-ahead target columns.
    final_df = make_final(static_df, weather_df, s)
    X_df = make_target_X(final_df)

    selected_features = get_all_candidates(s)
    X = X_df[selected_features]
    y = X_df[target_features]

    dataset = stream.iter_pandas(X, y)

    # Multi-target tree with a separate KNN regressor instance per target
    # as its leaf model.
    modelI = tree.iSOUPTreeRegressor(
        grace_period=20,
        leaf_prediction='adaptive',
        leaf_model={name: neighbors.KNNRegressor() for name in target_features},
    )

    # `metric` / `metricS` see every sample; the *08 variants are only
    # updated once more than 80 % of the rows have been streamed.
    metric08 = metrics.multioutput.MicroAverage(metrics.R2())
    metric = metrics.multioutput.MicroAverage(metrics.R2())
    metricS08 = metrics.multioutput.MicroAverage(metrics.SMAPE())
    metricS = metrics.multioutput.MicroAverage(metrics.SMAPE())

    n_rows = len(y)
    step = n_rows / 50          # float: roughly 50 checkpoints per sensor
    tail_start = n_rows * 0.8   # samples with i above this feed the *08 metrics

    pbar.reset()

    partial_result = { "sensor": s, "horizon": -1, "model": "iSOUPTree", "smape": [], "smape08": [], "r2": [], "r208": []}

    for i, (xi, yi) in enumerate(dataset, start=1):
        # Predict first, then learn from the same sample.
        yp = modelI.predict_one(xi)
        modelI.learn_one(xi, yi)

        # update metrics
        metric.update(yi, yp)
        metricS.update(yi, yp)
        if i > tail_start:
            metric08.update(yi, yp)
            metricS08.update(yi, yp)

        # Checkpoint: fires for the first integer i at or after each
        # multiple of `step`.
        if i % step < 1:
            pbar.update(1)
            pbar.set_description("Sensor {}: R2 = {:.4f}".format(s, metric.get()))
            _record_scores(partial_result, metricS, metricS08, metric, metric08)

    # Final snapshot once the stream is exhausted (always recorded, in
    # addition to the in-loop checkpoints).
    pbar.update(1)
    pbar.set_description("Sensor {}: R2 = {}".format(s, metric.get()))
    _record_scores(partial_result, metricS, metricS08, metric, metric08)

    result.append(partial_result)

pbar.close()

  return 1 - (self._residual_sum_of_squares / total_sum_of_squares)
Sensor 395: R2 = 0.6359008795527346: : 51it [03:09,  3.71s/it]                                                         


In [8]:
# Close the shared tqdm progress bar.
# NOTE(review): the evaluation cell above already calls pbar.close() when it
# finishes, so this cell looks like a leftover from an interrupted run
# (its execution count is lower than the loop cell's) -- confirm and remove.
pbar.close()

Sensor 1: R2 = 0.4423:   4%|██▍                                                         | 2/50 [00:15<06:10,  7.72s/it]


In [10]:
# Save the per-sensor metric histories collected in `result`.
# pickle is imported explicitly here: no visible cell imports it, so the
# original only worked if the name leaked in through `from data import *`.
import pickle

with open("results/river_isouptree_additional.pkl", "wb") as f:
    pickle.dump(result, f)