In [15]:
# importing helper functions
from data import *

In [16]:
# standard library
import numbers
import os
import pickle

# graphviz and river (online machine learning)
import graphviz
from river import compose
from river import datasets
from river import ensemble
from river import evaluate
from river import linear_model
from river import metrics
from river import neighbors
from river import neural_net
from river import preprocessing
from river import stream
from river import tree

In [17]:
from tqdm import tqdm

In [18]:
# import preprocessing tools
import numpy as np
import pandas as pd

In [21]:
# dataset creation
def make_target(df, h):
    """Split a sensor frame into a feature frame ``X`` and a target series ``y``.

    The horizon code ``h`` selects both the target column and the lead:

    * ``h == 1024``: ``pc_ma_1D`` taken 24 rows ahead
    * ``h > 100``: ``pc_ma_6H`` taken ``h - 100`` rows ahead
    * ``h >= 1``: ``pc`` taken ``h`` rows ahead
    * ``h`` in ``{0.25, 0.5, 0.75}``: the ``pc15`` / ``pc30`` / ``pc45``
      column of the same row (no lead)

    For a lead of ``k`` the target's timestamps are moved back by ``k`` hours,
    its first ``k`` rows are dropped and the last ``k`` feature rows are
    dropped, so ``X`` and ``y`` have equal length and pair up by position.
    NOTE(review): the timestamps of ``X`` and ``y`` only coincide when
    consecutive rows are exactly one hour apart - confirm for the input data.

    The ``pc15``, ``pc30`` and ``pc45`` columns never appear in ``X``.

    Unlike the previous implementation, ``df`` itself is left untouched: the
    helper columns are dropped from a copy, so the function can be called
    repeatedly on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the target columns listed above plus ``pc15``,
        ``pc30`` and ``pc45``. The lead is applied as an hourly offset on the
        index, so a datetime-like index is expected.
    h : int or float
        Horizon code as described above.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        ``X`` and ``y``.

    Raises
    ------
    KeyError
        If ``h`` is below 1 and not one of 0.25, 0.5, 0.75, or if a required
        column is missing from ``df``.
    """
    sub_hour_columns = ['pc15', 'pc30', 'pc45']
    sub_hour_targets = {0.25: 'pc15', 0.5: 'pc30', 0.75: 'pc45'}

    # Resolve the horizon code into (target column, lead in rows / hours).
    if h == 1024:
        target_column, lead = 'pc_ma_1D', 24
    elif h > 100:
        target_column, lead = 'pc_ma_6H', h - 100
    elif h >= 1:
        target_column, lead = 'pc', h
    else:
        if h not in sub_hour_targets:
            raise KeyError("unsupported horizon code: {!r}".format(h))
        target_column, lead = sub_hour_targets[h], 0

    if lead:
        # A Timedelta is used instead of the '1H' alias, which newer pandas
        # versions deprecate; the resulting one-hour offset is the same.
        one_hour = pd.Timedelta(hours=1)
        y = df[target_column].shift(periods=-lead, freq=one_hour).iloc[lead:]
    else:
        y = df[target_column]

    # drop() returns a new frame, so the caller's df keeps all its columns.
    features = df.drop(columns=sub_hour_columns)
    X = features.iloc[:-lead] if lead else features
    return X, y

# load data
# load_static is not defined in this notebook; presumably it is provided by
# `from data import *` - TODO confirm.
static_df = load_static()
# NOTE(review): read_pickle unpickles the file, and unpickling can execute
# arbitrary code - only load files from a trusted source.
weather_df = pd.read_pickle('data/weather/weather.pkl')

In [24]:
# Ids of the sensors to evaluate; each id is passed to make_final and
# get_all_candidates by the evaluation loop.
sensors = [1, 24, 35, 43, 68, 103, 105, 106, 108, 109, 110, 116, 117, 122, 124, 144, 145, 157, 168, 177,
    179, 223, 225, 236, 239, 245, 249, 253, 257, 259, 262, 285, 286, 290, 293, 301, 303, 312, 358, 359, 362,
    365, 367, 377, 378, 379, 382, 384, 385, 388, 389, 391, 392, 394, 395]
# Horizon codes as interpreted by make_target:
#   1024           -> pc_ma_1D, 24 steps ahead
#   values > 100   -> pc_ma_6H, (code - 100) steps ahead
#   values >= 1    -> pc, that many steps ahead
#   0.25/0.5/0.75  -> pc15 / pc30 / pc45
# horizons = [0.25, 0.5, 0.75, 1, 3, 6, 12, 24, 36]
horizons = [106, 112, 118, 124, 1024]

# Model specs are (label, constructor expression) pairs. The expression is a
# string that the evaluation loop turns into a fresh model with eval().
# NOTE(review): models1 is not referenced by the evaluation loop as written
# (it selects models15 for every horizon) - confirm whether that is intended.
models1 = [
    #('LinReg', 'linear_model.LinearRegression()'),
    ('KNN', 'neighbors.KNNRegressor()'),
    ('HT', 'tree.HoeffdingTreeRegressor(grace_period=20, leaf_prediction="adaptive")'),
    ('HAT', 'tree.HoeffdingAdaptiveTreeRegressor(grace_period=20, leaf_prediction="adaptive")'),
    ('ARF', 'ensemble.AdaptiveRandomForestRegressor(grace_period=20, leaf_prediction="adaptive")')
]

# Same labels as models1, but the tree-based models are given
# leaf_model=neighbors.KNNRegressor().
models15 = [
    #('LinReg', 'linear_model.LinearRegression()'),
    ('KNN', 'neighbors.KNNRegressor()'),
    ('HT', 'tree.HoeffdingTreeRegressor(grace_period=20, leaf_prediction="adaptive", leaf_model=neighbors.KNNRegressor())'),
    ('HAT', 'tree.HoeffdingAdaptiveTreeRegressor(grace_period=20, leaf_prediction="adaptive", leaf_model=neighbors.KNNRegressor())'),
    ('ARF', 'ensemble.AdaptiveRandomForestRegressor(grace_period=20, leaf_prediction="adaptive", leaf_model=neighbors.KNNRegressor())')
]

# One dict per (sensor, horizon, model) run is appended here by the loop.
result = []
# create progress bar
# The bar is reset for every run; its total matches the number of progress
# ticks the loop aims to emit per run.
pbar = tqdm(total=50)

# Test-then-train evaluation of every model on every sensor / horizon pair.
# For each run a dict with the sensor, horizon, model label and the metric
# histories ("smape", "smape08", "r2", "r208") is appended to `result`.
# The "08" metrics are only updated once more than 80 % of the stream has
# been processed.

def _log_checkpoint(bar, history, tracked, sensor, horizon, model_name):
    """Tick the progress bar once and append the current metric values to `history`."""
    bar.update(1)
    bar.set_description("Sensor {}: h = {}, m = {}, R2 = {:.4f}".format(
        sensor, horizon, model_name, tracked["r2"].get()))
    for key, tracker in tracked.items():
        history[key].append(tracker.get())


try:
    for s in sensors:
        for h in horizons:
            # NOTE(review): the previous code assigned models15 both by default
            # and inside an `if h == 0.25` check, so models1 was never used.
            # That behaviour is kept; confirm whether models1 was meant for
            # the horizons other than 0.25.
            models = models15

            # The data depends only on the sensor and the horizon, so it is
            # prepared once here instead of once per model.
            # load sensor - last parameter is id
            final_df = make_final(static_df, weather_df, s)

            # make dataset
            X_df, y_df = make_target(final_df, h)

            selected_features = get_all_candidates(s)
            X = X_df[selected_features]
            y = y_df

            # Spacing between checkpoints: the stream length divided by the
            # number of ticks on the progress bar.
            step = len(y) / pbar.total
            # Position after which the "08" metrics start being updated.
            tail_start = len(y) * 0.8

            for name, model in models:
                dataset = stream.iter_pandas(X, y)
                # `model` is one of the constructor expressions hard-coded in
                # the model lists; eval builds a fresh, untrained instance.
                reg = eval(model)

                metric = metrics.R2()
                metric08 = metrics.R2()
                metricS = metrics.SMAPE()
                metricS08 = metrics.SMAPE()
                # Result key -> metric object, in the order they are recorded.
                tracked = {
                    "smape": metricS,
                    "smape08": metricS08,
                    "r2": metric,
                    "r208": metric08,
                }

                pbar.reset()

                partial_result = { "sensor": s, "horizon": h, "model": name, "smape": [], "smape08": [], "r2": [], "r208": []}

                for i, (xi, yi) in enumerate(dataset, start=1):
                    # predict first, then learn from the same sample
                    yp = reg.predict_one(xi)
                    reg.learn_one(xi, yi)

                    # update metrics
                    metric.update(yi, yp)
                    metricS.update(yi, yp)
                    if i > tail_start:
                        metric08.update(yi, yp)
                        metricS08.update(yi, yp)

                    # true whenever i has just passed a multiple of `step`
                    if i % step < 1:
                        _log_checkpoint(pbar, partial_result, tracked, s, h, name)

                # Always record the closing values of the run.
                # NOTE(review): depending on float rounding the last sample may
                # already have hit a checkpoint, in which case the closing
                # values appear twice in the histories.
                _log_checkpoint(pbar, partial_result, tracked, s, h, name)

                result.append(partial_result)
finally:
    # Close the bar even when the run is interrupted or fails.
    pbar.close()


  return 1 - (self._residual_sum_of_squares / total_sum_of_squares)
Sensor 395: h = 1024, m = ARF, R2 = 0.8020: 100%|██████████████████████████████████████| 50/50 [02:21<00:00,  2.84s/it]


In [25]:
# save the evaluation results collected in `result`
# Create the output folder first so that the write cannot fail on a missing
# directory after a long run.
os.makedirs("results", exist_ok=True)
with open("results/river_st_general_additional.pkl", "wb") as f:
    pickle.dump(result, f)

In [23]:
# NOTE(review): the experiment cell already calls pbar.close() when its loop
# finishes. This extra call looks like a leftover from an interrupted run
# (its execution count precedes the experiment cell's) - presumably it can be
# removed; confirm.
pbar.close()

Sensor 1: h = 106, m = KNN, R2 = 0.5638: 100%|█████████████████████████████████████████| 50/50 [00:07<00:00,  6.92it/s]
