#### Imports

In [1]:
import pandas as pd
from river import stream, compose, preprocessing, evaluate, metrics, linear_model, utils, stats, feature_extraction
from river.tree import HoeffdingTreeRegressor
import datetime
import matplotlib.pyplot as plt
from tqdm import tqdm
from river import neighbors
import copy

#### Functions

In [None]:
def float_converter(a):
    try:
        a=float(a)
    except ValueError:
        a=None
    return a

past_metrics = {"LastDay": 0}


def divide_date(x):  #feature enginnering
    try:
        dt = datetime.datetime.strptime(x['Datetime'], "%Y-%m-%d %H:%M:%S")
        x["Year"]=dt.year
        x["Month"]=dt.month
        x["Day"]=dt.day
        x["WeekOfYear"]=dt.isocalendar().week
        x["DayOfWeek"]=dt.weekday()
        x["DayOfYear"]=dt.timetuple().tm_yday
        x["Weekend"]= int(dt.weekday() >= 5)
        x["Quarter"]=int((dt.month - 1) // 3 + 1)
        x["Hour"]=dt.hour
        x["LastDay"]=past_metrics["LastDay"]
    except:
        x["Year"] = None
        x["Month"] = None
        x["Day"] = None
        x["WeekOfYear"] = None
        x["DayOfWeek"] = None
        x["DayOfYear"] = None
        x["Weekend"] = None
        x["Quarter"] = None
        x["Hour"] = None
        x["LastDay"] = None
    return x


def fit_model(model, metric):
    # Dataset8
    energy_consumption = stream.iter_csv(dataset_path, target="AEP_MW", drop_nones=True, converters={'AEP_MW':float_converter})

    y_trues = []
    y_preds = []
    for x, y in tqdm(energy_consumption, desc="Processing data", unit=" sample"):
        y_pred = model.predict_one(x)

        model.learn_one(x, y)

        metric.update(y, y_pred)

        past_metrics["LastDay"] = y
        y_trues.append(y)
        y_preds.append(y_pred)

    return y_trues, y_preds


def print_model_data(y_trues, y_preds):
    
    plt.figure(figsize=(15,15))
    plt.subplot(4,1,1)
    plt.plot(y_trues[:1000], marker='.', linestyle='-', color='b', label='True values')
    plt.grid(True)

    plt.figure(figsize=(15,15))
    plt.subplot(4,1,2)
    plt.plot(y_preds[:1000], marker='.', linestyle='-', color='r', label='Predicted values')
    plt.grid(True)

    plt.figure(figsize=(15,15))
    plt.subplot(4,1,3)
    plt.plot(y_trues[:1000], marker='.', linestyle='-', color='b', label='True values')
    plt.plot(y_preds[:1000], marker='.', linestyle='-', color='r', label='Predicted values')

    plt.figure(figsize=(15,15))
    plt.subplot(4,1,4)
    plt.plot(y_trues, marker='.', linestyle='-', color='b', label='True values')
    plt.plot(y_preds, marker='.', linestyle='-', color='r', label='Predicted values')

    # Personalización
    plt.title('Comparison of true and predicted values')
    plt.xlabel('Instance')
    plt.ylabel('Value')
    plt.grid(True)
    plt.legend()

    # Mostrar la gráfica
    plt.show()

#### Preprocessing

Reading the dataset

In [3]:
# data = pd.read_csv("../dataset/AEP_hourly.csv")
# data['Datetime'] = pd.to_datetime(data['Datetime'])

# # Ordenar por la columna 'Fecha' en orden ascendente
# df_sorted = data.sort_values(by='Datetime', ascending=True, ignore_index=True)
# df_sorted.to_csv("../dataset/AEP_hourly.csv", index=False)

Create data preprocessing pipeline

In [4]:
preprocessing_pipeline = compose.Pipeline(
    ('features', compose.TransformerUnion(
        ('date_features', compose.FuncTransformer(divide_date)),
        ('last_7_mean', feature_extraction.TargetAgg(by=None, how=utils.Rolling(stats.Mean(),7),target_name="last_7_mean")),
        ('last_14_mean', feature_extraction.TargetAgg(by=None, how=utils.Rolling(stats.Mean(),14), target_name="last_14_mean"))
    )),
    ('drop_non_features', compose.Discard('Datetime')),
)

Training Bayesian Linear Regression Model

In [5]:
dataset_path = "../dataset/AEP_hourly.csv"

In [None]:
# Training Metrics
metric = metrics.RMSE()

# Creating the model
model_1 = copy.deepcopy(preprocessing_pipeline) | ('lin_reg_bay', linear_model.BayesianLinearRegression())

# Training the model
y_trues_bay, y_pred_bay = fit_model(model_1, metric)


Processing data: 121273 sample [00:42, 2849.11 sample/s]


Training Hoeffding Tree Regressor Model

In [None]:
# Training Metrics
metric = metrics.RMSE()

# Creating the model
model_2 = copy.deepcopy(preprocessing_pipeline) | ('lin_reg_hoef', HoeffdingTreeRegressor(grace_period=250))

# Training the model
y_trues_hoef, y_pred_hoef = fit_model(model_2, metric)

Processing data: 121273 sample [00:45, 2684.64 sample/s]


Training KNN Model

In [8]:
# Training Metrics
metric = metrics.RMSE()

# Creating the model
model_3 = copy.deepcopy(preprocessing_pipeline) | ('lin_reg_knn', neighbors.KNNRegressor())

# Training the model
y_trues_hoef, y_pred_hoef = fit_model(model_3, metric)

Processing data: 121273 sample [05:18, 380.19 sample/s]
