In [3]:
import pandas as pd
# Load the raw hourly series and turn the 'Datetime' column into real timestamps.
data = pd.read_csv("../dataset/AEP_hourly.csv")
data = data.assign(Datetime=pd.to_datetime(data["Datetime"]))

# Order the rows chronologically (ascending 'Datetime') with a fresh 0..n-1 index.
df_sorted = data.sort_values("Datetime").reset_index(drop=True)

# NOTE: this writes back to the same path that was read above, so the
# original (unsorted) file is replaced by the sorted version.
df_sorted.to_csv("../dataset/AEP_hourly.csv", index=False)

In [None]:
from river import stream, compose, preprocessing, evaluate, metrics, linear_model, utils, stats, feature_extraction
from river.tree import HoeffdingTreeClassifier
import datetime

# Location of the hourly CSV that is streamed row by row further down in this cell.
dataset_path = "../dataset/AEP_hourly.csv"

def float_converter(a):
    """Convert a raw CSV field to ``float``, or return ``None`` if it cannot be parsed.

    Parameters
    ----------
    a : Any
        The raw field value (normally a string read from the CSV).

    Returns
    -------
    float or None
        ``float(a)`` when the conversion succeeds, otherwise ``None``.
    """
    try:
        return float(a)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. "" or "abc").
        # TypeError: a value float() cannot accept at all, e.g. None, which
        # csv.DictReader supplies for fields missing from a short row.
        return None

# Shared mutable state: divide_date reads "LastDay" from here and copies it
# into every row. The training loop in this cell overwrites it with the
# target it has just seen, so despite the name it holds the previous
# observation's target value.
past_metrics = {"LastDay": 0}

# Every key divide_date adds to a row, in insertion order. When the timestamp
# cannot be parsed, all of them are set to None.
_DATE_FEATURE_KEYS = (
    "Year", "Month", "Day", "WeekOfYear", "DayOfWeek",
    "DayOfYear", "Weekend", "Quarter", "Hour", "LastDay",
)

def divide_date(x):
    """Feature engineering: expand ``x['Datetime']`` into calendar features.

    The row dict is modified **in place** and also returned.

    Parameters
    ----------
    x : dict
        A row containing a ``'Datetime'`` string in ``"%Y-%m-%d %H:%M:%S"`` format.

    Returns
    -------
    dict
        The same ``x`` object with these keys added: ``Year``, ``Month``,
        ``Day``, ``WeekOfYear`` (ISO week number), ``DayOfWeek`` (Monday = 0),
        ``DayOfYear``, ``Weekend`` (1 for Saturday/Sunday, else 0),
        ``Quarter`` (1-4), ``Hour`` and ``LastDay`` (the current value of
        ``past_metrics["LastDay"]``). If ``'Datetime'`` is missing, is not a
        string, or does not match the expected format, every one of these
        keys is set to ``None`` instead.
    """
    try:
        dt = datetime.datetime.strptime(x['Datetime'], "%Y-%m-%d %H:%M:%S")
        last_value = past_metrics["LastDay"]
    except (KeyError, TypeError, ValueError):
        # Only the expected "bad row" failures are handled here. Anything else
        # (KeyboardInterrupt, programming errors, ...) propagates instead of
        # silently turning into a row full of None values.
        for key in _DATE_FEATURE_KEYS:
            x[key] = None
        return x

    x["Year"] = dt.year
    x["Month"] = dt.month
    x["Day"] = dt.day
    # Index access instead of `.week`: the attribute only exists on Python 3.9+.
    x["WeekOfYear"] = dt.isocalendar()[1]
    x["DayOfWeek"] = dt.weekday()
    x["DayOfYear"] = dt.timetuple().tm_yday
    x["Weekend"] = int(dt.weekday() >= 5)
    x["Quarter"] = int((dt.month - 1) // 3 + 1)
    x["Hour"] = dt.hour
    x["LastDay"] = last_value
    return x

# Stream (features, target) pairs from the CSV. "AEP_MW" is split off as the
# target and parsed with float_converter.
energy_consumption = stream.iter_csv(
    dataset_path,
    target="AEP_MW",
    drop_nones=True,
    converters={'AEP_MW': float_converter},
)

# Feature stage: calendar features from divide_date plus two rolling means of
# the target. The window sizes (7 and 14) count observations, i.e. rows of
# the stream, not calendar days.
feature_union = compose.TransformerUnion(
    ('date_features', compose.FuncTransformer(divide_date)),
    (
        'last_7_mean',
        feature_extraction.TargetAgg(
            by=None,
            how=utils.Rolling(stats.Mean(), 7),
            target_name="last_7_mean",
        ),
    ),
    (
        'last_14_mean',
        feature_extraction.TargetAgg(
            by=None,
            how=utils.Rolling(stats.Mean(), 14),
            target_name="last_14_mean",
        ),
    ),
)

# Full pipeline: build features, drop the raw timestamp string, then regress.
model = compose.Pipeline(
    ('features', feature_union),
    ('drop_non_features', compose.Discard('Datetime')),
    ('lin_reg', linear_model.BayesianLinearRegression()),
)

metric = metrics.MAE()

# Predict first, then learn, so each prediction is made before the model has
# seen that row's target.
for x, y in energy_consumption:
    y_pred = model.predict_one(x)
    model.learn_one(x, y)
    metric.update(y, y_pred)
    # Expose this row's target to divide_date as "LastDay" for the next row.
    past_metrics["LastDay"] = y

# Show the last processed row and the final mean absolute error.
print(x)
print(metric)
                            

12379.0
0.0
11935.0
12378.999992638477
11692.0
11942.965157473542
11597.0
11696.006794921537
11681.0
11514.988208853652
12280.0
11543.342296857798
13692.0
12006.882463543243
14618.0
13098.589361667642
14903.0
14158.773721947473
15118.0
22853.54207904895
15242.0
16032.415250796199
15375.0
15738.949813381443
15404.0
15710.265783877383
15655.0
15734.327499077403
15739.0
16058.670935923972
15739.0
16497.02330594002
15644.0
16946.22153538852
15353.0
17302.203224858153
15034.0
17248.12251650886
15211.0
16645.171222939374
15349.0
16097.547930221619
14837.0
15840.779688635163
14067.0
15485.311852514393
13147.0
11932.191662773343
12260.0
12786.426858221079
11672.0
12389.981914456326
11352.0
12004.52640867946
11177.0
11601.427532012694
11142.0
11232.852662568004
11331.0
11049.125059294416
11866.0
11132.488526110717
12387.0
11531.444186667492
13144.0
12104.86726020361
13712.0
12817.403953221406
14082.0
13619.633868553086
14080.0
14449.71817257422
14056.0
15070.067011257373
13934.0
15377.404694916