## Imports y funciones

In [None]:
from river import stream, compose, preprocessing, evaluate, metrics, linear_model, utils, stats, feature_extraction
from river.tree import HoeffdingTreeRegressor
import datetime
import matplotlib.pyplot as plt
from river.drift import PageHinkley, KSWIN, ADWIN
import tqdm

def load_dataset(dataset_path: str):
    """Open a CSV file as a river stream whose target column is ``AEP_MW``.

    The target column is parsed with ``float_converter`` and ``drop_nones=True``
    is forwarded to ``stream.iter_csv``.

    Args:
        dataset_path: Path of the CSV file to read.

    Returns:
        Whatever ``stream.iter_csv`` returns for these arguments.
    """
    target_column = "AEP_MW"
    return stream.iter_csv(
        dataset_path,
        target=target_column,
        converters={target_column: float_converter},
        drop_nones=True,
    )

def float_converter(a):
    """Convert a raw field to ``float``, returning ``None`` when it is not numeric.

    Args:
        a: Value to convert, typically the raw string read from a CSV cell.

    Returns:
        ``float(a)`` when the conversion succeeds, otherwise ``None``.
    """
    try:
        return float(a)
    except (TypeError, ValueError):
        # ValueError: unparsable text (e.g. "" or "abc").
        # TypeError: unsupported input type (e.g. None), which would otherwise
        # propagate and abort the whole stream.
        return None

# Shared mutable state: the most recent target value, exposed to the model as
# the "Last" feature by divide_date. train_model overwrites it after each instance.
past_metrics = {"Last": 0}

def divide_date(x):  # feature engineering
    """Add calendar features derived from ``x['Datetime']`` to ``x``.

    The timestamp is parsed with the format ``%Y-%m-%d %H:%M:%S``. On success
    the keys ``Year``, ``Month``, ``Day``, ``WeekOfYear``, ``DayOfWeek``,
    ``DayOfYear``, ``Weekend``, ``Quarter``, ``Hour`` and ``Last`` are set;
    ``Last`` is copied from the module-level ``past_metrics``. If the
    timestamp is missing or cannot be parsed, all of those keys are set to
    ``None`` instead.

    Args:
        x: Feature dict; it is modified in place.

    Returns:
        The same dict ``x`` with the extra keys added.
    """
    try:
        dt = datetime.datetime.strptime(x['Datetime'], "%Y-%m-%d %H:%M:%S")
    except (KeyError, TypeError, ValueError):
        # Missing key, non-string value or malformed timestamp: emit
        # placeholders. Other exceptions (e.g. KeyboardInterrupt) propagate.
        for name in ("Year", "Month", "Day", "WeekOfYear", "DayOfWeek",
                     "DayOfYear", "Weekend", "Quarter", "Hour", "Last"):
            x[name] = None
        return x

    day_of_week = dt.weekday()
    x["Year"] = dt.year
    x["Month"] = dt.month
    x["Day"] = dt.day
    # Index access works both for the named tuple of Python 3.9+ and the
    # plain tuple of earlier versions.
    x["WeekOfYear"] = dt.isocalendar()[1]
    x["DayOfWeek"] = day_of_week
    x["DayOfYear"] = dt.timetuple().tm_yday
    x["Weekend"] = int(day_of_week >= 5)
    x["Quarter"] = (dt.month - 1) // 3 + 1
    x["Hour"] = dt.hour
    x["Last"] = past_metrics["Last"]
    return x

def train_model(model, data_stream, drift_detector=None, stop_point=None):
    """Run a predict-then-learn loop over a stream and collect results.

    For every ``(x, y)`` pair the model first predicts, then learns from the
    instance. Two RMSE metrics are tracked: one for the model and one for a
    baseline that predicts the previous target (0 for the first instance).
    The module-level ``past_metrics["Last"]`` is updated with each target so
    that ``divide_date`` can expose it as a feature.

    Args:
        model: Object providing ``predict_one(x)`` and ``learn_one(x, y)``.
        data_stream: Iterable yielding ``(x, y)`` pairs.
        drift_detector: Optional detector providing ``update(value)`` and a
            ``drift_detected`` attribute; it is fed the absolute error.
        stop_point: Optional index at which to stop. The instance at this
            index is still processed by the model and the metrics, but the
            detector is only updated for indices strictly below it.
            ``None`` means the whole stream is consumed.

    Returns:
        Tuple ``(metric, metric2, y_trues, y_preds, drifts)``: model RMSE,
        baseline RMSE, list of targets, list of predictions, and the list of
        indices at which the detector reported a drift.
    """
    metric = metrics.RMSE()
    metric2 = metrics.RMSE()
    y_trues = []
    y_preds = []
    drifts = []
    previous_value = 0
    # Start every run from the same seed as the baseline, so the final target
    # of an earlier run does not leak into the first "Last" feature of this one.
    past_metrics["Last"] = previous_value

    for i, (x, y) in enumerate(tqdm.tqdm(data_stream, desc="Procesando")):
        y_pred = model.predict_one(x)
        model.learn_one(x, y)
        metric.update(y, y_pred)
        metric2.update(y, previous_value)
        past_metrics["Last"] = y
        previous_value = y
        y_trues.append(y)
        y_preds.append(y_pred)

        # With no stop_point the detector watches the entire stream.
        monitored = stop_point is None or i < stop_point
        if drift_detector is not None and monitored:
            error = abs(y - y_pred)
            drift_detector.update(error)
            if drift_detector.drift_detected:
                print(f'Change detected at index {i}')
                drifts.append(i)

        if stop_point is not None and i == stop_point:
            break

    return metric, metric2, y_trues, y_preds, drifts

def plot_results(y_trues, y_preds):
    """Plot true and predicted values as four stacked panels in one figure.

    Panels, top to bottom: the first 1000 true values, the first 1000
    predicted values, both overlaid for the first 1000 instances, and both
    overlaid for the full series.

    Args:
        y_trues: Sequence of observed target values.
        y_preds: Sequence of predicted values.
    """
    head = 1000
    true_style = dict(marker='.', linestyle='-', color='b', label='True values')
    pred_style = dict(marker='.', linestyle='-', color='r', label='Predicted values')

    # (title, [(values, line style), ...]) for each panel, in display order.
    panels = (
        ('First true 1000 instances',
         [(y_trues[:head], true_style)]),
        ('First predicted 1000 instances',
         [(y_preds[:head], pred_style)]),
        ('Comparison of the first 1000 true and predicted values',
         [(y_trues[:head], true_style), (y_preds[:head], pred_style)]),
        ('Comparison of true and predicted values',
         [(y_trues, true_style), (y_preds, pred_style)]),
    )

    # A single figure hosts the whole 4x1 grid; creating a new figure per
    # panel would leave each one with three empty grid slots.
    plt.figure(figsize=(15, 15))
    for position, (title, series) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, position)
        for values, style in series:
            plt.plot(values, **style)
        plt.title(title)
        plt.xlabel('Instance')
        plt.ylabel('Value')
        plt.grid(True)
        plt.legend()

    # Keep titles and axis labels of adjacent panels from overlapping.
    plt.tight_layout()
    plt.show()
    
def plot_concept_drifts(y_trues, drifts):
    """Plot the true values and mark each detected drift with a vertical line.

    Args:
        y_trues: Sequence of observed target values.
        drifts: Iterable of x positions where a drift was detected, or
            ``None`` to draw no markers.
    """
    plt.figure(figsize=(6, 6))
    plt.plot(y_trues, label='True values', color='b', linestyle='-', marker='.')

    if drifts is None:
        return

    for position in drifts:
        plt.axvline(position, color='black')

## Entrenamiento del modelo

In [None]:
# Pipeline: build the feature union, discard the raw 'Datetime' field, then
# fit a Bayesian linear regression.
model = compose.Pipeline(
    (
        'features',
        compose.TransformerUnion(
            ('date_features', compose.FuncTransformer(divide_date)),
            (
                'last_7_mean',
                feature_extraction.TargetAgg(
                    by=None,
                    how=utils.Rolling(stats.Mean(), 7),
                    target_name="last_7_mean",
                ),
            ),
            (
                'last_14_mean',
                feature_extraction.TargetAgg(
                    by=None,
                    how=utils.Rolling(stats.Mean(), 14),
                    target_name="last_14_mean",
                ),
            ),
        ),
    ),
    ('drop_non_features', compose.Discard('Datetime')),
    ('lin_reg', linear_model.BayesianLinearRegression()),
)

# Run the model over the stream; the returned drift list is not needed here.
energy_consumption = load_dataset(dataset_path="../dataset/AEP_hourly.csv")
(
    metric_model,
    metric_base,
    true_values,
    predicted_values,
    _,
) = train_model(model=model, data_stream=energy_consumption)

# Report the model metric next to the previous-value baseline metric.
print("Modelo", metric_model)
print("Base", metric_base)

plot_results(y_trues=true_values, y_preds=predicted_values)
                            

## Comprobación del concept drift

In [None]:
# Fresh pipeline with the same structure as the training cell, so the drift
# check starts from an untrained model.
model = compose.Pipeline(
    (
        'features',
        compose.TransformerUnion(
            ('date_features', compose.FuncTransformer(divide_date)),
            (
                'last_7_mean',
                feature_extraction.TargetAgg(
                    by=None,
                    how=utils.Rolling(stats.Mean(), 7),
                    target_name="last_7_mean",
                ),
            ),
            (
                'last_14_mean',
                feature_extraction.TargetAgg(
                    by=None,
                    how=utils.Rolling(stats.Mean(), 14),
                    target_name="last_14_mean",
                ),
            ),
        ),
    ),
    ('drop_non_features', compose.Discard('Datetime')),
    ('lin_reg', linear_model.BayesianLinearRegression()),
)

drift_detector = ADWIN()

# Re-open the dataset so the stream is read from the beginning.
energy_consumption = load_dataset(dataset_path="../dataset/AEP_hourly.csv")

# Only the true values and the detected drift indices are kept.
_, _, y_trues, _, drifts = train_model(
    model=model,
    data_stream=energy_consumption,
    drift_detector=drift_detector,
    stop_point=(14*161),
)

plot_concept_drifts(y_trues=y_trues, drifts=drifts)
            