# In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from river import compose
from river import linear_model
from river import metrics
from river import preprocessing
from river import stats
from river import stream
from river.feature_selection import SelectKBest
from river.metrics import Accuracy
# Read the raw CSV into a DataFrame.
df = pd.read_csv('residential4_grid_import_export_weather_fixed_timestamps.csv')

# Parse the 'utc_timestamp' column once and reuse the parsed series below.
timestamps = pd.to_datetime(df['utc_timestamp'])
df['utc_timestamp'] = timestamps

# Calendar features derived from the parsed timestamp.
# NOTE: 'day_of_week' is computed here but is not listed in `features` below.
df['day_of_week'] = timestamps.dt.dayofweek
df['hour_of_day'] = timestamps.dt.hour

# Name of the column the model predicts, and the input columns it is fed.
target = 'DE_KN_residential4_grid_import'
features = ['hour_of_day', 'DE_KN_residential4_grid_export']

# Create a model: feature selection -> standardisation -> linear regression.
#
# SelectKBest scores every feature with a bivariate statistic computed between
# the feature value and the target, so it must be given an estimator from
# `river.stats` such as Pearson correlation. A classification metric like
# `Accuracy` is not a meaningful similarity for a continuous target.
# NOTE(review): with only two entries in `features`, k=2 keeps every feature;
# lower k or add candidate features if actual selection is wanted.
model = compose.Pipeline(
    SelectKBest(stats.PearsonCorr(), k=2),
    preprocessing.StandardScaler(),
    linear_model.LinearRegression(),
)

# Define a metric: mean absolute error between targets and predictions.
metric = metrics.MAE()

# Iterate over the data one sample at a time. Each sample is predicted
# *before* the model learns from it, so the metric only ever scores
# predictions made on data the model has not yet been trained on.
for xi, yi in stream.iter_pandas(df[features], df[target]):
    # A non-empty pipeline is always truthy, so no guard is needed here and
    # a prediction is produced for every sample (including the very first).
    y_pred = model.predict_one(xi)
    metric.update(yi, y_pred)
    model.learn_one(xi, yi)

print(f'MAE: {metric.get()}')



# Look up the regression step inside the pipeline by its step name.
lr = model['LinearRegression']

# The learned per-feature weights serve as the importance scores.
importances = lr.weights

# Order the (feature, weight) pairs from the largest signed weight to the
# smallest. Note this ranks by signed value, not by magnitude.
sorted_features = list(importances.items())
sorted_features.sort(key=lambda pair: pair[1], reverse=True)

# Split the pairs into (names, weights) and draw one bar per feature,
# with the tick labels rotated so long column names stay readable.
names_and_weights = zip(*sorted_features)
plt.bar(*names_and_weights)
plt.xticks(rotation=90)
plt.show()

# KeyboardInterrupt: