In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## Import Data

In [81]:
# Relative paths (resolved against the kernel's working directory) to the
# energy and weather CSV inputs that are read with pd.read_csv below.
energy_data_path = './data/clean/energy.csv'
weather_data_path = './data/clean/weather.csv'

In [82]:
# Load both CSVs with pandas defaults (no dtype or date parsing is requested).
# NOTE(review): the later merge joins on 'dt_iso', so both files presumably
# carry that column — confirm against the data.
energy_df = pd.read_csv(energy_data_path)
weather_df = pd.read_csv(weather_data_path)

## Helper Functions

In [103]:
def lag_features_of_dataframe(df, features_to_lag, amount_to_lag):
    """Return a copy of ``df`` extended with shifted versions of some columns.

    For every name in ``features_to_lag`` a column called
    ``"<name>_<amount_to_lag>_lag"`` is added, holding ``df[name]`` shifted by
    ``amount_to_lag`` rows (``Series.shift``). The input frame is left
    untouched; rows with no shifted value available hold NaN.
    """
    # All shifted series are derived from the original frame, never from
    # columns added during this call.
    shifted_columns = {
        f"{column}_{amount_to_lag}_lag": df[column].shift(amount_to_lag)
        for column in features_to_lag
    }
    return df.assign(**shifted_columns)

In [131]:
def run_linear_regression(X_train, Y_train, X_test, n_estimators=50, random_state=None):
    """Fit a random-forest regressor on the training data and predict ``X_test``.

    NOTE: despite its name (kept so existing callers keep working), this
    function trains a ``RandomForestRegressor``, not a linear model.

    Parameters
    ----------
    X_train : array-like
        Training features, passed to ``fit``.
    Y_train : array-like
        Training targets. A 2-D target with exactly one column is flattened
        to 1-D before fitting, which avoids scikit-learn's
        ``DataConversionWarning`` for column-vector targets.
    X_test : array-like
        Features to predict for.
    n_estimators : int, default 50
        Number of trees in the forest (the value previously hard-coded).
    random_state : int or None, default None
        Seed forwarded to the forest. ``None`` keeps the original unseeded
        behaviour; pass an int for repeatable predictions.

    Returns
    -------
    The output of ``model.predict(X_test)``.
    """
    targets = np.asarray(Y_train)
    if targets.ndim == 2 and targets.shape[1] == 1:
        # Single target column: hand the estimator a 1-D vector.
        targets = targets.ravel()

    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, targets)
    return model.predict(X_test)

In [132]:
def lasso_regression_weights(X_train, Y_train):
    """Fit a Lasso model and print its coefficients labelled by feature name.

    ``X_train`` must expose ``.columns`` (e.g. a DataFrame); those labels
    become the index of the printed Series. Nothing is returned.
    """
    estimator = Lasso(max_iter=10000)
    estimator.fit(X_train, Y_train)
    # Pair each fitted coefficient with the column it belongs to.
    print(pd.Series(estimator.coef_, index=X_train.columns))

In [134]:
def evaluate_regression(y_true, y_pred):
    """Print three regression metrics comparing ``y_pred`` with ``y_true``.

    Reports the mean absolute error (labelled "Error"), the square root of
    the mean squared error, and the R2 score. Nothing is returned.
    """
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r_squared = r2_score(y_true, y_pred)
    # One metric per output line; each line break is preceded by a space.
    report = " \n".join((f"Error: {mae}", f"Root MSE: {rmse}", f"R2: {r_squared}"))
    print(report)

## Build the Modeling Dataset and Train/Test Split

In [129]:
# Join weather and energy rows that share the same 'dt_iso' value (inner join:
# values present in only one of the two frames are dropped), then remove the
# join key from the result.
# reset_index(drop=True) keeps a plain 0..n-1 index without turning the old
# index into an extra 'index' column, which a bare reset_index() would add.
df = (
    pd.merge(weather_df, energy_df, on='dt_iso', how='inner')
    .reset_index(drop=True)
    .drop(columns=['dt_iso'])
)

In [130]:
# Weather inputs: one column per (city, measurement) pair, named
# "<city>_<measurement>", generated city by city in the order listed.
weather_cities = ['valencia', 'madrid', 'bilbao', 'barcelona', 'seville']
weather_measurements = ['temp', 'wind_speed', 'cloud_cover_pct', 'precipitation_last_3hr']
features = [
    f"{city}_{measurement}"
    for city in weather_cities
    for measurement in weather_measurements
]

# Every target column listed so far, kept for reference.
candidate_targets = ['price actual', 'generation total', 'generation wind', 'generation water', 'generation solar']
# Only the price is modelled. The full list used to be assigned to
# features_to_predict and then immediately overwritten, so it now lives under
# its own name instead of being a dead assignment.
features_to_predict = ['price actual']

In [135]:
# Feature matrix and target frame (Y stays a DataFrame because
# features_to_predict is a list of column names).
X = df[features]
Y = df[features_to_predict]
# Hold out 20% of the rows for testing. A fixed random_state makes the shuffle,
# and therefore every downstream metric, repeatable across kernel restarts.
# NOTE(review): this is still a random shuffle; if the rows are time-ordered
# (the lag helper suggests so), a chronological split (shuffle=False) may be
# more appropriate — confirm.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=42)