In [None]:
from pathlib import Path
from dotenv import load_dotenv
import numpy as np
import warnings
import pandas as pd
import hopsworks
import datetime
import os
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score

import openmeteo_requests

import requests_cache
from retry_requests import retry

# Suppress every warning category for the rest of the session so that library
# noise does not clutter cell output. NOTE(review): this also hides
# deprecation warnings from the libraries imported above.
warnings.filterwarnings("ignore")


In [None]:
# Load credentials from the repository-level .env file; values found there take
# precedence over variables already present in the environment (override=True).
load_dotenv("../.env", override=True)
HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")

# Fail fast when the key is absent *or* blank (e.g. `HOPSWORKS_API_KEY=` in
# .env), instead of passing an unusable value on to the login call.
if not HOPSWORKS_API_KEY:
    raise RuntimeError(
        "HOPSWORKS_API_KEY not found. "
        "Set it in ../.env or export it as an environment variable."
    )

# Authenticate against the project and grab a handle to its feature store.
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY, project="project_scalable")
fs = project.get_feature_store()

In [None]:
# Handles to the two version-1 feature groups that feed the training set.
weather_fg = fs.get_feature_group(name="weather_data", version=1)
solar_enery_fg = fs.get_feature_group(name="solar_energy", version=1)

# Build the training query: timestamp and energy label from the solar group,
# joined on the shared "datetime" column with whatever `select_features()`
# yields for the weather group.
solar_query = solar_enery_fg.select(["datetime", "MWh"])
weather_query = weather_fg.select_features()
feat_for_training = solar_query.join(weather_query, on=["datetime"])

# Reuse the existing feature view if present, otherwise create it with
# "MWh" declared as the label column.
feature_view = fs.get_or_create_feature_view(
    name="solar_and_weather",
    version=1,
    description="weather features and solar energy data",
    labels=["MWh"],
    query=feat_for_training,
)


In [None]:

# Full joined dataset (features + label), sorted chronologically. It is kept
# around so later cells can inspect the raw rows.
# NOTE(review): the frame is read through the feature view's underlying query;
# confirm this matches the installed Hopsworks client version.
df = feature_view.query.read()
df = df.sort_values("datetime")

# Model inputs: the numeric weather columns only. The split frames returned
# below also carry the "datetime" column selected in the query, which must not
# be fed to the regressor.
feature_cols = ["temperature_2m", "shortwave_radiation", "cloud_cover", "wind_speed_10m", "precipitation"]

# Time-based split: everything from 2025-01-01 onwards is held out for testing.
# `datetime` is imported as a module, so the class has to be qualified.
test_start = datetime.datetime(2025, 1, 1)
X_train, X_test, y_train, y_test = feature_view.train_test_split(
    test_start=test_start
)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)

# Fit on the selected feature columns; X_train / X_test themselves are left
# untouched so downstream cells still see every column of the split.
xgb = XGBRegressor(n_estimators=100, random_state=42)
xgb.fit(X_train[feature_cols], y_train)
print('trained model')

# Predict on the held-out period and clamp at zero (the raw model output is
# floored with np.maximum, so no negative MWh values are reported).
y_pred = xgb.predict(X_test[feature_cols])
y_pred = np.maximum(y_pred, 0)
print('prediction done')

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
print("R squared:", r2)



In [None]:
# Side-by-side table of actual vs. predicted production for the test period.
df_results = X_test.copy()

# Use the timestamps that came back with the split itself when available.
# Only fall back to a label lookup in `df` if the split frame has no
# "datetime" column: `df` is read separately, so its index labels are not
# guaranteed to line up with X_test's.
if "datetime" not in df_results.columns:
    df_results["datetime"] = df.loc[X_test.index, "datetime"]

# Flatten the labels so both a Series and a single-column frame become a 1-D
# array; a length mismatch now raises instead of being silently truncated.
df_results["actual_MWh"] = np.asarray(y_test).ravel()
df_results["predicted_MWh"] = y_pred

df_results = df_results.reset_index(drop=True)

df_results.head(8)

In [None]:

# Write the trained regressor to a local directory as JSON; the directory is
# created on demand and reused if it already exists.
model_dir = "prediction_model_2"
os.makedirs(model_dir, exist_ok=True)

model_file = model_dir + "/model.json"
xgb.save_model(model_file)

print("Model saved locally.")

In [None]:

# Register the locally saved model directory in the project's model registry,
# attaching the evaluation metrics computed on the test split.
mr = project.get_model_registry()

res_dict = {
    "MSE": mse,
    "R squared": r2,
}

model_hops = mr.python.create_model(
    name="energy_predict",
    version=1,
    description="Energy prediction using XGBoost",
    metrics=res_dict,
)

# Upload the contents of `model_dir` as this model version's artifacts.
model_hops.save(model_dir)
print("Model uploaded to Hopsworks.")