In [1]:
import os

In [2]:
# Show the kernel's current working directory.
# NOTE(review): the saved output of this cell exposes an absolute local path — consider clearing it before sharing.
%pwd

'c:\\Tom\\HKA\\7_Semester\\Domänenprojekt_2\\DoPro'

## Test predictions

### Prediction config entity

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class PredictionConfig:
    """
    Immutable entity holding the config params of the prediction stage.

    :ivar wind_model_dir: Directory where the wind model is stored.
    :ivar solar_model_dir: Directory where the solar model is stored.
    :ivar components_dir: Directory where preprocessing components are stored.
    """

    wind_model_dir: Path
    solar_model_dir: Path
    components_dir: Path

In [4]:
from src.dopro2_HEFTcom_challenge.constants import PARAMS_FILE_PATH, CONFIG_FILE_PATH
import yaml
from loguru import logger

In [5]:
class ConfigurationManager:
    """Class to manage all configurations."""

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH
    ) -> None:
        """
        Constructor for ConfigurationManager Class.
        Reads both YAML files and creates the artifacts folder.

        :param config_filepath: Path to config.yaml file
        :param params_filepath: Path to params.yaml file
        :raises KeyError: if ``artifacts_root`` is missing from config.yaml
        """
        # Wrapping in Path lets callers pass plain strings as well as Path objects.
        with Path(config_filepath).open("r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; fall back to an
            # empty dict so the attribute always matches its annotation.
            self.config: dict = yaml.safe_load(f) or {}

        with Path(params_filepath).open("r", encoding="utf-8") as f:
            self.params: dict = yaml.safe_load(f) or {}

        os.makedirs(self.config["artifacts_root"], exist_ok=True)
        logger.info("created directory at: {}", self.config["artifacts_root"])

    def get_prediction_config(self) -> PredictionConfig:
        """
        Get all config params of the ``prediction`` section.

        :return: values from config.yaml, with every directory as a ``Path``
        :rtype: PredictionConfig
        :raises KeyError: if the ``prediction`` section or one of its keys is missing
        """

        config = self.config["prediction"]

        # YAML yields plain strings; convert them so the entity really holds
        # the Path objects its field annotations promise.
        prediction_config = PredictionConfig(
            wind_model_dir=Path(config["wind_model_dir"]),
            solar_model_dir=Path(config["solar_model_dir"]),
            components_dir=Path(config["components_dir"])
        )

        return prediction_config

### Create prediction component

In [6]:
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
import joblib
from dopro2_HEFTcom_challenge.utils import prep_submission_in_json_format, get_season, categorize_wind_dir
from dopro2_HEFTcom_challenge.entity import RebaseAPI

In [7]:
class Prediction:
    """Class to predict on new data from the Rebase API."""

    # Column order fed to the wind model.
    WIND_FEATURES = ["WindSpeedPCA", "hours_after", "season_autumn", "season_spring",
                     "season_summer", "season_winter", "wind_dir_cat_E", "wind_dir_cat_N",
                     "wind_dir_cat_NE", "wind_dir_cat_NW", "wind_dir_cat_S",
                     "wind_dir_cat_SE", "wind_dir_cat_SW", "wind_dir_cat_W"]
    # Column order fed to the solar model.
    SOLAR_FEATURES = ["temp_solar", "CloudCover", "SolarDownwardRadiation",
                      "RelativeHumidity", "hours_after", "month", "day", "hour"]
    # Names given to the nine prediction columns, in ascending order.
    QUANTILE_COLUMNS = ["q10", "q20", "q30", "q40", "q50",
                        "q60", "q70", "q80", "q90"]

    def __init__(self, config: PredictionConfig) -> None:
        """
        Constructor for Prediction class.

        :param config: config values from config.yaml
        """

        self.config = config
        self.api = RebaseAPI()

    @staticmethod
    def _find_model_file(model_dir) -> Path:
        """
        Return the ``*.json`` model file stored in ``model_dir``.

        :param model_dir: directory to search (str or Path)
        :return: path of the first ``*.json`` file in sorted order
        :raises FileNotFoundError: if the directory contains no ``*.json`` file
        """
        # glob() yields entries in arbitrary order; sorting makes the choice
        # deterministic when several model files are present.
        candidates = sorted(Path(model_dir).glob("*.json"))
        if not candidates:
            raise FileNotFoundError(f"No '*.json' model file found in: {model_dir}")
        return candidates[0]

    @staticmethod
    def load_models(
        wind_path,
        solar_path
    ) -> tuple[XGBRegressor, XGBRegressor]:
        """
        Load the wind and the solar model from their directories.

        :param wind_path: directory containing the wind model as ``*.json``
        :param solar_path: directory containing the solar model as ``*.json``
        :return: tuple of (wind model, solar model)
        :raises FileNotFoundError: if a directory contains no ``*.json`` file
        """
        logger.info("Load solar and wind model")

        wind_model = XGBRegressor()
        wind_model.load_model(Prediction._find_model_file(wind_path))

        solar_model = XGBRegressor()
        solar_model.load_model(Prediction._find_model_file(solar_path))
        return wind_model, solar_model

    def _one_hot_encode(
        self,
        data: pd.DataFrame,
        column: str,
        encoder_name: str
    ) -> pd.DataFrame:
        """
        Encode one categorical column with an encoder stored in ``components_dir``.

        :param data: frame containing ``column``
        :param column: name of the categorical column to encode
        :param encoder_name: file name of the stored encoder
        :return: encoded columns, indexed like ``data``
        """
        encoder = joblib.load(Path(self.config.components_dir) / encoder_name)
        encoded = encoder.transform(data[[column]])
        # Reuse the input index so a later column-wise concat lines the rows
        # up with ``data`` instead of aligning against a fresh RangeIndex.
        return pd.DataFrame(
            encoded.toarray(),
            columns=encoder.get_feature_names_out(),
            index=data.index
        )

    def prepare_data(self) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]:
        """
        Fetch the latest forecast data and build the model inputs.

        :return: tuple of (wind feature matrix, solar feature matrix,
                 frame holding the ``valid_time`` column)
        """

        logger.info("Prepare data for the prediction")

        # Work on a clean positional index: the transformed arrays below carry
        # no index of their own and must line up row by row.
        latest_data = self.api.get_latest_forecast_data().reset_index(drop=True)

        valid_time = latest_data["valid_time"]
        prediction_data = latest_data.assign(
            season=valid_time.dt.month.apply(get_season),
            wind_dir_cat=latest_data["WindDirection:100"].apply(categorize_wind_dir),
            month=valid_time.dt.month,
            day=valid_time.dt.day,
            hour=valid_time.dt.hour
        )

        season_encoded_df = self._one_hot_encode(
            prediction_data, "season", "season_encoder"
        )
        windDir_encoded_df = self._one_hot_encode(
            prediction_data, "wind_dir_cat", "windDir_encoder"
        )

        prediction_data = pd.concat(
            [prediction_data, season_encoded_df, windDir_encoded_df],
            axis=1
        )

        # Feed both wind speed columns through the stored pipeline and keep
        # its output as a single feature column.
        scale_pca_pipe_path = Path(self.config.components_dir) / "scale_pca_pipe_windspeed"
        scale_pca_pipe = joblib.load(scale_pca_pipe_path)
        prediction_data["WindSpeedPCA"] = scale_pca_pipe.transform(
            prediction_data[["WindSpeed", "WindSpeed:100"]]
        )

        wind_data = prediction_data[self.WIND_FEATURES].to_numpy()
        solar_data = prediction_data[self.SOLAR_FEATURES].to_numpy()
        time_df = prediction_data[["valid_time"]]

        return wind_data, solar_data, time_df

    def predict(
            self,
            wind_data: np.ndarray,
            solar_data: np.ndarray,
            time_df: pd.DataFrame
    ) -> None:
        """
        Load the models, predict on the prepared data and print the submission.

        :param wind_data: feature matrix for the wind model
        :param solar_data: feature matrix for the solar model
        :param time_df: frame with the ``valid_time`` column, one row per prediction
        :raises ValueError: if the number of predictions differs from the rows of ``time_df``
        """

        wind_model, solar_model = self.load_models(self.config.wind_model_dir,
                                                   self.config.solar_model_dir)

        wind_predictions = wind_model.predict(wind_data)
        solar_predictions = solar_model.predict(solar_data)
        predictions_all = wind_predictions + solar_predictions
        # Sort every row ascending so the values are non-decreasing from q10 to q90.
        predictions_all.sort(axis=1)

        prediction_df = pd.DataFrame(predictions_all, columns=self.QUANTILE_COLUMNS)

        if len(prediction_df) != len(time_df):
            raise ValueError(
                f"Got {len(prediction_df)} predictions for {len(time_df)} timestamps"
            )

        # Join by position: prediction_df has a fresh RangeIndex, so time_df
        # gets one too; otherwise a non-default index would produce NaN rows.
        submission_data = time_df.reset_index(drop=True).join(prediction_df)
        submission_data["market_bid"] = submission_data["q50"]

        submission_data_json = prep_submission_in_json_format(submission_data)
        print(submission_data_json)

        # Submitting is left disabled; uncomment to send the payload.
        # self.api.submit(submission_data_json)
        


In [8]:
# Run the whole flow: read the config, prepare the latest forecast data, predict.
# Errors propagate unchanged, so no try/except wrapper is needed.
config = ConfigurationManager()
prediction_config = config.get_prediction_config()
prediction = Prediction(config=prediction_config)
wind_data, solar_data, time_df = prediction.prepare_data()
prediction.predict(wind_data, solar_data, time_df)

[32m2024-10-25 00:20:16.479[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m24[0m - [1mcreated directory at: artifacts[0m
[32m2024-10-25 00:20:16.490[0m | [1mINFO    [0m | [36m__main__[0m:[36mprepare_data[0m:[36m34[0m - [1mPrepare data for the prediction[0m
[32m2024-10-25 00:20:16.492[0m | [34m[1mDEBUG   [0m | [36mdopro2_HEFTcom_challenge.entity.rebase_api[0m:[36mquery_weather_latest[0m:[36m142[0m - [34m[1mPOST from https://api.rebase.energy/weather/v2/query, model=DWD_ICON-EU, lat=[53.77, 53.84, 53.9, 53.97, 54.03, 54.1], long=[1.702, 1.767, 1.832, 1.897, 1.962, 2.027], variables=WindSpeed, WindSpeed:100, WindDirection, WindDirection:100, Temperature, RelativeHumidity, type=grid[0m
[32m2024-10-25 00:20:17.618[0m | [34m[1mDEBUG   [0m | [36mdopro2_HEFTcom_challenge.entity.rebase_api[0m:[36mquery_weather_latest[0m:[36m148[0m - [34m[1mstatuscode=200[0m
[32m2024-10-25 00:20:18.002[0m | [34m[1mDEBUG   [0m | [36mdopro2_HEFTc

{'market_day': '2024-10-26', 'submission': [{'timestamp': '2024-10-25T22:00:00+00:00', 'market_bid': 92.39033508300781, 'probabilistic_forecast': {10: 22.474990844726562, 20: 46.40397262573242, 30: 63.56928253173828, 40: 81.61800384521484, 50: 92.39033508300781, 60: 111.19820404052734, 70: 137.33860778808594, 80: 170.01596069335938, 90: 228.94200134277344}}, {'timestamp': '2024-10-25T22:30:00+00:00', 'market_bid': 95.40059661865234, 'probabilistic_forecast': {10: 24.31940460205078, 20: 46.209896087646484, 30: 66.27447509765625, 40: 84.19629669189453, 50: 95.40059661865234, 60: 115.49651336669922, 70: 141.55950927734375, 80: 173.27207946777344, 90: 228.14830017089844}}, {'timestamp': '2024-10-25T23:00:00+00:00', 'market_bid': 98.6006851196289, 'probabilistic_forecast': {10: 24.773834228515625, 20: 47.714576721191406, 30: 65.04557800292969, 40: 83.4084243774414, 50: 98.6006851196289, 60: 118.00802612304688, 70: 147.7049102783203, 80: 185.79974365234375, 90: 232.1263427734375}}, {'timesta