In [48]:
# import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import lightgbm as lbg
from tqdm import tqdm
import polars as pl
import json
from abc import ABC, abstractmethod

In [42]:
class Config:
    """Central registry of dataset paths and small lookup tables.

    Everything here is a class attribute, so the county-name JSON and the
    weather-station mapping CSV are read once, when this class body runs.

    The ``COUNTRY`` spelling in the attribute names is kept so that existing
    references keep working; the files on disk are spelled ``county``.
    """

    # Dataset root, relative to the current working directory.
    ROOT = os.path.join("predict-energy-behavior-of-prosumers")

    # county id mapping
    COUNTRY_ID_TO_NAME_MAP_DIR = os.path.join(ROOT, "county_id_to_name_map.json")
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(COUNTRY_ID_TO_NAME_MAP_DIR, 'r', encoding="utf-8") as country_id_to_name:
        COUNTRY_ID_TO_NAME = json.load(country_id_to_name)

    # loading the weather station to county mapping
    WEATHER_STATION_TO_COUNTRY_MAPPING_DIR = os.path.join(ROOT, "weather_station_to_county_mapping.csv")
    WEATHER_STATION_TO_COUNTRY_MAPPING = pl.read_csv(WEATHER_STATION_TO_COUNTRY_MAPPING_DIR, try_parse_dates=True)

    # paths of all the files for the training
    TRAIN_CLIENT = os.path.join(ROOT, "client.csv")
    TRAIN_ELECTRICITY_PRICES = os.path.join(ROOT, "electricity_prices.csv")
    TRAIN_FORCAST_WEATHER = os.path.join(ROOT, "forecast_weather.csv")
    TRAIN_GAS_PRICES = os.path.join(ROOT, "gas_prices.csv")
    TRAIN_HISTORICAL_WEATHER = os.path.join(ROOT, "historical_weather.csv")
    TRAIN_TRAIN = os.path.join(ROOT, "train.csv")
    # Reuse the path that is actually loaded above: the file name is spelled
    # "county", and the previous "country" spelling pointed at a different file.
    TRAIN_WEATHER_STATION_TO_COUNTRY_MAPPING = WEATHER_STATION_TO_COUNTRY_MAPPING_DIR

    # paths of all the files for the testing
    TEST_FILES_DIR = os.path.join(ROOT, "example_test_files")
    TEST_CLIENT = os.path.join(TEST_FILES_DIR, "client.csv")
    TEST_ELECTRICITY_PRICES = os.path.join(TEST_FILES_DIR, "electricity_prices.csv")
    TEST_FORCAST_WEATHER = os.path.join(TEST_FILES_DIR, "forecast_weather.csv")
    TEST_GAS_PRICES = os.path.join(TEST_FILES_DIR, "gas_prices.csv")
    TEST_HISTORICAL_WEATHER = os.path.join(TEST_FILES_DIR, "historical_weather.csv")
    TEST_TEST = os.path.join(TEST_FILES_DIR, "test.csv")
    # NOTE(review): spelling aligned with the mapping file loaded above; it is
    # not confirmed that example_test_files ships a copy of this file.
    TEST_WEATHER_STATION_TO_COUNTRY_MAPPING = os.path.join(TEST_FILES_DIR, "weather_station_to_county_mapping.csv")



In [43]:
# The DataStorage class will be injected into the feature-engineering class.
# This will use a lot of memory; instead, create the data storage class and add wrappers around it,
# just like adding decorators to the class.

In [44]:
class DataStorage:
    """Eagerly reads every training and testing CSV referenced by ``Config``.

    Each table is stored on the instance under a ``train_df_*`` or
    ``test_df_*`` attribute.
    """

    def __init__(self):
        def load(csv_path):
            # Every table is read with the same reader settings.
            return pl.read_csv(csv_path, try_parse_dates=True)

        # training tables
        self.train_df_data = load(Config.TRAIN_TRAIN)
        self.train_df_client = load(Config.TRAIN_CLIENT)
        self.train_df_electricity_prices = load(Config.TRAIN_ELECTRICITY_PRICES)
        self.train_df_forcast_weather = load(Config.TRAIN_FORCAST_WEATHER)
        self.train_df_gas_prices = load(Config.TRAIN_GAS_PRICES)
        self.train_df_historical_weather = load(Config.TRAIN_HISTORICAL_WEATHER)

        # testing tables
        self.test_df_data = load(Config.TEST_TEST)
        self.test_df_client = load(Config.TEST_CLIENT)
        self.test_df_electricity_prices = load(Config.TEST_ELECTRICITY_PRICES)
        self.test_df_forcast_weather = load(Config.TEST_FORCAST_WEATHER)
        self.test_df_gas_prices = load(Config.TEST_GAS_PRICES)
        self.test_df_historical_weather = load(Config.TEST_HISTORICAL_WEATHER)
        

In [59]:
# the base class which is used in the decorator design pattern
class AddFeatures(ABC):
    """Abstract interface for a single feature-engineering step.

    Concrete steps override ``addFeature`` to take a dataframe and return the
    dataframe with their features applied.
    """

    @abstractmethod
    def addFeature(self, df):
        """Return ``df`` with this step's features added.

        The signature matches what concrete steps implement, so a subclass
        can safely call ``super().addFeature(df)``.

        Args:
            df: The dataframe to transform.

        Returns:
            The transformed dataframe. This abstract placeholder itself
            returns ``None``.
        """
        pass

In [60]:
# add general features 
class AddGeneralFeatures(AddFeatures):
    """Feature step for general features; currently a pass-through placeholder."""

    def __init__(self, df):
        """Keep a reference to ``df`` on the instance."""
        # addFeature works on the frame passed to it, not on this attribute.
        self.df = df

    def addFeature(self, df):
        """Return ``df`` unchanged; no feature logic has been added yet."""
        # Compute some operations
        features_added = df
        return features_added
          

In [64]:
class AddFeatureManager:
    """Applies the registered feature steps to the training dataframe."""

    def __init__(self, dataStorage: DataStorage):
        """Create the feature steps and start from the storage's training data.

        Args:
            dataStorage: Object whose ``train_df_data`` attribute holds the
                dataframe to be transformed.
        """
        training_frame = dataStorage.train_df_data
        self.addGeneralFeatures = AddGeneralFeatures(training_frame)
        self.df = training_frame

    def addFeatures(self):
        """Run every step in order, feeding each the previous step's result.

        Returns:
            The dataframe after all steps have run; it is also kept on
            ``self.df``.
        """
        # The step list is (re)built on every call.
        self.allOperations = [self.addGeneralFeatures]
        for step in self.allOperations:
            transformed = step.addFeature(self.df)
            self.df = transformed
        return self.df

In [65]:
data_storage = DataStorage()

In [66]:
add_feature_manager = AddFeatureManager(data_storage)

In [67]:
add_feature_manager.addFeatures()

county,is_business,product_type,target,is_consumption,datetime,data_block_id,row_id,prediction_unit_id
i64,i64,i64,f64,i64,datetime[μs],i64,i64,i64
0,0,1,0.713,0,2021-09-01 00:00:00,0,0,0
0,0,1,96.59,1,2021-09-01 00:00:00,0,1,0
0,0,2,0.0,0,2021-09-01 00:00:00,0,2,1
0,0,2,17.314,1,2021-09-01 00:00:00,0,3,1
0,0,3,2.904,0,2021-09-01 00:00:00,0,4,2
0,0,3,656.859,1,2021-09-01 00:00:00,0,5,2
0,1,0,0.0,0,2021-09-01 00:00:00,0,6,3
0,1,0,59.0,1,2021-09-01 00:00:00,0,7,3
0,1,1,0.0,0,2021-09-01 00:00:00,0,8,4
0,1,1,501.76,1,2021-09-01 00:00:00,0,9,4
