In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import math, collections, time, os, gc, re, joblib, json
from copy import deepcopy
import seaborn as sns
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from collections import Counter, defaultdict
from itertools import product
import lightgbm as lgb
from geopy.geocoders import Nominatim
from pathlib import Path
from tqdm import tqdm
import optuna
pd.options.display.max_columns = 150

In [3]:
import utils

In [4]:
with open('../input/predict-energy-behavior-of-prosumers/county_id_to_name_map.json', mode = 'r', encoding = 'utf-8') as f:
    mapping = json.load(f)
parsed_counties = {v.lower().rstrip("maa"): k for k, v in mapping.items()}

name_mapping = {
    "valga": "valg",
    "põlva": "põlv",
    "jõgeva": "jõgev",
    "rapla": "rapl",
    "järva": "järv"
}

In [5]:
files = list(Path("../input/predict-energy-behavior-of-prosumers/").glob("*.csv"))
for file in files:
    print(file)

../input/predict-energy-behavior-of-prosumers/client.csv
../input/predict-energy-behavior-of-prosumers/gas_prices.csv
../input/predict-energy-behavior-of-prosumers/electricity_prices.csv
../input/predict-energy-behavior-of-prosumers/weather_station_to_county_mapping.csv
../input/predict-energy-behavior-of-prosumers/historical_weather.csv
../input/predict-energy-behavior-of-prosumers/train.csv
../input/predict-energy-behavior-of-prosumers/forecast_weather.csv


In [6]:
file_dict = {
    "client":"../input/predict-energy-behavior-of-prosumers/client.csv",
    "electricity":"../input/predict-energy-behavior-of-prosumers/electricity_prices.csv",
    "forecast_weather":"../input/predict-energy-behavior-of-prosumers/forecast_weather.csv",
    "gas":"../input/predict-energy-behavior-of-prosumers/gas_prices.csv",
    "historical_weather":"../input/predict-energy-behavior-of-prosumers/historical_weather.csv",
    "train":"../input/predict-energy-behavior-of-prosumers/train.csv"
}

In [7]:
dfs_train = {k:pd.read_csv(v) for k,v in file_dict.items()}

In [8]:
df = utils.create_dataset_from_dataframes(dfs_train)
correlation_for_shifts = utils.get_correlation_for_shifts(df, 'target')
train_pivot = df.pivot_table(index = "datetime", columns = ["county", "is_business", "product_type", "is_consumption"], values = "target")

train_shifted, _ = utils.include_shifts_by_group(
    df_train = df, 
    number_of_shifted_target = 3, 
    correlation_for_shifts = correlation_for_shifts,
    df_train_pivot = train_pivot
)

train_shifted = train_shifted.drop("target", axis = 1).dropna()

df = df.merge(
    train_shifted,
    on = ['datetime', 'county', 'is_business', 'product_type', 'is_consumption'],
    suffixes = ("", "")
)

100%|██████████| 28/28 [00:03<00:00,  8.09it/s]


In [9]:
optimal_params = {'objective':'mean_absolute_error', 'num_iterations':400, 'max_depth':11, 'num_leaves':2**10, 'random_state':0, 'verbosity':0, 'early_stopping_round':10, 'force_col_wise':True, 'boosting_type':'gbdt'}

In [10]:
model = lgb.LGBMRegressor(**optimal_params)
features_train, target_train = df.drop(['target', 'datetime'], axis = 1), df.target
target_train /= df.installed_capacity
features_train = features_train.drop("installed_capacity", axis = 1)

In [11]:
model.fit(
    X = features_train,
    y = target_train,
    eval_metric = 'mean_absolute_error',
    eval_set=[(features_train, target_train)],
    eval_names = ['training_error'],
    callbacks = [lgb.log_evaluation(optimal_params['num_iterations']//10)]
)



[40]	training_error's l1: 0.0432763
[80]	training_error's l1: 0.0391541
[120]	training_error's l1: 0.03812
[160]	training_error's l1: 0.0370024
[200]	training_error's l1: 0.0351182
[240]	training_error's l1: 0.0337265
[280]	training_error's l1: 0.0326886
[320]	training_error's l1: 0.0317915
[360]	training_error's l1: 0.0312515
[400]	training_error's l1: 0.0307915


In [12]:
import enefit
env = enefit.make_env()
iter_test = env.iter_test()

In [13]:
counter = 0
for (test, revealed_targets, client_test, historical_weather_test,
        forecast_weather_test, electricity_prices_test, gas_prices_test, sample_prediction_test) in iter_test:
    
    dfs_test = {"train":test, "client":client_test, "electricity":electricity_prices_test, "gas":gas_prices_test}
    
    _, test_shifted = utils.include_shifts_by_group(
        df_train = df,
        number_of_shifted_target = 3,
        correlation_for_shifts = correlation_for_shifts,
        df_train_pivot = train_pivot,
        df_test = test
    )
    
    features_test = utils.create_dataset_from_dataframes(dfs_test)
    installed_capacity_test = features_test.installed_capacity
    features_test = features_test.merge(
    test_shifted,
    on = ['datetime', 'county', 'is_business', 'product_type', 'is_consumption'],
    suffixes = ("", "")
    )
    
    features_test = features_test[features_train.columns]
    features_test = features_test.fillna(features_train.mean(axis = 0))
    predictions = model.pedict(features_test) * installed_capacity_test
    sample_prediction_test['target'] = predictions
    env.predict(sample_prediction_test)
    counter += 1

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


AttributeError: 'LGBMRegressor' object has no attribute 'pedict'