In [1]:
import json

import numpy as np
import tensorflow as tf

from src.loaders.data_loader import load_train_data, load_submission_data
from src.preprocessors.feature_engineering import feature_engineering_cnn
from src.preprocessors.dataset_creators import create_cnn_dataset
from src.preprocessors.splitors import split_train_valid_test_for_cv
from src.model.multiple_output.convolution import Convolution2DVarious
from src.loaders.experiment_loader import load_experiment_cnn
from src.trainers import compile_and_fit_with_pinball_loss
from src.make_submissions.create_submission import create_submission_using_cnn_model
from src.make_submissions.create_submission import evaluate_with_submission_cnn

# Select which stored CNN experiment setting this run uses and load it.
setting_index = 1
experiment_setting = load_experiment_cnn(setting_index)

# `days` and `selector` are read from the setting and handed to the dataset
# builder, the model constructor and the submission helpers further down.
days, selector = experiment_setting["days"], experiment_setting["columns"]

# Seed NumPy and TensorFlow with the same value before any model is built.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Cross-validated training run.
# For every split yielded by split_train_valid_test_for_cv, one CNN is fitted
# per quantile q in {0.1, ..., 0.9} with the pinball loss. The test loss and
# the submission error are accumulated into `history`, and each split's
# submission frame is kept so it can be combined later.
df = load_train_data()
df = feature_engineering_cnn(df)

history = {
    "total_error": 0,            # running sum of test losses over all fits
    "submission_df": [],         # one submission frame per completed split
    "total_submission_error": 0, # running sum of submission errors over all fits
}

for index, (train, valid, test) in enumerate(split_train_valid_test_for_cv(df, 0.5)):
    history[index] = {}

    window_generator = create_cnn_dataset(train, valid, test, days, selector)
    # Start from a fresh submission template for every split.
    submission_df = load_submission_data()

    for i in range(1, 10):
        q = i / 10
        history[index][q] = {"cnn": [], "error": []}

        # A new model is built for every (split, quantile) pair.
        conv_various = Convolution2DVarious(days, 96, len(selector))
        compile_and_fit_with_pinball_loss(conv_various, window_generator, q)

        # Only the first element of the evaluate() result is accumulated.
        history["total_error"] += conv_various.evaluate(window_generator.test, verbose=0)[0]

        submission_df[f"q_{q}"] = create_submission_using_cnn_model(selector, conv_various, train, days)
        history["total_submission_error"] += evaluate_with_submission_cnn(q, train, conv_various, selector, days)

    # Appended only after all nine quantile columns of this split are filled.
    history["submission_df"].append(submission_df)

# Normalisers for the accumulated errors.
# NOTE(review): 36, 0.2, 80 and 96 are not explained anywhere in this notebook;
# presumably they encode the number of fits and the evaluated sample counts.
# Confirm, and consider deriving them from the data instead of hard-coding.
test_error_normalizer = 36 * df.shape[0] * 0.2
submission_error_normalizer = 36 * 80 * 96

experiment_setting["total_error"] = history["total_error"] / test_error_normalizer
experiment_setting["total_submission_error"] = history["total_submission_error"] / submission_error_normalizer

# Persist the setting together with its measured errors.
with open(f"experiments/cnn/{setting_index}.json", "w") as f:
    json.dump(experiment_setting, f, indent=4)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000

In [None]:
# Print the normalised test error followed by the normalised submission error,
# using the same divisors as the training cell.
normalized_test_error = history["total_error"] / (36 * df.shape[0] * 0.2)
normalized_submission_error = history["total_submission_error"] / (36 * 80 * 96)
print(normalized_test_error, normalized_submission_error)

In [None]:
# Combine the per-split submission frames into one frame by averaging each
# quantile column over the splits that actually produced a result.
submission_df = load_submission_data()

result_list = history["submission_df"]

# Divide by the number of collected frames rather than by the training loop's
# leftover `index` variable: that variable is undefined on a fresh kernel and
# is off by one whenever the training loop stopped partway through a split.
n_folds = len(result_list)
if n_folds == 0:
    raise ValueError("history['submission_df'] is empty; run the training cell first")

quantile_columns = [f"q_{i / 10}" for i in range(1, 10)]

# The sums are accumulated on top of whatever the template already holds.
# NOTE(review): this assumes the template's q_* columns start at zero — confirm.
for result_df in result_list:
    for column in quantile_columns:
        submission_df[column] += result_df[column]

for column in quantile_columns:
    submission_df[column] = submission_df[column] / n_folds

In [None]:
# Write the combined submission frame to a CSV named after the experiment
# setting, leaving out the DataFrame index column.
output_path = f"lightgbm_gbdt_split_dataset_cnn_setting_index_{setting_index}.csv"
submission_df.to_csv(output_path, index=False)

In [None]:
# Scratch arithmetic: 142800 / 3888, which displays roughly 36.73.
# NOTE(review): the meaning of 142800, 48 and 81 is not shown in this notebook —
# presumably a sanity check on the normalisers used above; confirm or remove.
142800 / (48 * 81)

In [None]:
# Scratch arithmetic: 680400 / 19440, which displays exactly 35.0.
# NOTE(review): the meaning of 680400, 48, 81 and 5 is not shown in this
# notebook — presumably a sample-count sanity check; confirm or remove.
680400 / ( 48 * 81 * 5)