In [5]:
from pathlib import Path

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Fit one quantile-regression model per decile (q10 ... q90) of
# total_generation_MWh, append the in-sample predictions to the table as
# columns q10 ... q90, and persist both the table and the fitted models.

QUANTILES = range(10, 100, 10)  # percent levels: 10, 20, ..., 90
MODEL_DIR = Path("models")

# Create the output directory up front so a missing folder fails (or is fixed)
# before the fits run, instead of raising FileNotFoundError at save time.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Load the preprocessed data
# NOTE(review): the preview of this table shows an "Unnamed: 0" column, which
# suggests the CSV was written together with its index. It is left untouched
# here so the schema of analyzed_data.csv does not change; confirm whether
# the preprocessing step should write with index=False.
modelling_table = pd.read_csv("preprocessed_data.csv")

# Quantile regression on B-spline expansions of the radiation and wind-speed
# forecasts from both weather sources (dwd and ncep).
mod = smf.quantreg(
    'total_generation_MWh ~ bs(Radiation_dwd,df=5) + bs(Radiation_ncep,df=5) + bs(WindSpeed_dwd,df=8) + bs(WindSpeed_ncep,df=8)',
    data=modelling_table,
)

forecast_models = dict()
for quantile in QUANTILES:
    column = f"q{quantile}"
    forecast_models[column] = mod.fit(q=quantile / 100, max_iter=2500)
    modelling_table[column] = forecast_models[column].predict(modelling_table)
    # Generation cannot be negative, so floor the predictions at zero.
    modelling_table[column] = modelling_table[column].clip(lower=0)

modelling_table.to_csv("analyzed_data.csv", index=False)

# Save the quantile regression models to files
for quantile in QUANTILES:
    forecast_models[f"q{quantile}"].save(str(MODEL_DIR / f"model_q{quantile}.pickle"))



In [6]:
# Preview the first ten rows of the table, including the q10 ... q90 prediction columns.
modelling_table.head(10) 

Unnamed: 0,ref_datetime,valid_datetime,WindSpeed_dwd,WindSpeed_ncep,Radiation_dwd,Radiation_ncep,Wind_MWh_credit,Solar_MWh_credit,total_generation_MWh,q10,q20,q30,q40,q50,q60,q70,q80,q90
0,2020-09-20 00:00:00+00:00,2020-09-20 00:00:00+00:00,11.802604,11.338991,0.0,0.0,498.142,0.0,498.142,337.855561,413.34104,451.867176,476.58467,496.873006,514.294769,530.815951,546.990376,566.216242
1,2020-09-20 00:00:00+00:00,2020-09-20 00:30:00+00:00,11.648818,11.516161,0.0,0.0,478.788,0.0,478.788,341.78644,416.967877,455.285833,479.801279,499.866589,516.962622,533.148888,549.011983,567.35055
2,2020-09-20 00:00:00+00:00,2020-09-20 01:00:00+00:00,11.495032,11.693331,0.0,0.0,470.522,0.0,470.522,345.241052,419.661679,457.601129,481.874203,501.720564,518.533509,534.445118,550.10923,567.769527
3,2020-09-20 00:00:00+00:00,2020-09-20 01:30:00+00:00,11.354128,11.716686,0.0,0.0,482.183,0.0,482.183,341.997787,415.812435,453.722133,478.156793,498.223875,515.246657,531.429806,547.529965,565.65008
4,2020-09-20 00:00:00+00:00,2020-09-20 02:00:00+00:00,11.213223,11.740043,0.0,0.0,459.216,0.0,459.216,338.707292,411.703375,449.485121,474.055017,494.332309,511.569994,528.046838,544.620408,563.251392
5,2020-09-20 00:00:00+00:00,2020-09-20 02:30:00+00:00,11.174553,11.61622,0.0,0.0,469.597,0.0,469.597,331.993404,405.090474,443.230094,468.219982,488.95155,506.670325,523.687866,540.916421,560.606612
6,2020-09-20 00:00:00+00:00,2020-09-20 03:00:00+00:00,11.135883,11.492399,0.0,0.0,489.341,0.0,489.341,325.076839,398.147333,436.604091,462.005191,483.196723,501.414309,518.991988,536.912856,557.739678
7,2020-09-20 00:00:00+00:00,2020-09-20 03:30:00+00:00,11.195188,11.39506,0.0,0.0,494.893,0.0,494.893,322.264441,395.826876,434.597898,460.201866,481.587117,500.022996,517.803196,535.913838,557.234615
8,2020-09-20 00:00:00+00:00,2020-09-20 04:00:00+00:00,11.254492,11.297719,0.0,0.0,501.451,0.0,501.451,319.317011,393.255443,432.298945,458.095476,479.67617,498.342454,516.340563,534.670565,556.543569
9,2020-09-20 00:00:00+00:00,2020-09-20 04:30:00+00:00,11.388748,11.080603,0.780469,0.3,500.662,0.0,500.662,312.4964,386.918712,426.490868,452.658678,474.634567,493.870435,512.400716,531.338433,554.723853
