In [25]:
import numpy as np
import pandas as pd
from tqdm import tqdm

## Medida del CRPS
def heavyside(prediction,actual):
    return prediction >= actual

def cdf_dif(prediction,actual):
    quantiles = np.arange(1,100)/100.0
    t=pd.Series(prediction)
    dif=t-t.shift(1)
    dif=dif.dropna()
    fs = sum(dif*((quantiles-heavyside(prediction,actual))[1:]**2))
    # If the actual is outside the range of the prediction, 
    # we need to account for that areas outside the range 
    if actual > prediction[-1]:
        fs += (actual-prediction[-1]) * 1
    if actual < prediction[0]:
        fs += (prediction[0]-actual) * 1
    return fs

def CRPS(predictions, actuals):
    difs_mean = [cdf_dif(predictions[i],actuals[i]) for i in range(len(actuals))]
    return np.mean(difs_mean)

def evaluate(predictions,target):

    res={}
    
    # Calculate the CRPS
    res["crps"]=CRPS(predictions,target)
    
    # Bonus useful Feature
    count = 0
    for i in range(len(target)):
        if (target[i]>predictions[i][0]) and (target[i]<predictions[i][-1]):
            count+=1
    res["count"]=count
    
    ## Calculate as well measures for the quantile 50
    total_df = pd.DataFrame(predictions)
    quantiles = np.arange(1,100)/100.0 
    total_df.columns=np.array(quantiles).astype(str)
    #RMSE       
    res["rmse"]=np.sqrt(np.mean((target-total_df["0.5"])**2))
    #MAE    
    res["mae"]=np.mean(np.abs(target-total_df["0.5"] ) )
    #Bias 
    res["bias"]=np.mean(target-total_df["0.5"])
    #Corr
    res["corr"]=np.corrcoef(target,total_df["0.5"])[0][1]

    res["all"]=predictions
    res["target"]=target
    
    return res

In [26]:
import numpy as np
import math
import pandas as pd
from tqdm import  tqdm
from sklearn.model_selection import KFold
from numpy.fft import fft
import datetime
import json
import numpy as np
from scipy import stats

def prepare_data_from_horizon(df, horizon=12):
    
    data=df.copy()

    sel = np.concatenate([[1,2,3,4],
                         [12],
                         24*np.arange(1,9),
                         12+24*np.arange(1,9)])  
    sel=np.concatenate([sel,sel-1,sel+1])  
    
    ## lagged NO2 values
    for i in sel:
        if (i>=horizon):
            data["NO2 - "+str(i)] = data["NO2"].shift(i)

    ## lagged O3 values
    for i in 24*np.arange(1,4):
        if (i>=horizon):
            data["O3 - "+str(i)] = data["O3"].shift(i)

    ## Remove empty values
    data=data.dropna()

    X=data[list(set(data.columns)-set(['DATE',"NO2","O3"]))]
    y=data["NO2"]
  
    return X, y

#"data/dataEscAgui.csv"
def get_data(path):

    df = pd.read_csv(path,sep=";")

    # Prepare data
    data = df[["DATE","SPA.NO2","SPA.O3","MACC.NO2"]].copy()
    data["DATE"]=pd.to_datetime(data["DATE"],format="%Y-%m-%d %H:%M:%S")
    data = data.sort_values("DATE")
    data.columns = ["DATE","NO2","O3","CAMS"]

    ## Remove everything from 2020
    data=data[data["DATE"].astype(str)<"2020"]

    ## Fourier Columns
    freqs = [2922,1461,209,1465,4]
    l = 35064
    n = np.arange(len(data))
    fcols = []
    for f in freqs:
        data["c"+str(f)]=np.cos(n*2*np.pi*f/l)
        fcols.append("c"+str(f))
        data["s"+str(f)]=np.cos(n*2*np.pi*f/l)
        fcols.append("s"+str(f))

    data["NO2"]=np.log1p(data["NO2"])
    data["O3"]=np.log1p(data["O3"])
    data["CAMS"]=np.log1p(data["CAMS"])

    ## Calendar Variables 
    ## Calendar Variables do not bring better results and therefore
    ## removed
    
    return data

In [27]:
import lightgbm as lgb
import numpy as np
import math
import pandas as pd
from tqdm import tqdm
from scipy import stats

class TotalLGBQuantile():
    
    def __init__(self,n_estimators,max_depth):
        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.quantiles=[0.022750131948179195,0.15865525393145707,0.5,0.8413447460685429,0.9772498680518208]
        self.estimators = []
        
    def fit(self,X_train,y_train):
        print("training !")
        for q in tqdm(self.quantiles):
            print(f"Quantile: {q}")
            reg = lgb.LGBMRegressor(n_estimators=self.n_estimators,
                                    objective= 'quantile',
                                    loss="quantile",
                                    alpha=q,
                                    random_state=2020,
                                   max_depth=self.max_depth)
                                
            reg.fit(X_train, y_train)
            self.estimators.append(reg)
        print("Done")
        
    def predict(self,X):
        predictions_gbr = []
        print("predicting")
        for reg in tqdm(self.estimators):
            predictions_gbr.append(reg.predict(X))
         
        total_pred={}
        for i in range(len(predictions_gbr)):
            total_pred[i]=predictions_gbr[i]
            
        total_df=pd.DataFrame(total_pred)

        def process_row(row):
            v = row.values
            dif_mean = np.abs(v-v[2])
            mu = v[2]
            s = np.mean([dif_mean[0]/2,dif_mean[1],dif_mean[3],dif_mean[4]/2])
            mi_norm = stats.norm(mu,s)
            quant=[]
            for quantile in np.arange(1,100)/100.0 :
                quant.append(mi_norm.ppf(quantile))
            return pd.Series(quant)
 
        total_df = total_df.apply(process_row,axis=1)
        
        return total_df.values


In [28]:
import numpy as np
import math
import pandas as pd
from tqdm import tqdm
from scipy import stats

from keras.models import Sequential
from keras.layers import Dense, LeakyReLU
from keras.callbacks import EarlyStopping
import keras.backend as K 

from sklearn.model_selection import train_test_split

def tilted_loss(q,y,f):
    e = (y-f)
    return K.mean(K.maximum(q*e, (q-1)*e), axis=-1)

class MLPQuantile():
    
    def __init__(self):

        self.estimators = []
        
    def fit(self,X_train,y_train):
        
        def MLPmodel():
            model = Sequential()
            model.add(Dense(len(X_train[0]), input_dim=len(X_train[0]), activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(int(len(X_train[0])/2), activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(int(len(X_train[0])/2), activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(1, activation='linear'))
            return model
        
        print("training !")

        X_ttrain, X_val, y_ttrain, y_val = train_test_split(X_train,y_train,test_size=.05,random_state=2020)

        for q in [0.022750131948179195,0.15865525393145707,0.5,0.8413447460685429,0.9772498680518208]:
            print(f"Quantile: {q}")
            model = MLPmodel()
            model.compile(loss=lambda y,f: tilted_loss(q,y,f), optimizer='adadelta')
            es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=50)
            history = model.fit(X_ttrain, y_ttrain, 
                                epochs=1000, batch_size=500,  
                                verbose=1,callbacks=[es],
                                validation_data=(X_val,y_val))
            self.estimators.append(model)
        print("Done")
        
    def predict(self,X):
        predictions_gbr = []
        print("predicting")
        for reg in tqdm(self.estimators):
            predictions_gbr.append(reg.predict(X))
         
        total_pred={}
        for i in range(len(predictions_gbr)):
            total_pred[i]=predictions_gbr[i][:,0]
            
        total_df=pd.DataFrame(total_pred)

        def process_row(row):
            v = row.values
            dif_mean = np.abs(v-v[2])
            mu = v[2]
            s = np.mean([dif_mean[0]/2,dif_mean[1],dif_mean[3],dif_mean[4]/2])
            mi_norm = stats.norm(mu,s)
            quant=[]
            for quantile in np.arange(1,100)/100.0 :
                quant.append(mi_norm.ppf(quantile))
            return pd.Series(quant)
 
        total_df = total_df.apply(process_row,axis=1)
        
        return total_df.values


In [29]:
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold

In [30]:
horizon = 13

df = get_data("../2018_2019_data/28079008.csv")
X, y = prepare_data_from_horizon(df,horizon)

kf = KFold(5,shuffle=True)
train_index, test_index = list(kf.split(X))[1]

train_index = X.index.values[train_index]
test_index = X.index.values[test_index] 
        
# Filter the test index when prediction time is 10:00
ten_index = df[(df["DATE"]-timedelta(hours=horizon)).dt.hour==10].index
test_index_10 = test_index[pd.Series(test_index).isin(ten_index)]
        
# We retrieve the indexes that are related to the test indexes according to our AR model
sel = np.concatenate([[1,2,3,4],
                            [12],
                            24*np.arange(1,9),
                            12+24*np.arange(1,9)])  
sel=np.concatenate([sel,sel-1,sel+1]) 
        
all_index_related_test = set([])
for i in sel:
    all_index_related_test |= set(test_index_10+i)
        
train_index_CV = train_index[pd.Series(train_index).isin(list(all_index_related_test))]
        
X_train = X.loc[train_index_CV]
y_train = y.loc[train_index_CV]
        
X_test = X.loc[test_index_10]
y_test = y.loc[test_index_10]
        
scaler = RobustScaler()
# Fit the scaler on the training features and transform these in one go
X_train_std = scaler.fit_transform(X_train)
# Scale the test set
X_test_std = scaler.transform(X_test)
        
lin = LinearRegression()
lin.fit(X_train_std,y_train)

dif_train = y_train-lin.predict(X_train_std)
dif_test = y_test-lin.predict(X_test_std)

In [48]:
from tqdm.notebook import tqdm

In [49]:
alls=[]

for n_estimator in tqdm([500,600,700,800,900,1000,1100]):
    for max_depth in tqdm([4,6,8,10,12,14]):
        qreg = TotalLGBQuantile(n_estimators=n_estimator,max_depth=max_depth)
        qreg.fit(X_train_std,y_train)
        preds = qreg.predict(X_test_std)
        results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
        del results["all"]
        del results["target"]
        results["n_estimator"]=n_estimator
        results["max_depth"]=max_depth
        alls.append(results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=7.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




Exception ignored in: <function tqdm.__del__ at 0x11a1cf670>
Traceback (most recent call last):
  File "/Users/sebastien.perez/anaconda3/lib/python3.8/site-packages/tqdm/std.py", line 1122, in __del__
    self.close()
  File "/Users/sebastien.perez/anaconda3/lib/python3.8/site-packages/tqdm/notebook.py", line 261, in close
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))






In [53]:
pd.DataFrame(alls).sort_values("rmse")

Unnamed: 0,crps,count,rmse,mae,bias,corr,n_estimator,max_depth
41,13.112044,465,26.19557,18.389143,2.557965,0.750437,1100,14
35,13.147471,465,26.276481,18.426206,2.578699,0.748572,1000,14
29,13.158772,466,26.331609,18.480324,2.641233,0.747443,900,14
39,13.168241,458,26.344224,18.464018,2.598883,0.746472,1100,10
33,13.189537,458,26.373749,18.49069,2.602618,0.745804,1000,10
23,13.170047,466,26.398462,18.535942,2.678638,0.745948,800,14
21,13.182804,461,26.4182,18.533462,2.567523,0.744575,800,10
27,13.253145,457,26.419564,18.516333,2.592532,0.744601,900,10
17,13.209801,466,26.457832,18.559086,2.721503,0.744805,700,14
15,13.173501,464,26.500201,18.589906,2.596564,0.742723,700,10


In [55]:
alls=[]

for n_estimator in tqdm([1500,1700,1900]):
    for max_depth in tqdm([16,18,20]):
        qreg = TotalLGBQuantile(n_estimators=n_estimator,max_depth=max_depth)
        qreg.fit(X_train_std,y_train)
        preds = qreg.predict(X_test_std)
        results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
        del results["all"]
        del results["target"]
        results["n_estimator"]=n_estimator
        results["max_depth"]=max_depth
        alls.append(results)
pd.DataFrame(alls).sort_values("rmse")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


training !


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Quantile: 0.022750131948179195
Quantile: 0.15865525393145707
Quantile: 0.5
Quantile: 0.8413447460685429
Quantile: 0.9772498680518208

Done
predicting


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))






Unnamed: 0,crps,count,rmse,mae,bias,corr,n_estimator,max_depth
6,13.078775,462,26.113654,18.287802,2.57457,0.752331,1900,16
3,13.109792,462,26.143519,18.306575,2.57546,0.75159,1700,16
0,13.130781,463,26.231025,18.368446,2.586203,0.749561,1500,16
7,13.227243,458,26.258953,18.51734,2.703388,0.749923,1900,18
4,13.195215,460,26.317637,18.547053,2.709977,0.748563,1700,18
1,13.216615,460,26.415884,18.627242,2.755423,0.746428,1500,18
8,13.192756,461,26.429982,18.522709,2.545831,0.745003,1900,20
5,13.23744,461,26.467214,18.555937,2.593564,0.744335,1700,20
2,13.248307,461,26.518313,18.595631,2.621523,0.743264,1500,20
