# 04 - Prequential Sliding

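É semelhante ao Prequential Expanding, mas aqui não se juntam os blocos anteriores para treino. Ou seja, na primeira tentativa usamos o bloco 1 como treino e o bloco 2 como teste. Na segunda tentativa usamos o bloco 2 como treino e o bloco 3 como teste, e assim por diante. (No código, os blocos são numerados a partir de 0, então a primeira tentativa treina no bloco 0 e valida no bloco 1.)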

In [1]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor

In [2]:
# Load the training set; the path is relative to the notebook's working directory.
# Later cells read the 'era' and 'target' columns plus every column whose name contains 'feature'.
data = pd.read_csv('data-processed/train.csv')

# Prequential Sliding

In [3]:
# Bucket eras into blocks of ten: truncating era * 0.1 maps eras 0-9 to block 0,
# 10-19 to block 1, and so on.
era_block = np.trunc(data['era'] * 0.1).astype(int)
# Fold block 12 into block 11 instead of keeping it as its own block
# (presumably because it would hold very few eras -- TODO confirm).
data['block'] = era_block.replace(12, 11)

In [4]:
# Number of rows in each block, listed in block order (value_counts sorts by count, so re-sort by block id).
data['block'].value_counts().sort_index()

0     24515
1     34600
2     37444
3     41101
4     43439
5     48186
6     46831
7     40403
8     43971
9     45609
10    46107
11    49602
Name: block, dtype: int64

In [5]:
# Prequential sliding validation: for each pair of consecutive blocks, fit a
# fresh model on block k-1 only and score it on block k.
results_val = []  # one correlation per validation block, in block order

# Loop-invariant setup, built once instead of on every iteration.
feature_cols = [col for col in data.columns if 'feature' in str(col)]
lgbm_params = dict(
    max_depth=5,
    num_leaves=2**5,
    learning_rate=0.01,
    n_estimators=2000,
    colsample_bytree=0.1,
    random_state=0,  # fixed seed
)
# Read the last block id from the data rather than hard-coding it, so the loop
# stays in sync with however the 'block' column was built.
last_block = int(data['block'].max())

for block in range(1, last_block + 1):
    print("Train block {} - Validation Block {}".format(block - 1, block))

    train = data[data['block'] == block - 1]
    val = data[data['block'] == block]

    X_train, y_train = train[feature_cols], train['target']
    X_val, y_val = val[feature_cols], val['target']

    mdl = LGBMRegressor(**lgbm_params)
    mdl.fit(X_train, y_train)

    # Keep the validation index on the predictions so they stay row-aligned with y_val.
    predictions = pd.Series(mdl.predict(X_val), index=val.index)
    # Score: Pearson correlation between the target and the percentile rank of
    # the predictions (ties broken by order of appearance).
    ranked_predictions = predictions.rank(pct=True, method="first")
    correlation = np.corrcoef(y_val, ranked_predictions)[0, 1]

    results_val.append(correlation)
    print("Correlation {}".format(correlation))
    print()

Train block 0 - Validation Block 1
Correlation 0.03492548009783929

Train block 1 - Validation Block 2
Correlation 0.03553221654871381

Train block 2 - Validation Block 3
Correlation 0.04296995726018694

Train block 3 - Validation Block 4
Correlation 0.037621678419400575

Train block 4 - Validation Block 5
Correlation 0.018147158854925346

Train block 5 - Validation Block 6
Correlation 0.02192667450273779

Train block 6 - Validation Block 7
Correlation 0.024864635081096105

Train block 7 - Validation Block 8
Correlation 0.03004370841022358

Train block 8 - Validation Block 9
Correlation 0.03598199586372601

Train block 9 - Validation Block 10
Correlation 0.025698432695055572

Train block 10 - Validation Block 11
Correlation 0.03020776368773019



In [6]:
# Median of the per-block validation correlations from the sliding run above.
np.median(results_val)

0.03020776368773019

In [7]:
# Worst (lowest) validation correlation across the blocks of the sliding run.
np.min(results_val)

0.018147158854925346

In [8]:
# Best (highest) validation correlation across the blocks of the sliding run.
np.max(results_val)

0.04296995726018694

In [9]:
# Mean validation correlation across the blocks of the sliding run.
np.mean(results_val)

0.030719972856512293

In [10]:
# Sanity check: number of train/validation splits that were scored.
len(results_val)

11

## Prequential Sliding With Gap

In [11]:
# Prequential sliding validation with a gap: fit on block k-2, skip block k-1,
# and score on block k, so one unused block separates training from validation.
# Note: this resets results_val, discarding the scores of any earlier run.
results_val = []  # one correlation per validation block, in block order

# Loop-invariant setup, built once instead of on every iteration.
feature_cols = [col for col in data.columns if 'feature' in str(col)]
lgbm_params = dict(
    max_depth=5,
    num_leaves=2**5,
    learning_rate=0.01,
    n_estimators=2000,
    colsample_bytree=0.1,
    random_state=0,  # fixed seed
)
# Read the last block id from the data rather than hard-coding it, so the loop
# stays in sync with however the 'block' column was built.
last_block = int(data['block'].max())

# Start at block 2: it is the first block with both a gap block and a training
# block before it.
for block in range(2, last_block + 1):
    print("Train block {} - Gap Block {} - Validation Block {}".format(block - 2, block - 1,  block))

    train = data[data['block'] == block - 2]
    val = data[data['block'] == block]

    X_train, y_train = train[feature_cols], train['target']
    X_val, y_val = val[feature_cols], val['target']

    mdl = LGBMRegressor(**lgbm_params)
    mdl.fit(X_train, y_train)

    # Keep the validation index on the predictions so they stay row-aligned with y_val.
    predictions = pd.Series(mdl.predict(X_val), index=val.index)
    # Score: Pearson correlation between the target and the percentile rank of
    # the predictions (ties broken by order of appearance).
    ranked_predictions = predictions.rank(pct=True, method="first")
    correlation = np.corrcoef(y_val, ranked_predictions)[0, 1]

    results_val.append(correlation)
    print("Correlation {}".format(correlation))
    print()

Train block 0 - Gap Block 1 - Validation Block 2
Correlation 0.0468018477612904

Train block 1 - Gap Block 2 - Validation Block 3
Correlation 0.03841817632654235

Train block 2 - Gap Block 3 - Validation Block 4
Correlation 0.034839140974773246

Train block 3 - Gap Block 4 - Validation Block 5
Correlation 0.014520762242386933

Train block 4 - Gap Block 5 - Validation Block 6
Correlation 0.0196579681438175

Train block 5 - Gap Block 6 - Validation Block 7
Correlation 0.004024340467224122

Train block 6 - Gap Block 7 - Validation Block 8
Correlation 0.007632211342157569

Train block 7 - Gap Block 8 - Validation Block 9
Correlation 0.0340842833635809

Train block 8 - Gap Block 9 - Validation Block 10
Correlation 0.01843576000136357

Train block 9 - Gap Block 10 - Validation Block 11
Correlation 0.02454795689731109



In [12]:
# Median of the per-block validation correlations from the sliding-with-gap run above.
np.median(results_val)

0.022102962520564297

In [13]:
# Worst (lowest) validation correlation across the blocks of the sliding-with-gap run.
np.min(results_val)

0.004024340467224122

In [14]:
# Best (highest) validation correlation across the blocks of the sliding-with-gap run.
np.max(results_val)

0.0468018477612904

In [15]:
# Mean validation correlation across the blocks of the sliding-with-gap run.
np.mean(results_val)

0.02429624475204477

In [16]:
# Sanity check: number of splits scored (one fewer than without the gap, since block 1 has no block two steps before it).
len(results_val)

10

# Fim