In [1]:
import joblib
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error as mae
from timm.optim import NovoGrad
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import wandb

from src.constants import TARGETS

seed=123786000
pl.seed_everything(seed)

Global seed set to 123786000


123786000

In [2]:
class PivotData(Dataset):
    def __init__(self, data, labels, scaler):
        super().__init__()
        self.data = data.copy().astype(np.float32)
        self.data[np.isnan(self.data)] = 0
        a, b, c = self.data.shape
        self.data = scaler.transform(self.data.reshape(-1, c)).reshape(a, b, c)
        self.labels = labels.astype(np.float32)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], np.sqrt(self.labels[idx])/10.0

In [3]:
X_tr, X_vl = np.load("data/X_tr1_v1.npy"), np.load("data/X_vl1_v1.npy")

onehot = OneHotEncoder(sparse=False, handle_unknown='ignore')
X_tr_cats = onehot.fit_transform(X_tr[:, [14, 15, 21, 22]])

X_tr1 = np.hstack((X_tr_cats, X_tr[:, np.r_[0:14, 16:21, 23:379]]))
X_vl_cats = onehot.transform(X_vl[:, [14, 15, 21, 22]])

X_vl1 = np.hstack((X_vl_cats, X_vl[:, np.r_[0:14, 16:21, 23:379]]))
print(X_tr1.shape, X_vl1.shape)

(1424400, 422) (18992, 422)


In [4]:
scaler= MinMaxScaler((-1, 1))
scaler.fit(X_tr1)

MinMaxScaler(feature_range=(-1, 1))

In [5]:
# data_dict1 = joblib.load("data/artifacts/v0/train_scores1_last.pkl")
# data_dict2 = joblib.load("data/artifacts/v0/train_scores2_last.pkl")
# data_dict3 = joblib.load("data/artifacts/v0/train_scores3_last.pkl")
# data_dict4 = joblib.load("data/artifacts/v0/train_scores4_last.pkl")
# data_dict5 = joblib.load("data/artifacts/v0/train_scores5_last.pkl")

In [6]:
#data_dict['date']
#score_tr_dates, score_vl_dates = data_dict1['date'][:-16], data_dict1['date'][-15:]

In [7]:
#score_vl_dates

In [8]:
# train_scores = np.concatenate((data_dict1["data"][:-16], data_dict2["data"][:-16], data_dict3["data"][:-16],
#                                data_dict4["data"][:-16], data_dict5["data"][:-16]), axis=-1)
# valid_scores = np.concatenate((data_dict1["data"][-15:], data_dict2["data"][-15:], data_dict3["data"][-15:],
#                                data_dict4["data"][-15:], data_dict5["data"][-15:]), axis=-1)

# train_scores.shape, valid_scores.shape

In [9]:
# train_scores[np.isnan(train_scores)] = 0
# valid_scores[np.isnan(valid_scores)] = 0


In [10]:
# scaler= MinMaxScaler((-1, 1))
# scaler.fit(train_scores.reshape(-1, 76))

In [11]:
# players = data_dict1['playerId']
# players[:5]

In [12]:
def reshape_targets(df_index, data):
    dates, date_indices = np.unique(df_index.date.values, return_inverse=True)
    playerids = df_index.playerId.values
    players = np.unique(playerids)

    target_arr = np.zeros((len(dates), len(players), data.shape[1]), dtype=np.float32)
    date_to_idx = {dt: i for i, dt in enumerate(dates)}
    pid_to_idx = {pid: i for i, pid in enumerate(players)}
    for dti, dt in enumerate(dates):
        sub_data = data[date_indices == dti]
        pids = playerids[date_indices == dti]        
        for pid, tgt in zip(pids, sub_data):
            target_arr[dti, pid_to_idx[pid]] = tgt
    return target_arr, dates
                

In [13]:
tr_index, vl_index = pd.read_csv("data/tr_index_small.csv"), pd.read_csv("data/vl_index_small.csv")

In [14]:
tr_targets, tr_dates = reshape_targets(tr_index, tr_index[TARGETS].values)
vl_targets, vl_dates = reshape_targets(vl_index, vl_index[TARGETS].values)

In [15]:
tr_index

Unnamed: 0,playerId,target1,target2,target3,target4,date
0,628317,0.011167,4.474708,0.005168,5.735294,20180101
1,547989,0.042993,5.593385,0.045033,2.794118,20180101
2,519317,0.974327,56.177043,13.693746,64.166667,20180101
3,607625,0.006700,2.675097,0.005168,1.862745,20180101
4,641553,0.011725,3.842412,1.001801,0.686275,20180101
...,...,...,...,...,...,...
1424395,592464,0.000000,0.014978,0.000000,0.080090,20210414
1424396,534606,0.001455,0.056167,0.000000,0.226138,20210414
1424397,622268,0.000582,0.235902,0.000000,0.075379,20210414
1424398,666163,0.000000,0.029956,0.000000,0.018845,20210414


In [16]:
tr_targets.shape, vl_targets.shape

((1200, 1187, 4), (16, 1187, 4))

In [17]:
X3_tr, xtr_dates = reshape_targets(tr_index, X_tr1)
X3_vl, xvl_dates = reshape_targets(vl_index, X_vl1)

In [18]:
X3_tr.shape, X3_vl.shape

((1200, 1187, 422), (16, 1187, 422))

In [59]:
class JointModel(nn.Module):
    def __init__(self, in_f, out_f, d, p):
        super().__init__()
        self.fc1 = nn.Linear(in_f, d)
        self.h1 = nn.Linear(d, d)
        #self.attn = nn.MultiheadAttention(out_f, 1)
        self.classifier = nn.Linear(d, 4)
        self.classifier2 = nn.Linear(8, 4)
        #self.fc2 = nn.Linear(1187, 1187)
        self.bnorm1 = nn.BatchNorm1d(in_f, momentum=0.01)
        #self.norm1 = nn.LayerNorm(d)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        #self.bnorm2 = nn.InstanceNorm1d(d)
        self.drop = nn.Dropout(p)
        self.drop2d = nn.Dropout2d(0.1)
    
    def forward(self, x):
        x = self.bnorm1(x.permute(0, 2, 1)).permute(0, 2, 1).contiguous()
        #x = self.bnorm1(x)
        x = self.fc1(self.drop(x))
        x = self.relu(x)
        #x = x.permute(1, 0, 2).contiguous()
        #x = x + self.attn(x, x, x)[0]
        #x = self.relu(x)
        #x = x.permute(1, 0, 2).contiguous()
        x = x + x * self.tanh(self.h1(x))
        #x = x + x * self.tanh(self.h1(x))

        #x = self.bnorm2(x.permute(0, 2, 1)).permute(0, 2, 1).contiguous()
        x = self.relu(x)
        x = self.classifier(self.drop(x))
        x = torch.cat([x, x.max(dim=1, keepdims=True)[0].repeat(1, 1187, 1)], dim=2)
        x = self.classifier2(x)
        #x = x * self.tanh(self.fc2(x.permute(0, 2, 1)).permute(0, 2, 1).contiguous())
        #x = x/(1e-6 + x.max(dim=1, keepdims=True)[0])
        #x = x.permute(1, 0, 2).contiguous()
        #x = x + self.attn(x, x, x)[0]
        #x = x.permute(1, 0, 2).contiguous()
        return x


class LitModel(pl.LightningModule):
    """PL Model"""
    def __init__(
        self,
        in_f=76,
        out_f=4,
        d=32,
        p=0.05,
        lr=0.003,
        wd=0.001,
        grad_avg=True,
        steps=[30],
        gamma=0.1,
        
    ):
        super().__init__()
        self.save_hyperparameters()
        self.model = JointModel(in_f, out_f, d, p)
        self.criterion = torch.nn.L1Loss()

    def forward(self, x):
        return self.model(x)

    def step(self, batch):
        x, y = batch
        yhat = self.forward(x)
        loss = self.criterion(yhat, y)
        return loss, torch.square(10*yhat), torch.square(10*y)

    def training_step(self, batch, batch_idx):
        loss, preds, y = self.step(batch)
        loss_corr = self.criterion(y, preds)
        self.log("train/loss", loss_corr, on_step=True, on_epoch=True, prog_bar=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        loss, preds, y = self.step(batch)
        loss_corr = self.criterion(y, preds)

        self.log("val/loss", loss_corr, on_step=False, on_epoch=True, prog_bar=True)
        return {"loss": loss}

    def test_step(self, batch, batch_idx):
        loss, preds, y = self.step(batch)
        return {"loss": loss, "preds": preds}

    def test_epoch_end(self, outputs):
        preds = torch.cat([o["preds"] for o in outputs], 0).cpu().numpy()
        self.log("output", preds)

    def configure_optimizers(self):
        optimizer = NovoGrad(self.parameters(), grad_averaging=self.hparams.grad_avg, lr=self.hparams.lr, weight_decay=self.hparams.wd)
        #optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.wd)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, gamma=self.hparams.gamma, milestones=self.hparams.steps)
        return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]

In [65]:
BATCH_SIZE = 8
NUM_WORKERS = 8
IN_F = 422
D = 2048
P = 0.05
LR = 0.005
WD = 0.001
GRAD_AVG = True
MAX_EPOCHS = 50
STEPS = [30, 40]
GAMMA = 0.1
version = 'v2'

In [66]:
tr_ds = PivotData(X3_tr, tr_targets, scaler)
tr_dl = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
vl_ds = PivotData(X3_vl, vl_targets, scaler)
vl_dl = DataLoader(vl_ds, batch_size=BATCH_SIZE*2, drop_last=False, shuffle=False, num_workers=NUM_WORKERS)

In [68]:
model = LitModel(IN_F, 4, D, P, lr=LR, wd=WD, grad_avg=GRAD_AVG, steps=STEPS, gamma=GAMMA)
#model.load_state_dict(torch.load("data/checkpoints/192_4578690_v2/epoch=35-val_loss=1.3396.ckpt")["state_dict"], strict=False)
callbacks = [pl.callbacks.ModelCheckpoint(monitor='val/loss', save_top_k=3, mode='min', dirpath=f'data/pivot_nn/checkpoints/{D}_{seed}_{version}', auto_insert_metric_name=False,
                                         filename="epoch={epoch}-val_loss={val/loss:.4f}")]
logger = pl.loggers.wandb.WandbLogger(project='mlb', name=f'nnflat_vl15_{str(D)}_{seed}', tags=['flat nn', f'{D}'], id=wandb.util.generate_id())
logger.log_hyperparams({'D': D, 'P': P, 'LR': LR, 'WD': WD, 'GRAD_AVG': GRAD_AVG, 'BATCH_SIZE': BATCH_SIZE})
trainer = pl.Trainer(gpus=[1], max_epochs=MAX_EPOCHS, logger=logger, callbacks=callbacks, stochastic_weight_avg=False, deterministic=True)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [69]:
trainer.fit(model, tr_dl, vl_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name      | Type       | Params
-----------------------------------------
0 | model     | JointModel | 5.1 M 
1 | criterion | L1Loss     | 0     
-----------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.287    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 123786000


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [70]:
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train/loss_step,1.26231
epoch,49.0
trainer/global_step,7499.0
_runtime,366.0
_timestamp,1627460252.0
_step,249.0
train/loss_epoch,1.10272
val/loss,1.38187


0,1
train/loss_step,█▅▁▂▂▂▂▃▁▂▁▂▁▂▁▁▂▂▁▂▂▂▂▂▁▂▂▁▂▂▂▁▃▂▃▂▁▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/loss,█▃▃▂▂▂▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


In [None]:
# lastn 