In [1]:
import rtdl
import torch
import torch.nn as nn
import torch.nn.functional as F
import zero

In [41]:
# Load the data
# NOTE(review): pandas is imported in this cell rather than alongside the
# other imports in the first cell.
import pandas as pd

# Relative paths, resolved against the notebook's working directory.
# dev_in_path is only defined here; it is read by a later cell.
train_path = 'data/train.csv'
dev_in_path = 'data/dev_in.csv'

# Read the training split and preview its first rows as the cell output.
df_train = pd.read_csv(train_path)
df_train.head()


Unnamed: 0,fact_time,fact_latitude,fact_longitude,fact_temperature,fact_cwsm_class,climate,topography_bathymetry,sun_elevation,climate_temperature,climate_pressure,...,cmc_0_1_66_0_grad,cmc_0_1_66_0_next,cmc_0_1_67_0_grad,cmc_0_1_67_0_next,cmc_0_1_68_0_grad,cmc_0_1_68_0_next,gfs_2m_dewpoint_grad,gfs_2m_dewpoint_next,gfs_total_clouds_cover_low_grad,gfs_total_clouds_cover_low_next
0,1543321000.0,26.9688,-99.248901,2.0,0.0,dry,127.0,-17.526443,14.613571,754.263405,...,0.0,0.0,0.0,0.0,0.0,0.0,-2.600006,-2.750006,0.0,0.0
1,1538776000.0,29.374201,-100.927002,31.0,20.0,mild temperate,297.0,41.531032,26.992143,733.117168,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.600006,17.950006,-12.0,11.0
2,1552115000.0,22.149599,113.592003,17.0,10.0,mild temperate,-1.0,43.916531,18.842143,761.571076,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.233978,21.450006,1.0,8.0
3,1549566000.0,34.678699,-86.684799,24.0,20.0,mild temperate,193.0,40.240955,8.303571,747.52491,...,0.0,0.0,0.0,0.0,0.0,0.0,0.059448,16.150018,-58.0,41.0
4,1552910000.0,46.066667,41.966667,9.0,20.0,dry,90.0,30.39466,6.451429,753.168113,...,0.0,0.0,0.0,0.0,0.0,0.0,0.400024,3.150018,18.0,92.0


In [42]:
# Remove rows with NaN in training data
# The row count is printed before and after so the number of dropped rows
# can be read off the output.
print(len(df_train))
# NOTE(review): df_train is rebound, so the unfiltered frame is no longer
# reachable; re-running only this cell prints the same count twice.
df_train = df_train.dropna()
print(len(df_train))

3129592
2965544


In [43]:
# Read the dev_in split (path defined in the data-loading cell) and preview
# its first rows as the cell output.
df_dev_in = pd.read_csv(dev_in_path)
df_dev_in.head()

Unnamed: 0,fact_time,fact_latitude,fact_longitude,fact_temperature,fact_cwsm_class,climate,topography_bathymetry,sun_elevation,climate_temperature,climate_pressure,...,cmc_0_1_66_0_grad,cmc_0_1_66_0_next,cmc_0_1_67_0_grad,cmc_0_1_67_0_next,cmc_0_1_68_0_grad,cmc_0_1_68_0_next,gfs_2m_dewpoint_grad,gfs_2m_dewpoint_next,gfs_total_clouds_cover_low_grad,gfs_total_clouds_cover_low_next
0,1539162000,-40.35,-9.88,11.0,10,tropical,-843.0,31.78249,10.070714,765.631228,...,0.0,0.0,0.0,0.0,0.0,0.0,0.505035,2.647577,2.0,2.0
1,1545006600,53.421299,-6.27007,4.0,10,mild temperate,67.0,-59.691521,7.005,752.897615,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.400024,1.249994,0.0,0.0
2,1540094400,-19.7577,63.361,26.0,10,dry,6.0,35.250889,23.327143,763.115016,...,0.0,0.0,0.0,0.0,0.0,0.0,0.100006,21.050012,-1.0,1.0
3,1552611600,35.245899,47.009201,5.0,10,mild temperate,1390.0,-23.755615,3.109286,609.419333,...,1.69672,5.1653,0.0,4.9e-05,0.0,0.0,-1.5,-0.349982,-12.0,81.0
4,1545631200,26.633333,118.15,14.0,20,mild temperate,210.0,33.040438,12.172143,734.678037,...,0.0,0.0,0.0,0.0,0.0,0.0,0.102081,11.513879,-15.0,83.0


In [44]:
# Replace NaN with 0 for dev_in data
# Unlike the training frame (where rows with NaN are dropped), no rows are
# removed here, so both printed lengths are equal.
print(len(df_dev_in))
# NOTE(review): fillna(0) is applied to every column, so a missing
# fact_cwsm_class would also become 0 — confirm the label column has no NaN.
df_dev_in = df_dev_in.fillna(0)
print(len(df_dev_in))

50000
50000


In [53]:
# Set Seed
# Seeds PyTorch's global RNG only; NumPy and Python's `random` module are
# not seeded in this cell.
seed = 1
# As the last expression of the cell, the returned Generator is displayed.
torch.manual_seed(seed)

<torch._C.Generator at 0x1850eddf300>

In [54]:
# Preprocess into tensors
import numpy as np
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

def get_lab_to_ind(data_df):
    '''
    Prepare a label to index map.

    Args:
        data_df: DataFrame with a 'fact_cwsm_class' column holding the
            class label of every row.

    Returns:
        dict mapping each distinct label to an index in 0..n_labels-1.
        Labels are sorted before being numbered, so the mapping is
        deterministic and independent of set iteration order.
    '''
    # Sorting requires the labels to be mutually comparable (they are
    # numeric class codes in the frames loaded by this notebook).
    labels = sorted(set(data_df['fact_cwsm_class']))
    return {lab: i for i, lab in enumerate(labels)}

# The label map is built from the training labels only; the dev labels
# below are encoded with the same map.
lab_to_ind = get_lab_to_ind(df_train)
batch_size = 256

# Train
# Every column from position 6 onwards is used as a model input feature.
# NOTE(review): assumes all of those columns are numeric — confirm against
# the CSV schema.
X_train = torch.FloatTensor(np.asarray(df_train.iloc[:,6:]))
y_train = np.asarray(df_train['fact_cwsm_class'])
y_train = torch.LongTensor(np.asarray([lab_to_ind[lab] for lab in y_train]))

train_ds = TensorDataset(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Dev in
X_dev_in = torch.FloatTensor(np.asarray(df_dev_in.iloc[:,6:]))
y_dev_in = df_dev_in['fact_cwsm_class']
# A dev label that never occurs in the training data raises KeyError here.
y_dev_in = torch.LongTensor(np.asarray([lab_to_ind[lab] for lab in y_dev_in]))

dev_in_ds = TensorDataset(X_dev_in, y_dev_in)
# Evaluation only aggregates over the whole split, so the order of batches
# is irrelevant; not shuffling avoids drawing from the seeded RNG between
# training epochs.
dev_in_dl = DataLoader(dev_in_ds, batch_size=batch_size, shuffle=False)

In [55]:
# Get the device

def get_default_device():
    """Pick the device to run on.

    Returns torch.device('cuda') when CUDA is available and
    torch.device('cpu') otherwise, printing which one was selected.
    """
    has_cuda = torch.cuda.is_available()
    print("Got CUDA!" if has_cuda else "No CUDA found")
    return torch.device('cuda' if has_cuda else 'cpu')

# Resolve the device once; later cells move the model, the loss and every
# batch onto it.
device = get_default_device()

No CUDA found


In [56]:
# Create the Feature Transformer Model

# Build an FTTransformer through rtdl's `make_default` constructor:
#   - one numerical feature per column of X_train, no categorical features
#   - one output value per class in lab_to_ind
# NOTE(review): last_layer_query_idx=[-1] presumably restricts the last
# block's attention queries to the final (CLS) token — confirm in rtdl docs.
model = rtdl.FTTransformer.make_default(
    n_num_features=X_train.shape[1],
    cat_cardinalities=None,
    last_layer_query_idx=[-1],
    d_out=len(lab_to_ind)
)

# Move the model to the selected device; as the cell's last expression the
# returned module is also displayed.
model.to(device)

FTTransformer(
  (feature_tokenizer): FeatureTokenizer(
    (num_tokenizer): NumericalFeatureTokenizer()
  )
  (cls_token): CLSToken()
  (transformer): Transformer(
    (blocks): ModuleList(
      (0): ModuleDict(
        (attention): MultiheadAttention(
          (W_q): Linear(in_features=192, out_features=192, bias=True)
          (W_k): Linear(in_features=192, out_features=192, bias=True)
          (W_v): Linear(in_features=192, out_features=192, bias=True)
          (W_out): Linear(in_features=192, out_features=192, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
        )
        (ffn): FFN(
          (linear_first): Linear(in_features=192, out_features=512, bias=True)
          (activation): ReGLU()
          (dropout): Dropout(p=0.1, inplace=False)
          (linear_second): Linear(in_features=256, out_features=192, bias=True)
        )
        (attention_residual_dropout): Dropout(p=0.0, inplace=False)
        (ffn_residual_dropout): Dropout(p=0.0, inplace=False)


In [57]:
# Create optimizer

# Hyper-parameters for the fallback AdamW branch only. An FTTransformer
# builds its own default optimizer and does not read them, but they must
# exist so that swapping in another model does not raise NameError.
# NOTE(review): values restored from the previously commented-out settings;
# they are untuned — confirm before relying on the fallback.
lr = 0.01
weight_decay = 0.9

optimizer = (
    model.make_default_optimizer()
    if isinstance(model, rtdl.FTTransformer)
    else torch.optim.AdamW(model.parameters(), lr=lr,
                          weight_decay=weight_decay)
)

In [58]:
# Create loss function criterion
# Cross-entropy loss; the training and evaluation functions call it with the
# model's logits and the integer class indices produced by lab_to_ind.
criterion = nn.CrossEntropyLoss().to(device)

In [59]:
# Create pipeline to apply model
def apply_model(model, x_num, x_cat=None):
    '''
    Run a forward pass, hiding the call-signature difference between models.

    An rtdl.FTTransformer is called with the numerical and the categorical
    inputs as two separate arguments; any other model is called with the
    numerical input only.
    '''
    if not isinstance(model, rtdl.FTTransformer):
        return model(x_num)
    return model(x_num, x_cat)

In [60]:
# Training and Evaluation Function

class AverageMeter:
    """Keeps the most recent value of a metric and its running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the last value, mean, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, weighted by `n` observations.

        The running mean `avg` is recomputed as the weighted sum divided by
        the total count.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy_topk(output, target, k=1):
    """Computes the topk accuracy.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: tensor holding the true class index of each sample.
        k: a sample counts as correct when its target is among the k
            highest-scoring classes of its row.

    Returns:
        Scalar tensor with the accuracy as a percentage (0-100).
    """
    _, pred = torch.topk(output, k=k, dim=1, largest=True, sorted=True)

    # Compare all k predictions with the target in one broadcasted op. The
    # top-k indices within a row are distinct, so each row contributes at
    # most one match and the total is the number of correct samples.
    hits = torch.eq(pred, target.reshape(-1, 1)).sum()
    return hits / len(output) * 100

def train(train_loader, model, criterion, optimizer, epoch, device, print_freq=5):
    '''
    Run one train epoch.

    Args:
        train_loader: iterable yielding (features, target) batches.
        model: network to optimise; called through apply_model.
        criterion: loss called as criterion(logits, target).
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.
        device: device every batch is moved to before the forward pass.
        print_freq: print progress every `print_freq` batches; a falsy
            value (0 or None) disables progress printing.
    '''
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to train mode
    model.train()

    for i, (x, target) in enumerate(train_loader):

        x = x.to(device)
        target = target.to(device)

        # Forward pass
        logits = apply_model(model, x)
        loss = criterion(logits, target)

        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        # detach() keeps the metric computation out of the autograd graph.
        acc = accuracy_topk(logits.detach(), target)
        # Weight each batch by its size so the running means stay correct
        # when the final batch is smaller than the others.
        accs.update(acc.item(), x.size(0))
        losses.update(loss.item(), x.size(0))

        # Guard against print_freq == 0, which would otherwise raise
        # ZeroDivisionError in the modulo.
        if print_freq and i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuracy {prec.val:.3f} ({prec.avg:.3f})'.format(
                      epoch, i, len(train_loader),
                      loss=losses, prec=accs))

@torch.no_grad()
def eval(val_loader, model, criterion, device):
    '''
    Run evaluation.

    Puts the model in eval mode, runs it over every batch of `val_loader`
    with gradient tracking disabled, and prints the size-weighted mean loss
    and top-1 accuracy.

    NOTE(review): the name shadows Python's built-in eval(); it is kept
    because callers use this name.

    Args:
        val_loader: iterable yielding (features, target) batches.
        model: network to evaluate; called through apply_model.
        criterion: loss called as criterion(logits, target).
        device: device every batch is moved to before the forward pass.
    '''
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to eval mode
    model.eval()

    for x, target in val_loader:

        x = x.to(device)
        target = target.to(device)

        # Forward pass
        logits = apply_model(model, x)
        loss = criterion(logits, target)

        # measure accuracy and record loss
        # Under no_grad the logits carry no graph, so they are used directly.
        acc = accuracy_topk(logits, target)
        accs.update(acc.item(), x.size(0))
        losses.update(loss.item(), x.size(0))

    print('Dev in\t Loss ({loss.avg:.4f})\t'
          'Accuracy ({prec.avg:.3f})\n'.format(
              loss=losses, prec=accs))

In [None]:
# Train
# Each epoch runs one full pass over train_dl followed by one evaluation
# pass over dev_in_dl.
epochs = 10
for epoch in range(epochs):

    # train for one epoch
    # Log the learning rate of the optimizer's first parameter group.
    print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
    train(train_dl, model, criterion, optimizer, epoch, device)

    # evaluate on validation set
    # `eval` is the evaluation function defined in this notebook, which
    # shadows Python's built-in eval().
    eval(dev_in_dl, model, criterion, device)

current lr 1.00000e-04
Epoch: [0][0/11585]	Loss 2.4380 (2.4380)	Accuracy 1.562 (1.562)
Epoch: [0][5/11585]	Loss 1.9627 (2.1928)	Accuracy 28.125 (17.969)
Epoch: [0][10/11585]	Loss 1.6063 (1.9975)	Accuracy 35.938 (25.107)
Epoch: [0][15/11585]	Loss 1.5470 (1.8667)	Accuracy 41.016 (28.467)
Epoch: [0][20/11585]	Loss 1.4440 (1.7773)	Accuracy 36.719 (30.450)
Epoch: [0][25/11585]	Loss 1.3629 (1.7122)	Accuracy 41.797 (31.956)
Epoch: [0][30/11585]	Loss 1.3858 (1.6633)	Accuracy 39.453 (32.976)
Epoch: [0][35/11585]	Loss 1.5077 (1.6272)	Accuracy 33.203 (33.474)
Epoch: [0][40/11585]	Loss 1.4634 (1.6019)	Accuracy 38.672 (33.851)
Epoch: [0][45/11585]	Loss 1.3368 (1.5797)	Accuracy 35.938 (34.035)
Epoch: [0][50/11585]	Loss 1.3595 (1.5578)	Accuracy 35.938 (34.176)
Epoch: [0][55/11585]	Loss 1.3092 (1.5421)	Accuracy 39.062 (34.570)
Epoch: [0][60/11585]	Loss 1.4010 (1.5287)	Accuracy 39.062 (34.849)
Epoch: [0][65/11585]	Loss 1.3414 (1.5192)	Accuracy 38.672 (34.943)
Epoch: [0][70/11585]	Loss 1.3333 (1.5076)	A