# The following is my take on March Mania 2022.
I took some inspiration for the data analysis and feature engineering (the sabermetric part) from [this notebook](https://www.kaggle.com/toshimelonhead/ncaa-march-madness-sabermetric-spin).

In [None]:
#import necessary items

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import re
# Import PyTorch things
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn.functional as F

## Get all the necessary data

In [None]:
# Directory holding the competition CSV files; every read_csv below joins onto it.
PATH = '/kaggle/input/mens-march-mania-2022/MDataFiles_Stage2'
# Notebook display only: show the current working directory.
os.getcwd()

In [None]:
# Massey ordinal rankings; the columns used here are Season, TeamID and
# OrdinalRank (the file also carries RankingDayNum and SystemName).
ranking_df = pd.read_csv(os.path.join(PATH, 'MMasseyOrdinals_thruDay128.csv'))
# keep = ['DOL', 'POM', 'MOR']
# ranking_df = ranking_df[ranking_df['SystemName'].isin(keep)].reset_index(drop=True).drop('SystemName',axis=1)

# Mean rank per (Season, TeamID) across every ranking system and ranking day.
# Only OrdinalRank is aggregated: averaging the whole frame would also try to
# average the string column SystemName, which raises on recent pandas versions.
avg_rank = ranking_df.groupby(['Season', 'TeamID'], as_index=False)['OrdinalRank'].mean()
avg_rank.head()

In [None]:
# Tournament seeds; merged below on (Season, TeamID) to obtain the 'Seed' label.
seed_df = pd.read_csv(os.path.join(PATH,'MNCAATourneySeeds.csv'))

# Keep seasons from 2003 onward (presumably to match the seasons covered by the
# detailed-results files -- TODO confirm).
seed_df = seed_df[seed_df['Season']>=2003].reset_index(drop=True)
seed_df

In [None]:
# Per-game detailed results of regular-season games (source of all season features).
Reg_detail_csv = pd.read_csv(os.path.join(PATH,'MRegularSeasonDetailedResults.csv'))
Reg_detail_csv.head()

In [None]:
# Per-game detailed results of tournament games; its (Season, WTeamID, LTeamID)
# triples become the training/validation examples further down.
NCAA_detail_csv = pd.read_csv(os.path.join(PATH,'MNCAATourneyDetailedResults.csv'))
NCAA_detail_csv

## Now we start Feature Engineering

In [None]:
# Work on a copy so the columns added during feature engineering do not
# modify the raw regular-season frame.
df = Reg_detail_csv.copy()

In [None]:
# Regular-season wins and losses per (Season, TeamID): count the games in which
# the team appears as the winner / as the loser.
wins_by_team = df.groupby(['Season', 'WTeamID']).count().reset_index()
num_win = (
    wins_by_team[['Season', 'WTeamID', 'DayNum']]
    .rename(columns={"DayNum": "NumWins", "WTeamID": "TeamID"})
    .fillna(0)
)

losses_by_team = df.groupby(['Season', 'LTeamID']).count().reset_index()
num_loss = (
    losses_by_team[['Season', 'LTeamID', 'DayNum']]
    .rename(columns={"DayNum": "NumLosses", "LTeamID": "TeamID"})
    .fillna(0)
)

In [None]:
# Distinct (Season, TeamID) pairs that won / lost at least one regular-season game.
df_features_season_w = (
    df.groupby(['Season', 'WTeamID']).size().reset_index()[['Season', 'WTeamID']]
    .rename(columns={"WTeamID": "TeamID"})
)
df_features_season_l = (
    df.groupby(['Season', 'LTeamID']).size().reset_index()[['Season', 'LTeamID']]
    .rename(columns={"LTeamID": "TeamID"})
)

# Margin of victory of every game (always taken from the winner's side).
df['ScoreMargin'] = df['WScore'] - df['LScore']

# Average margin in the games a team won, and in the games it lost.
# Only ScoreMargin is aggregated: averaging the whole frame would also hit its
# non-numeric columns, which raises on recent pandas versions.
win_score_margin = (
    df.groupby(['Season', 'WTeamID'], as_index=False)['ScoreMargin'].mean()
    .rename(columns={"ScoreMargin": "AvgWinningScoreMargin", "WTeamID": "TeamID"})
    .fillna(0)
)
lose_score_margin = (
    df.groupby(['Season', 'LTeamID'], as_index=False)['ScoreMargin'].mean()
    .rename(columns={"ScoreMargin": "AvgLosingScoreMargin", "LTeamID": "TeamID"})
    .fillna(0)
)

In [None]:
# One row per (Season, TeamID) that played at least one regular-season game.
team_seasons = pd.concat([df_features_season_w, df_features_season_l], axis=0)
team_seasons = team_seasons.drop_duplicates().sort_values(['Season', 'TeamID'])
df_features_season = team_seasons.reset_index(drop=True)

# Attach the win/loss counts and the per-outcome average margins.
for per_team_stat in (num_win, num_loss, win_score_margin, lose_score_margin):
    df_features_season = df_features_season.merge(
        per_team_stat, on=['Season', 'TeamID'], how='left'
    )

# A team with no wins (or no losses) has no matching row on that side: use 0.
for stat_col in ('NumWins', 'NumLosses', 'AvgWinningScoreMargin', 'AvgLosingScoreMargin'):
    df_features_season[stat_col] = df_features_season[stat_col].fillna(0)

games_played = df_features_season['NumWins'] + df_features_season['NumLosses']
df_features_season['WinPercentage'] = df_features_season['NumWins'] / games_played

# Signed average margin over all games: points won by minus points lost by.
total_margin = (
    df_features_season['NumWins'] * df_features_season['AvgWinningScoreMargin']
    - df_features_season['NumLosses'] * df_features_season['AvgLosingScoreMargin']
)
df_features_season['AvgScoringMargin'] = total_margin / games_played
df_features_season = df_features_season.drop(
    columns=['AvgWinningScoreMargin', 'AvgLosingScoreMargin']
)

df_features_season

In [None]:
# Per-game advanced ("sabermetric") statistics for both teams of every
# regular-season game in df. Every column is computed row-wise from df, so the
# rows line up with df's rows. W* columns describe the winner, L* the loser.
sabermetrics = pd.DataFrame()

sabermetrics['Season'] = df['Season']
sabermetrics['WTeamID'] = df['WTeamID']
sabermetrics['LTeamID'] = df['LTeamID']

# Number of Possessions (estimate): field-goal attempts minus offensive
# rebounds, plus turnovers, plus 0.44 per free-throw attempt.
sabermetrics['WPossessions'] = (df['WFGA'] - df['WOR']) + df['WTO'] + .44 * df['WFTA']
sabermetrics['LPossessions'] = (df['LFGA'] - df['LOR']) + df['LTO'] + .44 * df['LFTA']

# df['WPossessions'] = sabermetrics['WPossessions']
# df['LPossessions'] = sabermetrics['LPossessions']

# Points Per Possession
sabermetrics['WPtsPerPoss'] = df['WScore'] / sabermetrics['WPossessions']
sabermetrics['LPtsPerPoss'] = df['LScore'] / sabermetrics['LPossessions']

# Effective Field Goal Percentage: half of the points scored from the field
# (score minus made free throws), per field-goal attempt.
sabermetrics['WEffectiveFGPct'] = ((df['WScore'] - df['WFTM']) / 2) / df['WFGA']
sabermetrics['LEffectiveFGPct'] = ((df['LScore'] - df['LFTM']) / 2) / df['LFGA']

# Percentage of Field Goals Assisted
sabermetrics['WAssistRate'] = df['WAst'] / df['WFGM']
sabermetrics['LAssistRate'] = df['LAst'] / df['LFGM']

# Offensive Rebound Percentage: offensive rebounds per own missed field goal.
sabermetrics['WOReboundPct'] = df['WOR'] / (df['WFGA'] - df['WFGM'])
sabermetrics['LOReboundPct'] = df['LOR'] / (df['LFGA'] - df['LFGM'])

# Defensive Rebound Percentage: defensive rebounds per OPPONENT missed field
# goal (note the denominators use the other team's columns).
sabermetrics['WDReboundPct'] = df['WDR'] / (df['LFGA'] - df['LFGM'])
sabermetrics['LDReboundPct'] = df['LDR'] / (df['WFGA'] - df['WFGM'])

# Assist to Turnover Ratio
# NOTE(review): a game with zero turnovers makes this a division by zero
# (inf/NaN in pandas) -- verify whether such rows exist in the data.
sabermetrics['WATORatio'] = df['WAst'] / df['WTO']
sabermetrics['LATORatio'] = df['LAst'] / df['LTO']

# Turnover Rate: turnovers per estimated possession.
sabermetrics['WTORate'] = df['WTO'] / sabermetrics['WPossessions']
sabermetrics['LTORate'] = df['LTO'] /  sabermetrics['LPossessions']

# Percentage of Shots Beyond the Arc: three-point attempts per field-goal attempt.
sabermetrics['WBArcPct'] = df['WFGA3'] / df['WFGA']
sabermetrics['LBArcPct'] = df['LFGA3'] /  df['LFGA']

# Free Throw Rate: free-throw attempts per field-goal attempt.
sabermetrics['WFTRate'] = df['WFTA'] / df['WFGA']
sabermetrics['LFTRate'] = df['LFTA'] /  df['LFGA']

In [None]:
# Training/validation examples: one row per tournament game, identified by the
# season and the winning / losing team ids. Features are merged on below.
df_tnv = NCAA_detail_csv[['Season','WTeamID','LTeamID']]


In [None]:
# Attach the season-average Massey rank of the winner, then of the loser.
for side in ('W', 'L'):
    df_tnv = pd.merge(
        df_tnv,
        avg_rank,
        how='left',
        left_on=['Season', side + 'TeamID'],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns={'OrdinalRank': side + 'Rank'})

# Attach the raw tournament seed label of the winner, then of the loser.
for side in ('W', 'L'):
    df_tnv = pd.merge(
        df_tnv,
        seed_df,
        how='left',
        left_on=['Season', side + 'TeamID'],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns={'Seed': side + 'Seed'})

In [None]:
def seeder(seed):
    """Return the digits contained in a seed label as an int (e.g. "W01" -> 1)."""
    digits = "".join(re.findall("[0-9]", seed))
    return int(digits)

In [None]:
# Replace every missing value with the string '0' (so that seeder() can parse
# missing seeds), then turn the seed labels into integers.
df_tnv.fillna('0', inplace=True)
for seed_col in ('WSeed', 'LSeed'):
    df_tnv[seed_col] = df_tnv[seed_col].map(seeder)


## IMPORTANT
`Wcols` lists the feature columns of the winning team and `Lcols` lists the corresponding feature columns of the losing team.

In [None]:
# Feature columns of the winning team: season-average box-score stats, seed,
# average rank, win/loss record and the season-average advanced metrics.
Wcols = ['WFGM','WFGA','WFGM3','WFGA3','WFTM','WFTA','WOR','WDR','WAst','WTO','WStl',
        'WBlk','WPF',
    'WSeed','WRank','WNumWins','WNumLosses','WWinPercentage','WPossessions','WPtsPerPoss',
         'WEffectiveFGPct','WAssistRate','WOReboundPct','WDReboundPct','WATORatio',
         'WTORate','WBArcPct','WFTRate']
# Feature columns of the losing team, in the same order. Derived from Wcols so
# the two lists cannot drift apart (the hand-written list used the winner's
# 'WNumWins', 'WNumLosses' and 'WWinPercentage' for the loser as well).
Lcols = ['L' + name[1:] for name in Wcols]
len(Wcols)

In [None]:
def ColPopper(w,pop):
    """
    Build a rename mapping that strips the first character of every name in w
    (e.g. 'WFGM' -> 'FGM').

    pop plays no part in the mapping; it is only echoed in the printed message
    so the call site shows which prefix is being removed.
    """
    ren = {name: name[1:] for name in w}
    print(pop , ' popped successfully')
    return ren

In [None]:
# Advanced-metric columns (from 'Possessions' onward) for each side, plus the
# key columns needed to select them after the group-by.
w = Wcols[Wcols.index('WPossessions'):].copy()
l = Lcols[Lcols.index('LPossessions'):].copy()
W = ['WTeamID', 'Season'] + w
L = ['LTeamID', 'Season'] + l

# Season means of the metrics over the games a team won ...
won_means = sabermetrics.groupby(['Season', 'WTeamID']).mean().reset_index()[W]
sabermetrics_w = won_means.rename(columns={"WTeamID": "TeamID"}).rename(columns=ColPopper(w, 'W'))
# ... and over the games it lost, both with the W/L prefix removed.
lost_means = sabermetrics.groupby(['Season', 'LTeamID']).mean().reset_index()[L]
sabermetrics_l = lost_means.rename(columns={"LTeamID": "TeamID"}).rename(columns=ColPopper(l, 'L'))

# Stack both views and average them per team.
# NOTE(review): this is the plain mean of the "won" and "lost" averages, not a
# mean weighted by the number of games on each side -- confirm that is intended.
stacked = pd.concat([sabermetrics_w, sabermetrics_l], axis=0).drop_duplicates()
sabermetrics_season = stacked.sort_values(['Season', 'TeamID']).reset_index(drop=True)
sabermetrics_season = sabermetrics_season.groupby(['Season', 'TeamID']).mean().reset_index()

# Combine with the win/loss features (inner join on the team-season key).
df_features_season = sabermetrics_season.merge(df_features_season, on=['Season', 'TeamID'])

In [None]:
# Notebook display only: inspect the columns of the combined season features.
df_features_season.columns

In [None]:
# Box-score columns ('FGM' through 'PF') for each side, plus the key columns.
w = Wcols[:Wcols.index('WPF') + 1]
l = Lcols[:Lcols.index('LPF') + 1]
W = ['WTeamID', 'Season'] + w
L = ['LTeamID', 'Season'] + l

df_season = df.sort_values(by=['Season', 'DayNum'])

# Season means of the box-score stats over the games a team won / lost.
# The stat columns are selected BEFORE averaging: averaging the whole frame
# would also hit its non-numeric columns, which raises on recent pandas versions.
df_w = (
    df_season.groupby(['Season', 'WTeamID'])[w].mean().reset_index()[W]
    .rename(columns={"WTeamID": "TeamID"})
    .rename(columns=ColPopper(w, 'W'))
)
df_l = (
    df_season.groupby(['Season', 'LTeamID'])[l].mean().reset_index()[L]
    .rename(columns={"LTeamID": "TeamID"})
    .rename(columns=ColPopper(l, 'L'))
)

# Stack both views and average them per team (plain mean of the two averages).
df_season = pd.concat([df_w, df_l], axis=0).\
                                drop_duplicates().sort_values(['Season', 'TeamID']).reset_index(drop=True)
df_season_stats = df_season.groupby(['Season', 'TeamID']).mean().reset_index()
df_season_stats.columns

In [None]:
# Feature columns of df_features_season: everything from 'Possessions' to the
# last column. These are the names that get a team prefix after each merge.
Orig_col_names = df_features_season.columns[list(df_features_season.columns).index('Possessions'):]

def ColsAdder(cols,a):
    """Build a rename mapping that prefixes every name in cols with a (e.g. 'FGM' -> 'WFGM')."""
    return {name: a + name for name in cols}

# Box-score feature columns of df_season_stats ('FGM' to the last column).
o = df_season_stats.columns[list(df_season_stats.columns).index('FGM'):]

# Attach the season-average box-score stats of the winner, then of the loser,
# prefixing the merged columns with 'W' / 'L'.
for side in ('W', 'L'):
    df_tnv = pd.merge(
        df_tnv,
        df_season_stats,
        how='left',
        left_on=['Season', side + 'TeamID'],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns=ColsAdder(o, side))

# Same for the advanced metrics and the win/loss record.
for side in ('W', 'L'):
    df_tnv = pd.merge(
        df_tnv,
        df_features_season,
        how='left',
        left_on=['Season', side + 'TeamID'],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns=ColsAdder(Orig_col_names, side))

In [None]:
# Notebook display only: also confirms every winner feature column exists.
df_tnv[Wcols]

In [None]:
# Snapshot of the fully merged tournament frame used for training/validation.
df_train_validate = df_tnv.copy()

In [None]:
def get_features_and_labels(df,Wcols,Lcols,swap=True):
    '''
    Build the model inputs and labels from a frame of games.

    df    = dataframe holding one game per row.
    Wcols = feature columns of the winning team (first slot).
    Lcols = feature columns of the losing team (second slot).
    swap  = when True (default), the two teams are exchanged on every
            even-indexed row and that row's label is set to 0, so the model
            cannot learn "the first slot always wins". Pass False to keep the
            teams in the given order with all labels equal to 1.

    Returns a tuple (features, labels): features is a float32 tensor of shape
    (2, n_games, n_features) holding the first-slot and second-slot teams, and
    labels is a float32 tensor of shape (n_games,) that is 1 where the first
    slot holds the winner and 0 where it holds the loser.
    '''
    first_slot = df[Wcols].values.astype(np.float32)
    second_slot = df[Lcols].values.astype(np.float32)
    y = np.ones(len(first_slot), dtype=np.float32)

    if swap:
        # Exchange the two teams on rows 0, 2, 4, ...; the right-hand side is
        # copied first so neither array is read after being overwritten.
        first_slot[::2], second_slot[::2] = second_slot[::2].copy(), first_slot[::2].copy()
        y[::2] = 0

    # Stack into a single ndarray before the tensor conversion: building a
    # tensor from a tuple of ndarrays is very slow.
    features = torch.from_numpy(np.stack((first_slot, second_slot)))
    return features, torch.from_numpy(y)

In [None]:
# Build the training tensors, then reorder the axes from
# (team slot, game, feature) to (game, team slot, feature) so the first axis
# indexes samples, as TensorDataset/DataLoader expect.
x,y  = get_features_and_labels(df_train_validate,Wcols,Lcols)
x = x.permute(1,0,2)
x.size()

In [None]:
# Sample submission: the 'ID' column has the form "<Season>_<TeamA>_<TeamB>".
# Split it into three integer columns.
df_test = pd.read_csv(os.path.join(PATH,'MSampleSubmissionStage2.csv'))
for part_index, part_name in enumerate(('Season', 'TeamA', 'TeamB')):
    df_test[part_name] = df_test['ID'].apply(
        lambda game_id, k=part_index: int(game_id.split('_')[k])
    )
df_test

In [None]:
# Attach the season-average Massey rank of team A, then of team B.
for side in ('A', 'B'):
    df_test = pd.merge(
        df_test,
        avg_rank,
        how='left',
        left_on=['Season', 'Team' + side],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns={'OrdinalRank': side + 'Rank'})

# Attach the raw tournament seed label of team A, then of team B.
for side in ('A', 'B'):
    df_test = pd.merge(
        df_test,
        seed_df,
        how='left',
        left_on=['Season', 'Team' + side],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns={'Seed': side + 'Seed'})


In [None]:
# Replace every missing value with the string '0' (so that seeder() can parse
# missing seeds), then turn the seed labels into integers.
df_test.fillna('0', inplace=True)
for seed_col in ('ASeed', 'BSeed'):
    df_test[seed_col] = df_test[seed_col].map(seeder)

In [None]:
# Attach the advanced metrics and win/loss record of team A and of team B.
# Each side joins on ITS OWN team id: the 'A' columns previously joined on
# 'TeamB', which gave both teams of every matchup team B's features.
for side in ('A', 'B'):
    df_test = pd.merge(
        df_test,
        df_features_season,
        how='left',
        left_on=['Season', 'Team' + side],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns=ColsAdder(Orig_col_names, side))

In [None]:
# Box-score feature columns of df_season_stats ('FGM' to the last column).
o = df_season_stats.columns[list(df_season_stats.columns).index('FGM'):]

# Attach the season-average box-score stats of team A and of team B.
# Each side joins on ITS OWN team id: the 'A' columns previously joined on
# 'TeamB', which gave both teams of every matchup team B's stats.
for side in ('A', 'B'):
    df_test = pd.merge(
        df_test,
        df_season_stats,
        how='left',
        left_on=['Season', 'Team' + side],
        right_on=['Season', 'TeamID'],
    ).drop('TeamID', axis=1).rename(columns=ColsAdder(o, side))


In [None]:
# Re-point the feature lists at the test frame: replace each name's one-letter
# team prefix with 'A' (first team) or 'B' (second team).
Wcols = ['A' + first_team_col[1:] for first_team_col in Wcols]
Lcols = ['B' + second_team_col[1:] for second_team_col in Lcols]

In [None]:
# Build the inference tensor with team A always in the first slot and team B
# in the second, shaped (game, team slot, feature).
# The training helper is deliberately NOT reused here: it exchanges the two
# teams on every even row (a training-time label-balancing trick), which made
# the predictions for those rows refer to team B instead of team A.
x_test = torch.from_numpy(
    np.stack((
        df_test[Wcols].values.astype(np.float32),
        df_test[Lcols].values.astype(np.float32),
    ))
).permute(1, 0, 2)

# Now the fun part begins
This cell defines the base class for the actual ADNet. It contains helper methods that keep the code clean during the training and validation process.

In [None]:
class ADNetBase(nn.Module):
    """Training/validation helpers shared by the models in this notebook.

    Subclasses implement forward(); the methods here compute losses, evaluate
    a validation batch, average per-batch results and print an epoch summary.
    """

    def get_loss(self,batch,loss_fn):
        """Return loss_fn(predictions, labels) for one (features, labels) batch."""
        features,labels = batch
        return loss_fn(self(features),labels)

    def validate(self,batch,loss_fn):
        """Return the loss and accuracy of one (features, labels) batch.

        The forward pass is run once and its output is used for both metrics.
        """
        features, labels = batch
        preds = self(features)
        return {'valid_loss' : loss_fn(preds,labels) , 'valid_acc' : accuracy(labels,preds)}

    def average_validation(self,out):
        """Average a list of validate() results into plain Python floats."""
        loss = torch.stack([l['valid_loss'] for l in out]).mean()
        acc = torch.stack([l['valid_acc'] for l in out]).mean()
        return {'valid_loss': loss.item() , 'valid_acc': acc.item()}

    def log_epoch(self,e,epoch,res):
        """Print the metrics of epoch e (0-based) out of epoch total epochs.

        res must contain 'train_loss', 'valid_loss' and 'valid_acc'.
        """
        print('[{} / {}] epoch/s, training loss is {:.4f} validation loss is {:.4f}, validation accuracy is {:.4f} '\
              .format(e+1,epoch,res['train_loss'],res['valid_loss'],
                                              res['valid_acc']))

# Implementation of ADNet (the name stands for my own name).
Each team is passed through the convolutional layers separately; the two results are then concatenated and passed through a shallow neural network with only three hidden layers. The model would have performed better with more training examples, but in this case we have just over 1000 training examples.

In [None]:
# Now before preping the data for the model, I'd create a somewhat basic model architecture.
class ADNet(ADNetBase):
    """Two-branch 1-D CNN followed by a small fully connected head.

    Both teams go through the same three Conv1d layers (shared weights); the
    flattened results are concatenated and fed to three linear layers ending
    in a sigmoid, giving one value in (0, 1) per matchup.

    Parameters:
        in_channel:   input channels of the first convolution. forward() feeds
                      each team as a single channel, so this is expected to be 1.
        out_channel:  output channels of the first convolution; the second and
                      third convolutions use 2x and 4x this value.
        hidden_size:  width of the second linear layer (the first is twice as wide).
        kernel_size:  kernel size of all three convolutions.
        pad:          padding of all three convolutions.
        num_features: number of features per team (length of the conv input).
    """

    def __init__(self,in_channel,out_channel,hidden_size, kernel_size,pad,num_features):
        super().__init__()

        self.conv1 = nn.Conv1d(in_channel,out_channel,kernel_size = kernel_size,padding=pad)
        self.conv2 = nn.Conv1d(out_channel,out_channel*2,kernel_size= kernel_size,padding=pad)
        self.conv3 = nn.Conv1d(out_channel*2,out_channel*4,kernel_size= kernel_size,padding=pad)

        # With stride 1 each convolution changes the length by
        # (2*pad - kernel_size + 1); three of them are applied in a row.
        conv_length = num_features + 3 * (2 * pad - kernel_size + 1)
        if conv_length <= 0:
            raise ValueError(
                'num_features={} is too small for three convolutions with '
                'kernel_size={} and pad={}'.format(num_features, kernel_size, pad)
            )
        # Flattened size of one team times two teams. For the notebook's
        # settings (28 features, 32 channels, kernel 2, pad 0) this is 6400,
        # the value that used to be hard-coded.
        flat_size = conv_length * out_channel * 4 * 2

        self.hidden1 = nn.Linear(flat_size,hidden_size *2)
        self.hidden2 = nn.Linear(hidden_size*2,hidden_size )
        self.hidden3 = nn.Linear(hidden_size,1)

        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def _encode(self,team):
        """Run one team's (batch, 1, num_features) tensor through the conv stack and flatten it."""
        out = self.relu(self.conv1(team))
        out = self.relu(self.conv2(out))
        # No activation after the last convolution.
        return self.flatten(self.conv3(out))

    def forward(self,x):
        """Return one probability per matchup for x of shape (batch, 2, num_features).

        x[:, 0] is the first team and x[:, 1] the second team. The result has
        shape (batch,), also for a batch of a single matchup.
        """
        first_team = x[:,0].unsqueeze(1)
        second_team = x[:,1].unsqueeze(1)

        # Same convolution weights for both teams, then concatenate.
        fc = torch.cat((self._encode(first_team),self._encode(second_team)),1)

        # Fully connected head.
        a1 = self.relu(self.hidden1(fc))
        a3 = self.relu(self.hidden2(a1))
        y = self.sigmoid(self.hidden3(a3))

        # Drop only the trailing size-1 dimension: a bare squeeze() would also
        # drop the batch dimension when the batch holds a single sample.
        return y.squeeze(-1)

In [None]:
def pct_to_val(train_pct,data):
    '''Turn a training percentage into absolute split sizes.

        INPUTS:
        train_pct: percentage (0-100) of the data to use for training
        data: the dataset (anything supporting len())
        returns: (number of training samples, number of validation samples);
                 the training count is rounded down and the validation split
                 receives the remainder.'''
    total = len(data)
    train_num = int(train_pct/100*total)
    return train_num , int(total - train_num)

In [None]:
# Pair every sample's features with its label.
dataset = TensorDataset(x, y)
# 85 % of the samples go to training, the rest to validation (random split).
train_num, valid_num = pct_to_val(85, dataset)
train_ds, valid_ds = random_split(dataset, [train_num, valid_num])
# Batch size and number of loader worker processes.
BATCH = 64
N = 2
# Both loaders share the same settings.
loader_options = dict(batch_size=BATCH, shuffle=True, num_workers=N, pin_memory=True)
train_dl = DataLoader(train_ds, **loader_options)
valid_dl = DataLoader(valid_ds, **loader_options)


In [None]:
# Use the GPU when one is available, otherwise the CPU. is_available must be
# CALLED: the bare function object is always truthy, which selected 'cuda'
# even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def to_device(data,device):
    """Move data to device; lists/tuples are moved element by element and returned as a list."""
    if not isinstance(data,(list,tuple)):
        return data.to(device,non_blocking = True)
    return [to_device(item,device) for item in data]

In [None]:
class DeviceDL():
    """Wrap a data loader so that every batch is moved to a device when iterated."""

    def __init__(self,dl,dev):
        self.dl, self.dev = dl, dev

    def __len__(self):
        # Number of batches of the wrapped loader.
        return len(self.dl)

    def __iter__(self):
        # Batches are moved lazily, one at a time, as they are requested.
        yield from (to_device(batch,self.dev) for batch in self.dl)

In [None]:
# Wrap both loaders so their batches arrive on the selected device.
train_dl = DeviceDL(train_dl,device)
valid_dl = DeviceDL(valid_dl,device)


In [None]:
def accuracy(label,preds):
    """Return the fraction of predictions that equal the label after rounding (as a tensor)."""
    hits = preds.round() == label
    return hits.sum() / len(label)

In [None]:
@torch.no_grad()
def valid(model,valid_dl,loss_fn):
    """Evaluate model on every batch of valid_dl (eval mode, no gradients) and return the averaged metrics."""
    model.eval()
    per_batch = []
    for batch in valid_dl:
        per_batch.append(model.validate(batch,loss_fn))
    return model.average_validation(per_batch)
    
def fit(model, train_dl, valid_dl, loss_fn, opt, EPOCHS):
    """Train model for EPOCHS epochs and validate after each one.

    model:    network exposing get_loss(batch, loss_fn) and log_epoch(e, epochs, res).
    train_dl: iterable of training batches.
    valid_dl: iterable of validation batches (evaluated with valid()).
    loss_fn:  loss function taking (predictions, labels).
    opt:      optimizer built on model.parameters().
    EPOCHS:   number of passes over train_dl.

    Returns a list with one dict per epoch holding 'valid_loss', 'valid_acc'
    and 'train_loss' (mean of the batch losses of that epoch).
    """
    hist = []
    for e in range(EPOCHS):
        # valid() switches the model to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        train_loss =[]
        for batch in train_dl:
            # Clear gradients BEFORE the backward pass so that nothing left
            # over from earlier code is applied on the first step.
            opt.zero_grad()
            loss = model.get_loss(batch, loss_fn)
            loss.backward()
            opt.step()
            # Keep only the value: the detached tensor does not hold a
            # reference to the autograd graph of the batch.
            train_loss.append(loss.detach())

        res = valid(model,valid_dl,loss_fn)
        res['train_loss'] = torch.stack(train_loss).mean().item()

        model.log_epoch(e,EPOCHS,res)

        hist.append(res)
    return hist

In [None]:
# Model hyper-parameters.
in_channels = 1  # each team's feature vector is fed to the first Conv1d as a single channel
out_channels = 32  # channels of the first convolution (later ones use 2x and 4x)
num_features = x.size(2)  # features per team: last axis of the (game, team, feature) tensor
f = 17  # multiplier used only in hidden_size below; its origin is not documented -- TODO confirm

# Width parameter of the fully connected head (the first linear layer is twice as wide).
hidden_size = num_features * out_channels * f
kernel_size = 2
pad = 0

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = ADNet(in_channels,out_channels,hidden_size,kernel_size,pad,num_features)
to_device(model,device)

# The model jerking process
This process has yet to be generalized with the correct hyperparameters. I will soon be posting about how this "jerking" process, which changes the momentum and learning rate between training stages, can help the model reach the global minimum. (As of now this is an experimental process of my own.)

In [None]:
# Stage 1: binary cross-entropy on the sigmoid output, SGD with a very small
# learning rate and momentum 0.9, for 20 epochs.
loss_fn = F.binary_cross_entropy
lr = 0.00001
opt = torch.optim.SGD(model.parameters(),lr=lr,momentum=.9)
EPOCHS = 20
history = fit(model, train_dl, valid_dl, loss_fn, opt, EPOCHS)

In [None]:
# Stage 2: a fresh optimizer with a 10x larger learning rate, same model.
# NOTE(review): momentum=1 means the velocity term is never decayed -- this
# looks like the deliberate "jerk" of the experiment; confirm it is intended.
# `history` is overwritten, so the stage-1 metrics are not kept.
lr = 0.0001
opt = torch.optim.SGD(model.parameters(),lr=lr,momentum=1)
history = fit(model, train_dl, valid_dl, loss_fn, opt, EPOCHS)

In [None]:
# Stage 3: another 10x larger learning rate with momentum 0.8, for twice as
# many epochs. `history` is overwritten again.
lr = 0.001
opt = torch.optim.SGD(model.parameters(),lr=lr,momentum=.8)
history = fit(model, train_dl, valid_dl, loss_fn, opt, EPOCHS*2)

In [None]:
@torch.no_grad()
def test(dataloader):
    """Run the global `model` over dataloader and return all predictions.

    dataloader yields feature batches (no labels). The result is a 1-D CPU
    tensor with one prediction per sample, in the order the loader yields them.
    """
    model.eval()
    # reshape(-1) keeps every batch 1-D, even if the model returns a 0-d
    # tensor for a batch containing a single sample.
    chunks = [model(b).reshape(-1).to('cpu') for b in dataloader]
    if not chunks:
        # Nothing to predict: return an empty tensor instead of failing in cat().
        return torch.tensor([])
    return torch.cat(chunks)

In [None]:
# Inference loader. shuffle must be False: the predictions are written back
# to df_test by position, so the batches have to follow df_test's row order.
test_dl = DataLoader(x_test,batch_size=1024,shuffle=False,num_workers=N,pin_memory=True)
test_dl = DeviceDL(test_dl,device)

In [None]:
# One prediction per row of the test tensor.
preds = test(test_dl)

In [None]:
# Store the predictions as a plain NumPy array: assigning the torch tensor
# directly relies on pandas coercing it implicitly, which is not guaranteed
# to produce a numeric column on every pandas version.
df_test['Pred'] = preds.detach().cpu().numpy()

In [None]:
# Keep only the two columns written to the submission file.
sub = df_test[['ID','Pred']]

In [None]:
# Write the submission file without the index column.
sub.to_csv('submit.csv',index=False)

In [None]:
# Notebook display only: number of predictions produced.
preds.size()