In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
le = LabelEncoder()

In [5]:
train['Position'] = le.fit_transform(train['Position'])
test['Position'] = le.transform(test['Position'])

In [6]:
train['PreferredFoot'] = le.fit_transform(train['PreferredFoot'])
test['PreferredFoot'] = le.transform(test['PreferredFoot'])

In [7]:
train['AttackingWorkRate'] = le.fit_transform(train['AttackingWorkRate'])
test['AttackingWorkRate'] = le.transform(test['AttackingWorkRate'])

In [8]:
train['DefensiveWorkRate'] = le.fit_transform(train['DefensiveWorkRate'])
test['DefensiveWorkRate'] = le.transform(test['DefensiveWorkRate'])

In [9]:
from torch.utils.data import TensorDataset, DataLoader

In [10]:
import torch

In [11]:
X = torch.tensor(train.iloc[:, 1:-1].values, dtype = torch.float32)

In [12]:
y = torch.tensor(train['Prospect'], dtype = torch.float32)

In [13]:
target = torch.tensor(test.iloc[:, 1:].values, dtype = torch.float32)

In [14]:
from sklearn.model_selection import StratifiedKFold

In [15]:
from tqdm import tqdm_notebook

In [16]:
skf = StratifiedKFold(n_splits = 6, random_state = 42, shuffle = True)

In [17]:
import torch.nn as nn

In [18]:
import torch.optim as optim

In [19]:
class Prospect_Net(nn.Module) :
    
    def __init__(self) :
        super(Prospect_Net, self).__init__()
        self.layer_1 = nn.Linear(64, 256)
        self.relu_1 = nn.LeakyReLU()
        self.layer_2 = nn.Linear(256, 128)
        self.bn_1 = nn.BatchNorm1d(128)
        self.relu_2 = nn.LeakyReLU()
        self.layer_3 = nn.Linear(128, 32)
        self.do_1 = nn.Dropout(0.2)
        self.relu_3 = nn.LeakyReLU()
        self.layer_4 = nn.Linear(32, 1)
    def forward(self, x) :
        
        x = self.layer_1(x)
        x = self.relu_1(x)
        x = self.layer_2(x)
        x = self.relu_2(x)
        x = self.bn_1(x)
        x = self.layer_3(x)
        x = self.do_1(x)
        x = self.relu_3(x)
        x = self.layer_4(x)
        return x

In [20]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [21]:
criterion = nn.BCEWithLogitsLoss()

In [22]:
from sklearn.metrics import f1_score

In [23]:
nn_pred = np.zeros((target.shape[0], ))

In [24]:
for i, (tr_idx, val_idx) in enumerate(tqdm_notebook(skf.split(X, y))) :
    
    tr_x, tr_y = X[tr_idx], y[tr_idx]
    val_x, val_y = X[val_idx], y[val_idx]
    
    tr_loader = DataLoader(TensorDataset(tr_x, tr_y), shuffle = True, drop_last = True, batch_size = 32)
    val_loader = DataLoader(TensorDataset(val_x, val_y), shuffle = False, drop_last = False, batch_size = 32)
    te_loader = DataLoader(TensorDataset(target, torch.zeros((target.shape[0], ), dtype = torch.float32)), shuffle = False, drop_last = False, batch_size = 32)
    
    model = Prospect_Net().to(device)
    adam = optim.AdamW(model.parameters(), lr = 0.003)
    
    score_standard = 0
    patience = [0] * 5
    
    print(f"Fold {i + 1}...!")
    
    for epoch in range(20) :
        
        model.train()
        tr_loss = 0
        
        for idx, (xx, yy) in enumerate(tr_loader) :

            xx, yy = xx.to(device), yy.to(device)
            adam.zero_grad()
            pred = model(xx).squeeze()
            
            loss = criterion(pred, yy)
            loss.backward()
            tr_loss += loss.item() / skf.n_splits

            adam.step()
            
        with torch.no_grad() :
            
            model.eval()
            
            val_loss = 0
            
            predictions = []
            actuals = []
            
            for xx, yy in val_loader :
                
                xx, yy = xx.to(device), yy.to(device)
                pred = torch.sigmoid(model(xx).squeeze())
                loss = criterion(pred, yy)
                val_loss += loss.item()
                
                predictions += [1 if p >= 0.47 else 0 for p in pred]
                actuals += yy.tolist()
                
            score = f1_score(actuals, predictions, average = 'macro')
            val_loss /= len(val_loader)
            
            if score > score_standard :
                
                score_standard = score
                patience.append(0)
                torch.save(model.state_dict(), f'best_model_{len(patience) - 5}.pth')
                fold_pred = []
                
                for idx, (xx, _) in enumerate(te_loader) :
                    
                    xx = xx.to(device)
                    pred = torch.sigmoid(model(xx).squeeze()) / skf.n_splits
                    fold_pred += pred.tolist()
            else :
                patience.append(1)
                
            if patience[-5:] == [1] * 5 :
                
                print(f'Early Stopping...')
                break
                
        print(f"{epoch + 1} Epoch Train Loss : {round(tr_loss, 4)} Val Loss : {round(val_loss, 4)} Val F1 : {round(score, 4)} Best Score : {round(score_standard, 4)}")
    nn_pred += fold_pred
            
        
    print('\n')

0it [00:00, ?it/s]

Fold 1...!
1 Epoch Train Loss : 7.4223 Val Loss : 0.8097 Val F1 : 0.4255 Best Score : 0.4255
2 Epoch Train Loss : 6.4392 Val Loss : 0.9142 Val F1 : 0.2723 Best Score : 0.4255
3 Epoch Train Loss : 6.3896 Val Loss : 0.7462 Val F1 : 0.6978 Best Score : 0.6978
4 Epoch Train Loss : 6.0634 Val Loss : 0.69 Val F1 : 0.7053 Best Score : 0.7053
5 Epoch Train Loss : 6.1066 Val Loss : 0.7484 Val F1 : 0.6587 Best Score : 0.7053
6 Epoch Train Loss : 6.2239 Val Loss : 0.7455 Val F1 : 0.6527 Best Score : 0.7053
7 Epoch Train Loss : 6.0356 Val Loss : 0.7441 Val F1 : 0.6959 Best Score : 0.7053
8 Epoch Train Loss : 6.1333 Val Loss : 0.6813 Val F1 : 0.7248 Best Score : 0.7248
9 Epoch Train Loss : 6.0597 Val Loss : 0.7125 Val F1 : 0.725 Best Score : 0.725
10 Epoch Train Loss : 6.0242 Val Loss : 0.725 Val F1 : 0.7324 Best Score : 0.7324
11 Epoch Train Loss : 6.0559 Val Loss : 0.7526 Val F1 : 0.6567 Best Score : 0.7324
12 Epoch Train Loss : 5.9771 Val Loss : 0.6721 Val F1 : 0.6562 Best Score : 0.7324
13 Epoc

In [25]:
submission = pd.read_csv('sample_submission.csv')

In [26]:
submission['Prospect'] = [1 if p >= 0.5 else 0 for p in nn_pred]

In [27]:
submission.to_csv('nn.csv', index = False)