In [1]:
import math
import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from pathlib import Path
import pandas as pd

In [2]:
# Folder named 'models' under the current working directory; used as the
# destination for saved model files.
MODEL_PATH = Path.cwd().joinpath('models')

# Run on the first CUDA GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
class CustomDataset(Dataset):
    """Map-style dataset over a DataFrame whose 'AGE' column is the target.

    Every column other than 'AGE' is treated as a feature. Indexing returns
    ``(x, y)`` where ``x`` is a 1-D tensor holding the feature values of one
    row and ``y`` is that row's 'AGE' value (a scalar, not a tensor).

    The feature and target arrays are extracted once in ``__init__``, so the
    frame is assumed not to be modified after the dataset is built.

    Args:
        df: DataFrame containing an 'AGE' column plus the feature columns.
        transform: optional callable applied to ``x`` before it is returned.
        target_transform: optional callable applied to ``y`` before it is
            returned.

    Raises:
        KeyError: at construction time, if ``df`` has no 'AGE' column.
    """

    def __init__(self, df, transform=None, target_transform=None):
        self.df = df
        self.transform = transform
        self.target_transform = target_transform
        # Split features and targets once. Doing the column drop inside
        # __getitem__ copies the whole frame for every single sample.
        self._features = df.drop(columns='AGE').to_numpy()
        # Targets are read from the whole-frame array, so `y` carries the
        # dtype common to all columns, as a full-row `iloc` lookup would for
        # NumPy-backed columns. `.copy()` lets the temporary full array go.
        self._targets = df.to_numpy()[:, df.columns.get_loc('AGE')].copy()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Copy the row so the returned tensor owns its memory: an in-place
        # transform must not write through into the cached feature array.
        x = torch.from_numpy(self._features[idx].copy())
        # y stays a scalar; the DataLoader's default collation batches it.
        y = self._targets[idx]
        if self.transform:
            x = self.transform(x)
        if self.target_transform:
            y = self.target_transform(y)
        return x, y

In [4]:
class NN(nn.Module):
    """Fully connected regressor with a bounded output.

    The network is a stack of ``Linear -> LeakyReLU -> Dropout`` blocks (one
    per entry in ``layers``) followed by a single-unit ``Linear`` head. The
    head's output goes through a sigmoid and is rescaled to ``y_range``.

    Args:
        layers: sequence of hidden-layer widths. May be empty, in which case
            the model is just the ``in_features -> 1`` head.
        ps: dropout probability used after every hidden layer.
        in_features: number of input features.
        y_range: ``(low, high)`` bounds of the predicted value.
    """

    def __init__(self, layers=[1000,500,250], ps=0.35, in_features=20, y_range=(20, 90)):
        super().__init__()
        self.y_range = y_range
        # Keep an own list copy: accepts tuples and any other sequence, and
        # never aliases the shared default list in the signature.
        self.layers = list(layers)
        self.ps = ps

        widths = [in_features] + self.layers
        modules = []
        for n_in, n_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(n_in, n_out))
            #TODO: play with the negative slope coefficient of LeakyReLU
            modules.append(nn.LeakyReLU())
            modules.append(nn.Dropout(ps))
        # widths[-1] is the last hidden width, or in_features when there are
        # no hidden layers at all.
        modules.append(nn.Linear(widths[-1], 1))

        self.arch = nn.Sequential(*modules)

    def forward(self, x):
        """Return predictions for ``x``, squashed into ``y_range``."""
        low, high = self.y_range
        return (high - low) * torch.sigmoid(self.arch(x)) + low

    def __repr__(self):
        # Compact summary instead of nn.Module's default module tree.
        return "Linear -> LeakyReLU -> Dropout\nlayers: {}\nps: {}\n".format(self.layers, self.ps)

In [5]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print a short report.

    The model is put in eval mode and run without gradient tracking. Two
    figures are printed: the share of samples whose prediction is less than
    5 away from the target, and the mean of the per-batch ``loss_fn`` values.

    Args:
        dataloader: yields ``(X, y)`` batches; ``y`` gets a trailing
            dimension added so it lines up with the model output.
        model: module to evaluate (inputs are moved to the global ``device``).
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    model.eval()

    loss_total = 0
    hits = 0
    with torch.no_grad():
        for features, targets in dataloader:
            features = features.to(device).float()
            targets = targets.to(device).float().unsqueeze(dim=1)
            output = model(features)
            loss_total += loss_fn(output, targets).item()
            # Count predictions whose absolute error is below 5.
            within_5 = (output - targets).abs() < 5
            hits += within_5.sum().item()

    avg_loss = loss_total / n_batches
    hit_rate = hits / n_samples
    print(f"Error: \n Predictions in 5 range: {(100*hit_rate):>0.1f}%, Avg loss: {avg_loss:>8f} \n")

In [6]:
def train(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    For every batch the gradients are zeroed, the loss is computed and
    back-propagated, and the optimizer takes one step. An ``Eve`` optimizer
    is stepped through ``optimizer.step(closure)`` so it can evaluate the
    loss itself; any other optimizer gets the usual backward-then-``step()``.

    Args:
        dataloader: yields ``(X, y)`` batches; ``y`` gets a trailing
            dimension added so it lines up with the model output.
        model: module to train (inputs are moved to the global ``device``).
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer already bound to ``model``'s parameters.
    """
    model.train()
    for X, y in dataloader:
        X, y = X.to(device).float(), y.to(device).float().unsqueeze(dim=1)

        def closure():
            # Recompute the loss and its gradients for the current batch.
            optimizer.zero_grad()
            pred = model(X)
            loss = loss_fn(pred, y)
            loss.backward()
            return loss

        if isinstance(optimizer, Eve):
            optimizer.step(closure)
        else:
            closure()
            optimizer.step()

In [7]:
def train_test(train_path='Data/train_data.csv', test_path='Data/test_data.csv',
               optimizer=None, loss_fn=nn.L1Loss(), epochs=5, lr=1e-3,
               layers=[1000,500,250], ps=0.35, train_loss=True):
    """Build an ``NN``, train it on one CSV and evaluate it on another.

    Both CSV files are loaded into ``CustomDataset`` objects and served in
    batches of 32. After every epoch the test-set metrics are printed; the
    training-set metrics are printed as well when ``train_loss`` is true.

    Args:
        train_path: CSV file with the training data.
        test_path: CSV file with the evaluation data.
        optimizer: optimizer *class* (e.g. ``torch.optim.Adam``), instantiated
            as ``optimizer(model.parameters(), lr=lr)``. When omitted,
            ``Eve(model.parameters())`` is used and ``lr`` is ignored.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        epochs: number of passes over the training data.
        lr: learning rate handed to ``optimizer``.
        layers: hidden-layer widths for ``NN``.
        ps: dropout probability for ``NN``.
        train_loss: also report metrics on the training set after each epoch.

    Returns:
        The trained model.
    """
    train_set = CustomDataset(pd.read_csv(train_path))
    test_set = CustomDataset(pd.read_csv(test_path))
    train_dataloader = DataLoader(train_set, batch_size=32, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # batch composition, and so the reported average loss, repeatable.
    test_dataloader = DataLoader(test_set, batch_size=32, shuffle=False)

    # Every column except the 'AGE' target is a feature, so size the input
    # layer from the data instead of relying on NN's default width.
    in_features = len(train_set.df.columns) - 1
    model = NN(layers, ps=ps, in_features=in_features).to(device)
    print(model, end='')
    if not optimizer:
        optimizer = Eve(model.parameters())
        print('Optimizer: default EVE')
    else:
        optimizer = optimizer(model.parameters(), lr=lr)
        print('Optimizer: {}\nLearning rate: {}'.format(optimizer.__class__.__name__, lr))

    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer)
        if train_loss:
            print('Train ', end='')
            test(train_dataloader, model, loss_fn)
        print('Test ', end='')
        test(test_dataloader, model, loss_fn)
    return model

In [8]:
class Eve:
    """Placeholder type standing in for the Eve optimizer.

    ``train`` uses ``isinstance(optimizer, Eve)`` to decide whether to step
    the optimizer with a closure, and ``train_test`` builds
    ``Eve(model.parameters())`` when no optimizer class is passed.

    NOTE(review): this stub has no ``__init__`` accepting parameters, so that
    default path raises ``TypeError``. Presumably the real optimizer
    implementation is meant to be defined or imported here; confirm.
    """

In [9]:
# Optimizer class (not an instance); train_test instantiates it with the
# model's parameters and the learning rate.
opt = optim.Adam

In [10]:
# Train on the nivis CSV files for 15 epochs with the optimizer class chosen
# above; the per-epoch metrics are printed as the run progresses.
model = train_test(
    train_path='nivis_data/train_data.csv',
    test_path='nivis_data/test_data.csv',
    optimizer=opt,
    epochs=15,
)

Linear -> LeakyReLU -> Dropout
layers: [1000, 500, 250]
ps: 0.35
Optimizer: Adam
Learning rate: 0.001
Epoch 1
-------------------------------
Train Error: 
 Predictions in 5 range: 29.7%, Avg loss: 10.286382 

Test Error: 
 Predictions in 5 range: 28.9%, Avg loss: 10.391990 

Epoch 2
-------------------------------
Train Error: 
 Predictions in 5 range: 31.4%, Avg loss: 10.151191 

Test Error: 
 Predictions in 5 range: 29.5%, Avg loss: 10.376546 

Epoch 3
-------------------------------
Train Error: 
 Predictions in 5 range: 33.5%, Avg loss: 9.712109 

Test Error: 
 Predictions in 5 range: 31.7%, Avg loss: 9.914822 

Epoch 4
-------------------------------
Train Error: 
 Predictions in 5 range: 34.6%, Avg loss: 9.696832 

Test Error: 
 Predictions in 5 range: 33.1%, Avg loss: 9.995555 

Epoch 5
-------------------------------
Train Error: 
 Predictions in 5 range: 33.4%, Avg loss: 9.708571 

Test Error: 
 Predictions in 5 range: 30.9%, Avg loss: 10.064059 

Epoch 6
--------------------

In [11]:
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the weights.
MODEL_PATH.mkdir(parents=True, exist_ok=True)
# Persist only the parameters (state dict), not the whole module object.
torch.save(model.state_dict(), MODEL_PATH/'adam_1113_data_state_dict')