In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from torchsummary import summary
from utils.training import *

%matplotlib notebook

In [20]:
def train_SGD(net, train_data, num_epochs, optim_args=None, sched_args=None,
              grad_clip=None, verbose=None):
    """
    Train NN on provided data sets with SGD optimizer and
    ReduceLROnPlateau learning rate scheduler.

    Every epoch is a single full-batch step: the whole training set is passed
    through the network in one forward/backward pass.

    Args:
        net: neural network
        train_data: (inputs, targets) pair of tensors; the size of the first
            dimension of the inputs is used as the number of training examples
        num_epochs: number of epochs
        optim_args: dict of hyperparameters for SGD [None -> all defaults]
            - lr: learning rate [default: 0.001]
            - momentum: momentum factor [default: 0.9]
            - weight_decay: L2 regularization [default: 0.0]
            - dampening: dampening for momentum [default: 0.0]
            - nesterov: enables Nesterov momentum [default: False]
        sched_args: dict of hyperparameters for ReduceLROnPlateau learning rate
            scheduler [None -> all defaults]
            - factor: multiplicative factor of learning rate decay [default: 0.1]
            - patience: epochs below threshold before reducing learning rate [default: 1000]
            - threshold: threshold for measuring the new optimum [default: 1e-4]
            - cooldown: epochs to wait to resume operation after lr has been reduced [default: 0]
        grad_clip: dict of parameters for clipping gradient norms [None -> all defaults]
            - clip_norm: option to clip gradient norm [default: False]
            - max_norm: max norm of the gradients [default: 5]
        verbose: dict of printing actions [None -> all defaults]
            - verbose: print real-time training results [default: True]
            - print_every: interval to print real-time training results [default: 1]

    Returns:
        The same `net` object, updated in place by the optimizer.

    Note:
        The printed and plotted loss is the criterion value divided by the
        number of training examples, while the scheduler is stepped on the
        undivided criterion value.
        NOTE(review): nn.MSELoss() is built with its default arguments, which
        already average over the elements, so the reported number is
        MSE / num_train. Kept unchanged so curves stay comparable with
        earlier runs -- confirm whether this scaling is intended.
    """

    # Omitted option dicts behave like empty ones, so every documented default applies.
    optim_args = {} if optim_args is None else optim_args
    sched_args = {} if sched_args is None else sched_args
    grad_clip = {} if grad_clip is None else grad_clip
    verbose = {} if verbose is None else verbose

    X_train, y_train = train_data
    num_train = X_train.size(0)
    print('Training examples: %i' % num_train)
    print('')

    # Loss function and optimization method
    criterion = nn.MSELoss()
    optimizer = optim.SGD(
        net.parameters(),
        lr=optim_args.get('lr', 1e-3),
        momentum=optim_args.get('momentum', 0.9),
        weight_decay=optim_args.get('weight_decay', 0.0),
        dampening=optim_args.get('dampening', 0.0),
        nesterov=optim_args.get('nesterov', False),
    )

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='min',
        factor=sched_args.get('factor', 0.1),
        patience=sched_args.get('patience', 1000),
        verbose=True,
        threshold=sched_args.get('threshold', 1e-4),
        cooldown=sched_args.get('cooldown', 0),
    )

    train_loss = np.zeros(num_epochs)
    print('Training %s ...' % net.__class__.__name__)

    net.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        if grad_clip.get('clip_norm', False):
            nn.utils.clip_grad_norm_(net.parameters(), grad_clip.get('max_norm', 5))
        optimizer.step()

        # Pull the scalar out once as a plain float, so neither the history
        # array nor the print formatting touches a graph-attached tensor.
        loss_value = loss.item()
        epoch_loss = loss_value / num_train

        if verbose.get('verbose', True):
            if (epoch + 1) % verbose.get('print_every', 1) == 0:
                print('Epoch: %6d/%d,    Loss: %.6f' %
                      (epoch + 1, num_epochs, epoch_loss))

        train_loss[epoch] = epoch_loss
        # The plateau scheduler monitors the undivided criterion value.
        scheduler.step(loss_value)

    fig = plot_train_loss(train_loss.flatten(), net.__class__.__name__, criterion)
    fig.show()

    print('')

    return net

In [37]:
def train_Adam(net, train_data, num_epochs, optim_args=None, sched_args=None,
               grad_clip=None, verbose=None):
    """
    Train NN on provided data sets with Adam optimizer and 
    ReduceLROnPlateau learning rate scheduler.

    Every epoch is a single full-batch step: the whole training set is passed
    through the network in one forward/backward pass.

    Args:
        net: neural network
        train_data: (inputs, targets) pair of tensors; the size of the first
            dimension of the inputs is used as the number of training examples
        num_epochs: number of epochs
        optim_args: dict of hyperparameters for Adam optimizer [None -> all defaults]
            - lr: learning rate [default: 0.001]
            - betas: coefficients for running averages [default: (0.9, 0.999)]
            - eps: epsilon for numerical stability [default: 1e-8]
            - weight_decay: L2 regularization [default: 0.0]
        sched_args: dict of hyperparameters for ReduceLROnPlateau learning rate
            scheduler [None -> all defaults]
            - factor: multiplicative factor of learning rate decay [default: 0.1]
            - patience: epochs below threshold before reducing learning rate [default: 1000]
            - threshold: threshold for measuring the new optimum [default: 1e-4]
            - cooldown: epochs to wait to resume operation after lr has been reduced [default: 0]
        grad_clip: dict of parameters for clipping gradient norms [None -> all defaults]
            - clip_norm: option to clip gradient norm [default: False]
            - max_norm: max norm of the gradients [default: 5]
        verbose: dict of printing actions [None -> all defaults]
            - verbose: print real-time training results [default: True]
            - print_every: interval to print real-time training results [default: 1]

    Returns:
        The same `net` object, updated in place by the optimizer.

    Note:
        The printed and plotted loss is the criterion value divided by the
        number of training examples, while the scheduler is stepped on the
        undivided criterion value.
        NOTE(review): nn.MSELoss() is built with its default arguments, which
        already average over the elements, so the reported number is
        MSE / num_train. Kept unchanged so curves stay comparable with
        earlier runs -- confirm whether this scaling is intended.
    """

    # Omitted option dicts behave like empty ones, so every documented default applies.
    optim_args = {} if optim_args is None else optim_args
    sched_args = {} if sched_args is None else sched_args
    grad_clip = {} if grad_clip is None else grad_clip
    verbose = {} if verbose is None else verbose

    X_train, y_train = train_data
    num_train = X_train.size(0)
    print('Training examples: %i' % num_train)
    print('')

    # Loss function and optimization method
    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        net.parameters(),
        lr=optim_args.get('lr', 1e-3),
        betas=optim_args.get('betas', (0.9, 0.999)),
        eps=optim_args.get('eps', 1e-8),
        weight_decay=optim_args.get('weight_decay', 0.0),
    )

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='min',
        factor=sched_args.get('factor', 0.1),
        patience=sched_args.get('patience', 1000),
        verbose=True,
        threshold=sched_args.get('threshold', 1e-4),
        cooldown=sched_args.get('cooldown', 0),
    )

    train_loss = np.zeros(num_epochs)
    print('Training %s ...' % net.__class__.__name__)

    net.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        # Fixed: this previously read `grad_clipget(...)`, a NameError on the first epoch.
        if grad_clip.get('clip_norm', False):
            nn.utils.clip_grad_norm_(net.parameters(), grad_clip.get('max_norm', 5))
        optimizer.step()

        # Pull the scalar out once as a plain float, so neither the history
        # array nor the print formatting touches a graph-attached tensor.
        loss_value = loss.item()
        epoch_loss = loss_value / num_train

        if verbose.get('verbose', True):
            if (epoch + 1) % verbose.get('print_every', 1) == 0:
                print('Epoch: %6d/%d,    Loss: %.6f' %
                      (epoch + 1, num_epochs, epoch_loss))

        train_loss[epoch] = epoch_loss
        # The plateau scheduler monitors the undivided criterion value.
        scheduler.step(loss_value)

    fig = plot_train_loss(train_loss.flatten(), net.__class__.__name__, criterion)
    fig.show()

    print('')

    return net

In [10]:
# Function to train net
def train_valid(net, train_data, valid_data, parameters, verbose):
    """
    Fit `net` with full-batch Adam and a StepLR schedule, evaluating a
    validation loss after every epoch.

    Args:
        net: neural network
        train_data: (inputs, targets) pair of tensors used for the updates
        valid_data: (inputs, targets) pair of tensors used for evaluation only
        parameters: dict of hyperparameters for training with Adam
            - num_epochs: number of epochs [default (1)]
            - lr: learning rate [default (0.001)]
            - betas: coefficients for running averages [default (0.9, 0.999)]
            - eps: epsilon for numerical stability [default (1e-8)]
            - weight_decay: L2 regularization [default (0.0)]
            - step_size: period of learning rate decay [default (50)]
            - gamma: multiplicative factor of learning rate decay [default (0.1)]
            - clip_norm: option to clip gradient norm [default (False)]
            - max_norm: max norm of the gradients [default (5)]
        verbose: dict of printing actions
            - verbose: print real-time training results [default (True)]
            - print_every: interval to print real-time training results [default (1)]

    Returns:
        The same `net` object, updated in place by the optimizer.

    Note:
        Reported and plotted losses are the criterion value divided by the
        number of examples in the respective set.
        NOTE(review): nn.MSELoss() is built with its default arguments, which
        already average over the elements -- confirm this extra division is intended.
    """

    (X_train, y_train), (X_valid, y_valid) = train_data, valid_data
    num_train, num_valid = X_train.size(0), X_valid.size(0)
    print('Training examples: ', num_train)
    print('Validation examples: ', num_valid)
    print('')

    # Loss function, optimizer and step-wise learning rate decay
    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        net.parameters(),
        lr=parameters.get('lr', 1e-3),
        betas=parameters.get('betas', (0.9, 0.999)),
        eps=parameters.get('eps', 1e-8),
        weight_decay=parameters.get('weight_decay', 0.0),
    )
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=parameters.get('step_size', 50),
        gamma=parameters.get('gamma', 0.1),
    )

    num_epochs = parameters.get('num_epochs', 1)
    train_loss, valid_loss = np.zeros(num_epochs), np.zeros(num_epochs)
    model_name = net.__class__.__name__
    print('Training %s ...' % model_name)

    report = 'Epoch: %d/%d,    lr: %e,    Train loss: %.6f    Valid loss: %.6f'
    for epoch in range(num_epochs):
        # One optimization step on the full training set
        net.train()
        optimizer.zero_grad()
        t_loss = criterion(net(X_train), y_train)
        t_loss.backward()
        if parameters.get('clip_norm', False):
            nn.utils.clip_grad_norm_(net.parameters(), parameters.get('max_norm', 5))
        optimizer.step()

        # Validation pass in eval mode, without building a graph
        net.eval()
        with torch.no_grad():
            v_loss = criterion(net(X_valid), y_valid)

        if verbose.get('verbose', True) and (epoch + 1) % verbose.get('print_every', 1) == 0:
            print(report % (
                epoch + 1,
                num_epochs,
                scheduler.get_last_lr()[0],
                t_loss / num_train,
                v_loss / num_valid,
            ))

        train_loss[epoch] = t_loss / num_train
        valid_loss[epoch] = v_loss / num_valid
        scheduler.step()

    fig = plot_losses(train_loss.flatten(), valid_loss.flatten(), model_name, criterion)
    fig.show()

    print('')

    return net

In [11]:
# Function to train net with mini-batches
def batch_train(net, train_data, batch_size, parameters, verbose):
    """
    Train NN on provided data sets with shuffled mini-batches, the Adam
    optimizer and a StepLR learning rate schedule.

    The DataLoader is built with drop_last=True, so a trailing incomplete
    batch is skipped in every epoch. Gradient norms are always clipped to
    `max_norm` (there is no switch to disable clipping here).

    Args:
        net: neural network
        train_data: PyTorch Dataset containing training inputs and targets
        batch_size: mini-batch size
        parameters: dict of hyperparameters for training with Adam
            - num_epochs: number of epochs [default (1)]
            - lr: learning rate [default (0.001)]
            - betas: default (0.9, 0.999)
            - eps: epsilon [default (1e-8)]
            - weight_decay: L2 regularization [default (0.0)]
            - step_size: period of learning rate decay [default (50)]
            - gamma: multiplicative factor of learning rate decay [default (0.1)]
            - max_norm: max norm of the gradients [default (5)]
        verbose: dict of printing actions
            - verbose: print real-time training results [default (True)]
            - print_every: interval to print real-time training results [default (1)]

    Returns:
        The same `net` object, updated in place by the optimizer.

    Raises:
        ValueError: if the loader would yield no batch at all (batch_size
            larger than the dataset while incomplete batches are dropped).
    """

    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
    num_examples = len(train_data)
    num_batches = len(trainloader)
    if num_batches == 0:
        # Without this guard the per-epoch average below divides by zero.
        raise ValueError(
            'batch_size (%s) exceeds the dataset size (%s); with drop_last=True '
            'no mini-batch would be produced' % (batch_size, num_examples)
        )
    # Previously the batch count was printed under the "Training examples" label.
    print('Training examples: ', num_examples)
    print('Mini-batches per epoch: ', num_batches)
    print('')

    # Loss function and optimization method
    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(),
                           lr=parameters.get('lr', 1e-3),
                           betas=parameters.get('betas', (0.9, 0.999)),
                           eps=parameters.get('eps', 1e-8),
                           weight_decay=parameters.get('weight_decay', 0.0),
                          )
    scheduler = optim.lr_scheduler.StepLR(optimizer, 
                                          step_size=parameters.get('step_size', 50),
                                          gamma=parameters.get('gamma', 0.1),
                                         )

    num_epochs = parameters.get('num_epochs', 1)
    train_loss = np.zeros(num_epochs)
    print('Training %s ...' % net.__class__.__name__)

    net.train()
    for epoch in range(num_epochs):

        running_loss = 0.0
        for X_batch, y_batch in trainloader:

            optimizer.zero_grad()
            outputs = net(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), parameters.get('max_norm', 5))
            optimizer.step()
            running_loss += loss.item()

        # Epoch loss = mean of the per-batch criterion values
        epoch_loss = running_loss / num_batches

        if verbose.get('verbose', True):
            if (epoch + 1) % verbose.get('print_every', 1) == 0:
                print('Epoch: %d/%d,    lr: %e,    Loss: %.6f' %
                      (epoch + 1, num_epochs, scheduler.get_last_lr()[0], 
                       epoch_loss))

        train_loss[epoch] = epoch_loss
        scheduler.step()

    fig = plot_train_loss(train_loss.flatten(), net.__class__.__name__, criterion)
    fig.show()

    print('')

    return net

In [12]:
# Plot training curve
def plot_train_loss(train_loss, title, criterion):
    """
    Draw the per-epoch training loss on a log-scaled y axis.

    Args:
        train_loss: sequence with one loss value per epoch
        title: figure title
        criterion: loss object; its class name is used as the y-axis label

    Returns:
        The matplotlib Figure (not shown here; the caller decides when to display it).
    """
    num_epochs = len(train_loss)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4), dpi=150)
    ax.plot(np.arange(1, num_epochs + 1), train_loss, label='Train')  # epochs are 1-based
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(criterion.__class__.__name__)
    ax.set_yscale('log')
    ax.legend()
    # Pass the on/off flag positionally: the old `b=` keyword has been removed
    # from Matplotlib, while the positional form works on old and new releases.
    ax.grid(True, which='both', axis='both', color='grey', alpha=0.2)
    fig.tight_layout()

    return fig

In [13]:
# Plot training and validation curves
def plot_losses(train_loss, val_loss, title, criterion):
    """
    Draw per-epoch training and validation losses on a shared log-scaled y axis.

    Args:
        train_loss: sequence with one training loss value per epoch
        val_loss: sequence with one validation loss value per epoch; plotted
            against the same epoch axis, whose length is taken from train_loss
        title: figure title
        criterion: loss object; its class name is used as the y-axis label

    Returns:
        The matplotlib Figure (not shown here; the caller decides when to display it).
    """
    num_epochs = len(train_loss)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4), dpi=150)
    ax.plot(np.arange(1, num_epochs + 1), train_loss, label='Train')  # epochs are 1-based
    ax.plot(np.arange(1, num_epochs + 1), val_loss, label='Validation')
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(criterion.__class__.__name__)
    ax.set_yscale('log')
    ax.legend()
    # Pass the on/off flag positionally: the old `b=` keyword has been removed
    # from Matplotlib, while the positional form works on old and new releases.
    ax.grid(True, which='both', axis='both', color='grey', alpha=0.2)
    fig.tight_layout()

    return fig

# Each zeolite = line

In [None]:
# Load the per-zeolite table (the `zeolite` column is read as text) and keep
# only rows whose num_channels is non-zero.
each_zeolite = (
    pd.read_csv(
        '../data/chemistry-channel-info/each-zeolite-info.csv',
        dtype={'zeolite': str},
    )
    .loc[lambda frame: frame['num_channels'] != 0]  # remove zeolites with no channels
)
each_zeolite

In [None]:
# 70/30 train/test split: columns at positions 1..5 are the features, kH_C18 is the target.
X_train, X_test, y_train, y_test = train_test_split(
    each_zeolite.iloc[:, 1:6].values,
    each_zeolite['kH_C18'].values,
    test_size=0.3,
    random_state=12,
)

In [None]:
class FourLayerNet(nn.Module):
    """Fully connected regressor: 5 inputs -> three hidden layers of 100 units
    (LeakyReLU after each) -> 1 output, flattened to a 1-D tensor."""

    def __init__(self):
        super().__init__()
        width = 100
        self.regressor = nn.Sequential(
            nn.Linear(5, width), nn.LeakyReLU(),
            nn.Linear(width, width), nn.LeakyReLU(),
            nn.Linear(width, width), nn.LeakyReLU(),
            nn.Linear(width, 1),
        )

    def forward(self, x):
        # Flatten the regressor output into a single dimension.
        return self.regressor(x).view(-1)

In [None]:
# Print torchsummary's model summary for a freshly initialised FourLayerNet, using input size (1, 5).
summary(FourLayerNet(), (1, 5))

In [None]:
# 10 examples -- batch training
# NOTE(review): `train` is not defined in the visible notebook cells; presumably it
# comes from `from utils.training import *` -- verify.
fourlayernet = FourLayerNet()
fourlayernet = train(
    fourlayernet,
    train_data=(
        torch.Tensor(X_train).float()[:10],
        torch.Tensor(y_train).float()[:10],
    ),
    parameters={
        'num_epochs': 20000,
        'lr': 1e-3, 'weight_decay': 0.0,      # optimizer
        'step_size': 4000, 'gamma': 0.1,      # learning rate decay
        'clip_norm': True, 'max_norm': 1,     # gradient clipping
    },
    verbose={'print_every': 2000},
)

In [None]:
# 50 examples -- batch training
# NOTE(review): `train` is not defined in the visible notebook cells; presumably it
# comes from `from utils.training import *` -- verify.
fourlayernet = FourLayerNet()
fourlayernet = train(
    fourlayernet,
    train_data=(
        torch.Tensor(X_train).float()[:50],
        torch.Tensor(y_train).float()[:50],
    ),
    parameters={
        'num_epochs': 100000,
        'lr': 1e-3, 'weight_decay': 0.0,      # optimizer
        'step_size': 40000, 'gamma': 0.1,     # learning rate decay
        'clip_norm': True, 'max_norm': 1,     # gradient clipping
    },
    verbose={'print_every': 10000},
)

In [None]:
# 50 examples -- batch training [changed lr scheduler -- now ReduceLROnPlateau, previously StepLR]
# NOTE(review): `train_RLRP` is not defined in the visible notebook cells; presumably it
# comes from `from utils.training import *` -- verify.
fourlayernet = FourLayerNet()
fourlayernet = train_RLRP(
    fourlayernet,
    train_data=(
        torch.Tensor(X_train).float()[:50],
        torch.Tensor(y_train).float()[:50],
    ),
    parameters={
        'num_epochs': 100000,
        'lr': 1e-3, 'weight_decay': 0.0,                      # optimizer
        'factor': 0.25, 'patience': 5000, 'threshold': 1e-2,  # plateau scheduler
        'clip_norm': True, 'max_norm': 1,                     # gradient clipping
    },
    verbose={'print_every': 10000},
)

In [None]:
# 100 examples -- batch training [changed lr scheduler -- now ReduceLROnPlateau, previously StepLR]
# NOTE(review): `train_RLRP` is not defined in the visible notebook cells; presumably it
# comes from `from utils.training import *` -- verify.
fourlayernet = FourLayerNet()
fourlayernet = train_RLRP(
    fourlayernet,
    train_data=(
        torch.Tensor(X_train).float()[:100],
        torch.Tensor(y_train).float()[:100],
    ),
    parameters={
        'num_epochs': 100000,
        'lr': 1e-3, 'weight_decay': 0.0,                     # optimizer
        'factor': 0.1, 'patience': 5000, 'threshold': 1e-4,  # plateau scheduler
        'clip_norm': True, 'max_norm': 1,                    # gradient clipping
    },
    verbose={'print_every': 10000},
)

In [None]:
# Full set -- batch training
# NOTE(review): `train` is not defined in the visible notebook cells; presumably it
# comes from `from utils.training import *` -- verify.
fourlayernet = FourLayerNet()
fourlayernet = train(
    fourlayernet,
    train_data=(
        torch.Tensor(X_train).float(),
        torch.Tensor(y_train).float(),
    ),
    parameters={
        'num_epochs': 10000,
        'lr': 1e-2, 'weight_decay': 0.0,      # optimizer
        'step_size': 4000, 'gamma': 0.1,      # learning rate decay
        'clip_norm': False, 'max_norm': 1,    # gradient clipping (switched off here)
    },
    verbose={'print_every': 1000},
)

In [None]:
# Full set - mini-batch training
# Shuffled mini-batches of 300 over the whole training split.
fourlayernet = FourLayerNet()
fourlayernet = batch_train(
    fourlayernet,
    train_data=TensorDataset(
        torch.Tensor(X_train).float(),
        torch.Tensor(y_train).float(),
    ),
    batch_size=300,
    parameters={
        'num_epochs': 100,
        'lr': 1e-4, 'weight_decay': 0.0,      # optimizer
        'step_size': 200, 'gamma': 0.5,       # learning rate decay
        'max_norm': 5,                        # gradient clipping
    },
    verbose={'print_every': 10},
)

# Add three channel (min, max) descriptors

In [25]:
# Load the full descriptor table and keep only rows whose num_channels is non-zero.
df_full = (
    pd.read_csv(
        '../data/chemistry-channel-info/each-zeolite-info-FULL.csv',
        low_memory=False,
    )
    .loc[lambda frame: frame['num_channels'] != 0]  # remove zeolites with no channels
)

# convert nonzero kH_C18 to log10 scale
# df_full.kH_C18 = [np.log10(x) if x > 0 else 0 for x in df_full.kH_C18]
df_full

Unnamed: 0,zeolite,set,kH_C18,kH_C24,kH_C30,kH_2C17,kH_4C17,kH_22C16,U_C18,U_C24,...,LCD_max,PLD_min,PLD_max,LCD_free_min,LCD_free_max,num_channels,dimensionality,largest_inc_sphere,largest_free_sphere,largest_inc_sphere_free_path
0,ABW-0,IZA,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,505.3769670548,729.9573496964,...,4.08290,3.34747,3.34747,4.08290,4.08290,2,1.0,4.08290,3.34747,4.08290
1,ABW-1,IZA,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,798.8761754772,,...,3.81867,3.34501,3.34501,3.81867,3.81867,2,1.0,3.81867,3.34501,3.81867
2,ACO-0,IZA,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,291.7543179456,413.6743325738,...,4.41809,3.40354,3.40354,4.41809,4.41809,1,3.0,4.41809,3.40354,4.41809
3,AEI-0,IZA,1.738134e-09,2.062889e-13,1.473491e-14,1.179514e-09,5.203304e-08,7.884349e-10,-39.3565206512,-21.4450331362,...,7.17261,3.68131,3.68131,7.17261,7.17261,1,3.0,7.17261,3.68131,7.17261
4,AEI-1,IZA,1.457747e-08,1.820195e-14,0.000000e+00,1.186425e-08,1.950043e-07,4.871419e-09,-42.8064146355,-6.8348403793,...,7.31458,3.66592,3.66592,7.31458,7.31458,1,3.0,7.31458,3.66592,7.31458
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331560,8331159,PCOD,1.397022e+02,1.496674e+03,1.097612e+04,1.334110e+02,1.639298e+02,9.366793e+01,-83.2492718918,-107.6866983127,...,13.92170,4.31937,4.31937,13.92170,13.92170,1,3.0,13.92170,4.31937,13.92170
331561,8331160,PCOD,2.146890e-01,4.001510e-07,1.126798e-12,5.901711e-01,7.716776e-02,1.002744e-02,-94.3413357654,-83.3363502511,...,10.45520,3.81981,3.81981,10.45520,10.45520,2,3.0,10.45520,3.81981,10.45520
331563,8331162,PCOD,4.083667e-05,1.807549e-10,1.866423e-13,3.902054e-05,2.245108e-06,1.200871e-05,-78.6994308605,-81.6098440478,...,9.99381,3.64882,3.64882,9.99381,9.99381,2,3.0,9.99381,3.64882,9.99381
331564,8331163,PCOD,7.924689e+01,1.013280e+03,1.393362e+04,8.997104e+01,1.121012e+02,8.383438e+01,-71.8405730765,-94.8762147778,...,16.65820,3.75074,3.75074,16.65820,16.65820,2,3.0,16.65820,3.75074,16.65820


In [26]:
# Feature columns fed to the network (9 in total)
train_cols = [
    'dim_C18',
    'geometrical_dimension',
    'LCD_min', 'LCD_max',
    'PLD_min', 'PLD_max',
    'LCD_free_min', 'LCD_free_max',
    'num_channels',
]

In [27]:
# Hold out 25% as the test set, then take 1/3 of the remainder for validation
# (roughly 50% train / 25% validation / 25% test overall).
X_train, X_test, y_train, y_test = train_test_split(
    df_full[train_cols].values,
    df_full['kH_C18'].values,
    test_size=0.25,
    random_state=12,
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=(1/3),
    random_state=13,
)

In [17]:
class FourLayerNet_V2(nn.Module):
    """Fully connected regressor: 9 inputs -> three hidden layers of 100 units
    (LeakyReLU after each) -> 1 output. The raw output is treated as a log10
    value and exponentiated, so the returned 1-D tensor is on the linear scale."""

    def __init__(self):
        super().__init__()
        width = 100
        self.regressor = nn.Sequential(
            nn.Linear(9, width), nn.LeakyReLU(),
            nn.Linear(width, width), nn.LeakyReLU(),
            nn.Linear(width, width), nn.LeakyReLU(),
            nn.Linear(width, 1),
        )

    def forward(self, x):
        log10_pred = self.regressor(x).view(-1)
        # convert from log10 scale to linear scale
        return 10 ** log10_pred

In [28]:
# Split into smaller set for quicker training
# NOTE(review): this rebinds df_full to a random 50% sample (without replacement,
# seed 11), so re-running the cell halves the frame again. In top-to-bottom order
# it sits after the train_test_split cell, so the X_train / y_train arrays built
# there are not reduced by it -- confirm the intended cell order.
df_full = df_full.sample(frac=0.5, replace=False, random_state=11)
df_full

Unnamed: 0,zeolite,set,kH_C18,kH_C24,kH_C30,kH_2C17,kH_4C17,kH_22C16,U_C18,U_C24,...,LCD_max,PLD_min,PLD_max,LCD_free_min,LCD_free_max,num_channels,dimensionality,largest_inc_sphere,largest_free_sphere,largest_inc_sphere_free_path
117813,8117412,PCOD,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,10000.0,10000.0,...,3.62186,2.91908,2.91908,3.62186,3.62186,1,1.0,3.62186,2.91908,3.62186
146264,8145863,PCOD,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,10000.0,10000.0,...,4.05824,3.17789,3.17789,4.05824,4.05824,2,1.0,4.05824,3.17789,4.05824
308236,8307835,PCOD,1.109653e+04,3.233361e+05,4.481302e+06,11320.636735,9680.818644,8677.379800,-133.052270466,-159.5909757233,...,8.20807,4.91420,4.91420,8.20807,8.20807,1,3.0,8.20807,4.91420,8.20807
236637,8236236,PCOD,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,16.084272057,50.9245779391,...,6.00741,3.21518,3.21518,6.00741,6.00741,2,1.0,6.00741,3.21518,6.00741
177763,8177362,PCOD,3.052614e+02,5.706065e+03,1.003812e+05,126.377745,93.934959,135.418367,-118.3033056783,-170.5191675561,...,8.22201,4.45378,4.45378,8.22201,8.22201,1,3.0,8.22201,4.45378,8.22201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239586,8239185,PCOD,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,813.9664336353999,10000.0,...,4.55041,2.99962,2.99962,4.55041,4.55041,2,1.0,4.55041,2.99962,4.55041
265648,8265247,PCOD,1.284820e+02,1.368150e+03,1.632465e+04,132.522306,137.892386,120.991550,-66.2544158262,-86.61910292729999,...,12.09370,11.45460,11.45460,12.09370,12.09370,2,1.0,12.09370,11.45460,12.09370
246622,8246221,PCOD,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,-29.6897881162,193.2709511102,...,4.97471,3.58006,3.58006,4.97471,4.97471,1,2.0,4.97471,3.58006,4.97471
79374,8078973,PCOD,3.321870e+04,3.342566e+06,2.585509e+08,23321.647002,16221.583028,21565.097074,-138.0858067433,-161.576943114,...,6.75307,5.77294,5.77294,6.75307,6.75307,2,1.0,6.75307,5.77294,6.75307


In [29]:
# 10 examples -- batch training with SGD
# The hyperparameters are passed as the separate dicts that this notebook's
# train_SGD takes (optim_args / sched_args / grad_clip); it has no `parameters` argument.
fourlayernet_V2 = FourLayerNet_V2()
fourlayernet_V2 = train_SGD(fourlayernet_V2, 
                            train_data=(
                                torch.Tensor(X_train).float()[:10], 
                                torch.Tensor(y_train).float()[:10],
                            ), 
                            num_epochs=10000,
                            optim_args={
                                'lr': 1e-3,
                                'momentum': 0.9,
                                'weight_decay': 0.0,
                            },
                            sched_args={
                                'factor': 1/3,
                                'patience': 500,
                                'threshold': 1e-3,
                                'cooldown': 0,
                            },
                            grad_clip={
                                'clip_norm': True,
                                'max_norm': 1,
                            },
                            verbose={
                                'print_every': 1000,
                            },
                           )

Training examples: 10

Training FourLayerNet_V2 ...
Epoch:   1000/10000,    Loss: 0.527196
Epoch:   2000/10000,    Loss: 0.401806
Epoch:   3000/10000,    Loss: 0.269227
Epoch:   4000/10000,    Loss: 0.184264
Epoch:   5000/10000,    Loss: 0.130428
Epoch:   6000/10000,    Loss: 0.106597
Epoch:   7000/10000,    Loss: 0.095222
Epoch:   8000/10000,    Loss: 0.092317
Epoch:   9000/10000,    Loss: 0.096431
Epoch:  10000/10000,    Loss: 0.083858


<IPython.core.display.Javascript object>




In [30]:
# 10 examples -- batch training with Adam
# The hyperparameters are passed as the separate dicts that this notebook's
# train_Adam takes (optim_args / sched_args / grad_clip); it has no `parameters` argument.
fourlayernet_V2 = FourLayerNet_V2()
fourlayernet_V2 = train_Adam(fourlayernet_V2, 
                             train_data=(
                                 torch.Tensor(X_train).float()[:10], 
                                 torch.Tensor(y_train).float()[:10],
                             ), 
                             num_epochs=10000,
                             optim_args={
                                 'lr': 1e-3,
                                 'betas': (0.9, 0.999),
                                 'weight_decay': 0.0,
                             },
                             sched_args={
                                 'factor': 1/3,
                                 'patience': 500,
                                 'threshold': 1e-3,
                                 'cooldown': 0,
                             },
                             grad_clip={
                                 'clip_norm': True,
                                 'max_norm': 1,
                             },
                             verbose={
                                 'print_every': 1000,
                             },
                            )

Training examples: 10

Training FourLayerNet_V2 ...
Epoch   962: reducing learning rate of group 0 to 3.3333e-04.
Epoch:   1000/10000,    Loss: 0.003265
Epoch  1845: reducing learning rate of group 0 to 1.1111e-04.
Epoch:   2000/10000,    Loss: 0.000184
Epoch  2373: reducing learning rate of group 0 to 3.7037e-05.
Epoch:   3000/10000,    Loss: 0.000022
Epoch  3399: reducing learning rate of group 0 to 1.2346e-05.
Epoch:   4000/10000,    Loss: 0.000001
Epoch  4541: reducing learning rate of group 0 to 4.1152e-06.
Epoch:   5000/10000,    Loss: 0.000001
Epoch:   6000/10000,    Loss: 0.000001
Epoch:   7000/10000,    Loss: 0.000000
Epoch:   8000/10000,    Loss: 0.000000
Epoch:   9000/10000,    Loss: 0.000000
Epoch:  10000/10000,    Loss: 0.000000


<IPython.core.display.Javascript object>




In [35]:
# 100 examples -- batch training with SGD
# The hyperparameters are passed as the separate dicts that this notebook's
# train_SGD takes (optim_args / sched_args / grad_clip); it has no `parameters` argument.
fourlayernet_V2 = FourLayerNet_V2()
fourlayernet_V2 = train_SGD(fourlayernet_V2, 
                            train_data=(
                                torch.Tensor(X_train).float()[:100], 
                                torch.Tensor(y_train).float()[:100],
                            ), 
                            num_epochs=100000,
                            optim_args={
                                'lr': 1e-3,
                                'momentum': 0.9,
                                'weight_decay': 0.0,
                            },
                            sched_args={
                                'factor': 1/3,
                                'patience': 500,
                                'threshold': 1e-4,
                                'cooldown': 0,
                            },
                            grad_clip={
                                'clip_norm': True,
                                'max_norm': 1,
                            },
                            verbose={
                                'print_every': 5000,
                            },
                           )

Training examples: 100

Training FourLayerNet_V2 ...
Epoch:   5000/100000,    Loss: 731898.500000
Epoch:  10000/100000,    Loss: 622220.500000
Epoch 11312: reducing learning rate of group 0 to 3.3333e-04.
Epoch 13939: reducing learning rate of group 0 to 1.1111e-04.
Epoch:  15000/100000,    Loss: 401755.531250
Epoch:  20000/100000,    Loss: 390041.375000
Epoch:  25000/100000,    Loss: 379319.875000
Epoch:  30000/100000,    Loss: 364957.281250
Epoch:  35000/100000,    Loss: 233881.703125
Epoch:  40000/100000,    Loss: 125745.859375
Epoch:  45000/100000,    Loss: 104262.031250
Epoch:  50000/100000,    Loss: 92851.421875
Epoch:  55000/100000,    Loss: 87792.539062
Epoch:  60000/100000,    Loss: 85603.078125
Epoch:  65000/100000,    Loss: 82481.539062
Epoch:  70000/100000,    Loss: 80724.320312
Epoch:  75000/100000,    Loss: 79276.437500
Epoch 79169: reducing learning rate of group 0 to 3.7037e-05.
Epoch 79788: reducing learning rate of group 0 to 1.2346e-05.
Epoch:  80000/100000,    Loss:

<IPython.core.display.Javascript object>




In [36]:
# Adam optimizer -- batch training on the first 100 training examples
fourlayernet_V2 = FourLayerNet_V2()  # re-create the network before training
fourlayernet_V2 = train_Adam(
    fourlayernet_V2,
    train_data=(
        torch.Tensor(X_train).float()[:100],
        torch.Tensor(y_train).float()[:100],
    ),
    num_epochs=100000,
    parameters={
        # optimizer settings
        'lr': 1e-3,
        'betas': (0.9, 0.999),
        'weight_decay': 0.0,
        # learning rate schedule settings
        'factor': 1/3,
        'patience': 500,
        'threshold': 1e-4,
        'cooldown': 0,
        # gradient-norm clipping
        'clip_norm': True,
        'max_norm': 1,
    },
    verbose={'print_every': 5000},
)

Training examples: 100

Training FourLayerNet_V2 ...
Epoch  3038: reducing learning rate of group 0 to 3.3333e-04.
Epoch:   5000/100000,    Loss: 79171.687500
Epoch  5246: reducing learning rate of group 0 to 1.1111e-04.
Epoch  9742: reducing learning rate of group 0 to 3.7037e-05.
Epoch:  10000/100000,    Loss: 24244.970703
Epoch 10260: reducing learning rate of group 0 to 1.2346e-05.
Epoch:  15000/100000,    Loss: 19665.609375
Epoch:  20000/100000,    Loss: 2950.863770
Epoch:  25000/100000,    Loss: 1957.442871
Epoch:  30000/100000,    Loss: 969.841492
Epoch:  35000/100000,    Loss: 605.340942
Epoch 35054: reducing learning rate of group 0 to 4.1152e-06.
Epoch:  40000/100000,    Loss: 189.638138
Epoch:  45000/100000,    Loss: 114.370216
Epoch:  50000/100000,    Loss: 68.154655
Epoch 52064: reducing learning rate of group 0 to 1.3717e-06.
Epoch 52619: reducing learning rate of group 0 to 4.5725e-07.
Epoch:  55000/100000,    Loss: 57.973988
Epoch:  60000/100000,    Loss: 54.577991
Epoc

<IPython.core.display.Javascript object>




In [None]:
# Batch training on the first 100 training examples (slice is [:100]).
# NOTE(review): this cell was previously labelled "10 examples"; confirm
# whether the label or the slice reflects the intended experiment.
fourlayernet_V2 = FourLayerNet_V2()  # re-create the network before training
fourlayernet_V2 = train(
    fourlayernet_V2,
    train_data=(
        torch.Tensor(X_train).float()[:100],
        torch.Tensor(y_train).float()[:100],
    ),
    parameters={
        'num_epochs': 100000,
        # optimizer settings
        'lr': 1e-3,
        'weight_decay': 0.0,
        # step-based learning rate schedule (presumably StepLR -- confirm)
        'step_size': 40000,
        'gamma': 0.33,
        # gradient-norm clipping
        'clip_norm': True,
        'max_norm': 1,
    },
    verbose={'print_every': 10000},
)

In [None]:
# 10 examples -- batch training
# (learning rate scheduler changed: now ReduceLROnPlateau, previously StepLR)
fourlayernet_V2 = FourLayerNet_V2()  # re-create the network before training
fourlayernet_V2 = train_RLRP(
    fourlayernet_V2,
    train_data=(
        torch.Tensor(X_train).float()[:10],
        torch.Tensor(y_train).float()[:10],
    ),
    parameters={
        'num_epochs': 10000,
        # optimizer settings
        'lr': 1e-4,
        'weight_decay': 0.0,
        # ReduceLROnPlateau settings
        'factor': 1/3,
        'patience': 500,
        'threshold': 1e-2,
        # gradient-norm clipping
        'clip_norm': True,
        'max_norm': 1,
    },
    verbose={'print_every': 1000},
)

In [None]:
# 100 examples -- batch training
# (learning rate scheduler changed: now ReduceLROnPlateau, previously StepLR)
fourlayernet_V2 = FourLayerNet_V2()  # re-create the network before training
fourlayernet_V2 = train_RLRP(
    fourlayernet_V2,
    train_data=(
        torch.Tensor(X_train).float()[:100],
        torch.Tensor(y_train).float()[:100],
    ),
    parameters={
        'num_epochs': 50000,
        # optimizer settings
        'lr': 1e-3,
        'weight_decay': 0.0,
        # ReduceLROnPlateau settings
        'factor': 1/3,
        'patience': 1000,
        'threshold': 1e-4,
        'cooldown': 0,
        # gradient-norm clipping
        'clip_norm': True,
        'max_norm': 1,
    },
    verbose={'print_every': 2500},
)

In [None]:
# Full training set -- batch training
# (learning rate scheduler changed: now ReduceLROnPlateau, previously StepLR)
fourlayernet_V2 = FourLayerNet_V2()  # re-create the network before training
fourlayernet_V2 = train_RLRP(
    fourlayernet_V2,
    train_data=(
        torch.Tensor(X_train).float(),
        torch.Tensor(y_train).float(),
    ),
    parameters={
        'num_epochs': 1000,
        # optimizer settings
        'lr': 1e-2,
        'weight_decay': 0.0,
        # ReduceLROnPlateau settings
        'factor': 1/3,
        'patience': 500,
        'threshold': 1e-4,
        'cooldown': 0,
        # gradient-norm clipping
        'clip_norm': True,
        'max_norm': 1,
    },
    verbose={'print_every': 100},
)