In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from torchsummary import summary

%matplotlib notebook

In [2]:
# Function to train net
def train(net, train_data, parameters, verbose, options):
    """
    Train NN on provided data sets.

    Args:
        net: neural network
        train_data: Dataset containing training set
        parameters: dict of hyperparameters for training with Adam
            - lr: learning rate [default (0.001)]
            - betas: default (0.9, 0.999)
            - eps: epsilon [default (1e-8)]
            - weight_decay: L2 regularization [default (0.0)]
            - num_epochs: number of epochs [default (1)]
        verbose: dict of printing actions
            - verbose: print real-time training results [default (True)]
            - print_every: interval to print real-time training results [default (1)]
        options: dict of optional actions
            - plot_loss: generate plot of training curves [default (False)]
    """

    X_train, y_train = train_data
    num_train = X_train.size(0)
    print('Training examples: ', num_train)
    print('')

    # Loss function and optimization method
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(),
                           lr=parameters.get('lr', 1e-3),
                           betas=parameters.get('betas', (0.9, 0.999)),
                           eps=parameters.get('eps', 1e-8),
                           weight_decay=parameters.get('weight_decay', 0.0))

    num_epochs = parameters.get('num_epochs', 1)
    train_loss = np.zeros(num_epochs)
    print('Training %s ...' % net.__class__.__name__)

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        running_loss = loss.item()
    
        if verbose.get('verbose', True):
            if (epoch + 1) % verbose.get('print_every', 1) == 0:
                print('Epoch: %d/%d,    Loss: %.6f' %
                      (epoch + 1, num_epochs, running_loss / num_train))

        train_loss[epoch] = running_loss / num_train

    if options.get('plot_loss', False):
        plotloss(net.__class__.__name__, train_loss.flatten())
        plt.show()

    print('')

    return net

In [3]:
# Plot training loss and, optionally, validation loss
def plotloss(title, train_loss, val_loss=None):
    num_epochs = len(train_loss)
    plt.figure(figsize=(8, 6))
    plt.plot(np.arange(1, num_epochs + 1), train_loss, label='Train')
    if val_loss is not None:
        plt.plot(np.arange(1, num_epochs + 1), val_loss, label='Validation')
    plt.title(title)
    plt.xlabel('Epochs')
    # plt.xticks(np.arange(1, num_epochs + 1))  # 5 tick marks
    plt.ylabel('MSE Loss')
    plt.yscale('log')
    plt.legend()
    plt.tight_layout()

In [4]:
# dataset is ordered by species i.e. first 50 entries are setosa,
#                                    second 50 entires are versicolor,
#                                    last 50 entries are virginica
iris = pd.read_csv('../data/iris.csv')
iris

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [6]:
# normalize features
for i, col in enumerate(iris.columns[1:-1]):
    iris[col] = (iris[col] - iris[col].min()) / (iris[col].max() - iris[col].min())
iris

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,0.222222,0.625000,0.067797,0.041667,Iris-setosa
1,2,0.166667,0.416667,0.067797,0.041667,Iris-setosa
2,3,0.111111,0.500000,0.050847,0.041667,Iris-setosa
3,4,0.083333,0.458333,0.084746,0.041667,Iris-setosa
4,5,0.194444,0.666667,0.067797,0.041667,Iris-setosa
...,...,...,...,...,...,...
145,146,0.666667,0.416667,0.711864,0.916667,Iris-virginica
146,147,0.555556,0.208333,0.677966,0.750000,Iris-virginica
147,148,0.611111,0.416667,0.711864,0.791667,Iris-virginica
148,149,0.527778,0.583333,0.745763,0.916667,Iris-virginica


In [6]:
iris.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,0.428704,0.439167,0.467571,0.457778
std,43.445368,0.230018,0.180664,0.299054,0.317984
min,1.0,0.0,0.0,0.0,0.0
25%,38.25,0.222222,0.333333,0.101695,0.083333
50%,75.5,0.416667,0.416667,0.567797,0.5
75%,112.75,0.583333,0.541667,0.694915,0.708333
max,150.0,1.0,1.0,1.0,1.0


In [9]:
fig = plt.figure(figsize=(8, 6))
for i, col in enumerate(iris.columns[1:-1], start=1):
    plt.subplot(2, 2, i)
    plt.hist(iris[col][:50], bins=np.linspace(0, 1, 40), 
             alpha=0.3, edgecolor='black', label='iris-setosa')
    plt.hist(iris[col][50:100], bins=np.linspace(0, 1, 40), 
             alpha=0.3, edgecolor='black', label='iris-versicolor')
    plt.hist(iris[col][100:], bins=np.linspace(0, 1, 40), 
             alpha=0.3, edgecolor='black', label='iris-virginica')
    plt.title(col)
    plt.ylabel('Frequency')
plt.legend()
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [8]:
iris = iris.drop('Id', axis=1)

In [9]:
# encode species labels
iris.loc[iris.Species == 'Iris-setosa', 'Species'] = 0
iris.loc[iris.Species == 'Iris-versicolor', 'Species'] = 1
iris.loc[iris.Species == 'Iris-virginica', 'Species'] = 2
iris

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,0.222222,0.625000,0.067797,0.041667,0
1,0.166667,0.416667,0.067797,0.041667,0
2,0.111111,0.500000,0.050847,0.041667,0
3,0.083333,0.458333,0.084746,0.041667,0
4,0.194444,0.666667,0.067797,0.041667,0
...,...,...,...,...,...
145,0.666667,0.416667,0.711864,0.916667,2
146,0.555556,0.208333,0.677966,0.750000,2
147,0.611111,0.416667,0.711864,0.791667,2
148,0.527778,0.583333,0.745763,0.916667,2


In [10]:
# split dataset into train, valid, test
X_train, X_test, y_train, y_test = train_test_split(iris[iris.columns[0:4]].values,
                                                    iris.Species.values, test_size=0.2,
                                                    shuffle=True, random_state=12)

In [11]:
fig = plt.figure(figsize=(8, 6))
plt.bar([0, 1, 2], [(y_train == 0).sum(), (y_train == 1).sum(), (y_train == 2).sum()])
plt.title('Training label distribution')
plt.xlabel('Species')
plt.xticks([0, 1, 2], ['iris-setosa', 'iris-versicolor', 'iris-virginica'],
           rotation=20)
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [12]:
class TwoLayerNet(nn.Module):
    def __init__(self):
        super(TwoLayerNet, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(4, 100),
            nn.Linear(100, 3),
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.classifier(x)
        x = self.softmax(x)
        return x

In [13]:
net = TwoLayerNet()
summary(net, (1, 4))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]             500
            Linear-2                 [-1, 1, 3]             303
           Softmax-3                 [-1, 1, 3]               0
Total params: 803
Trainable params: 803
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [14]:
X_train = torch.Tensor(X_train).float()
y_train = torch.Tensor(y_train.astype(np.int16)).long()
X_test = torch.Tensor(X_test).float()
y_test = torch.Tensor(y_test.astype(np.int16)).long()

In [15]:
net = train(net, train_data=(X_train, y_train), 
            parameters={'num_epochs': 1000,
                        'lr': 0.001,
                        'weight_decay': 0.0
                       },
            verbose={'print_every': 100},
            options={'plot_loss': True},
           )

Training examples:  120

Training TwoLayerNet ...
Epoch: 100/1000,    Loss: 0.006802
Epoch: 200/1000,    Loss: 0.005871
Epoch: 300/1000,    Loss: 0.005373
Epoch: 400/1000,    Loss: 0.005117
Epoch: 500/1000,    Loss: 0.004992
Epoch: 600/1000,    Loss: 0.004923
Epoch: 700/1000,    Loss: 0.004881
Epoch: 800/1000,    Loss: 0.004853
Epoch: 900/1000,    Loss: 0.004833
Epoch: 1000/1000,    Loss: 0.004818


<IPython.core.display.Javascript object>




In [16]:
predict_out = net(X_test)
_, predict_y = torch.max(predict_out, 1)

print('prediction accuracy', accuracy_score(y_test.data, predict_y.data))
print('macro precision', precision_score(y_test.data, predict_y.data, average='macro'))
print('micro precision', precision_score(y_test.data, predict_y.data, average='micro'))
print('macro recall', recall_score(y_test.data, predict_y.data, average='macro'))
print('micro recall', recall_score(y_test.data, predict_y.data, average='micro'))

prediction accuracy 0.9333333333333333
macro precision 0.9393939393939394
micro precision 0.9333333333333333
macro recall 0.9259259259259259
micro recall 0.9333333333333333
