In [1]:
import numpy as np
import torch 
import torch.nn as nn

In [2]:
# load the iris dataset (returned as a pandas DataFrame)
import seaborn as sns
iris = sns.load_dataset('iris')

# features: take the first four columns and convert them to a float tensor
featureColumns = iris.columns[:4]
data = torch.tensor(iris[featureColumns].to_numpy()).float()

# labels: integer class codes, one per row; every row starts at 0, so any
# species that is not reassigned in the loop below keeps code 0
labels = torch.zeros(data.shape[0], dtype = torch.long)
for speciesCode, speciesName in enumerate(['versicolor', 'virginica'], start = 1):
    labels[iris.species == speciesName] = speciesCode

In [6]:
# Build a random but reproducible train/test split, stored as a boolean
# mask over the samples (True -> training set, False -> test set).

# proportion of samples used for training
propTraining = .8
nTraining = int(len(labels)*propTraining)

# dedicated, seeded generator so the same split is drawn on every fresh run
splitSeed = 42
splitRng = np.random.default_rng(splitSeed)

# initialize a boolean vector to select data and labels
traintestBool = np.zeros(len(labels), dtype = bool)

# NOTE: simply marking the first nTraining rows
# (traintestBool[:nTraining] = True) would not randomize the split;
# instead draw nTraining distinct sample indices at random
items2use4train = splitRng.choice(len(labels), size = nTraining, replace = False)
traintestBool[items2use4train] = True

# sanity check: exactly nTraining samples are flagged for training
assert traintestBool.sum() == nTraining

traintestBool

array([ True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True, False,  True,  True,
       False,  True, False,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True, False,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True, False,  True, False,  True,  True,
       False,  True, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False,  True,  True,
       False,  True, False,  True,  True, False, False,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
       False, False,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False, False,  True,
        True,  True,

In [7]:
# create an ANN model

# seed PyTorch's global RNG so the random weight initialization below is
# identical on every fresh run
modelSeed = 42
torch.manual_seed(modelSeed)

# model architecture: 4 input features -> two 64-unit ReLU layers -> 3 outputs
ANNiris = nn.Sequential(
    nn.Linear(4,64),    # input layer
    nn.ReLU(),          # activation unit
    nn.Linear(64,64),   # hidden layer
    nn.ReLU(),          # activation unit
    nn.Linear(64,3)     # output units (raw scores, no softmax layer here)
)


# loss function (applied to the raw output scores and integer class labels)
lossfun = nn.CrossEntropyLoss()

# optimizer: plain SGD over all model parameters
optimizer = torch.optim.SGD(ANNiris.parameters(), lr = .01)

In [8]:
# train the model
numepochs = 1000

# per-epoch training loss and training accuracy (in percent)
losses = torch.zeros(numepochs)
ongoingAcc = []

# the training subset is identical in every epoch, so select it once
trainData = data[traintestBool,:]
trainLabels = labels[traintestBool]

# loop over the epochs (each step uses the whole training subset)
for epochi in range(numepochs):

    # forward pass
    yHat = ANNiris(trainData)

    # compute accuracy: percentage of rows whose highest-scoring class
    # matches the label
    ongoingAcc.append(100*torch.mean(
        (torch.argmax(yHat, axis = 1) == trainLabels).float()))

    # compute loss
    loss = lossfun(yHat, trainLabels)
    # store a detached value: writing the graph-attached loss in place would
    # make `losses` itself require grad and tie it to every epoch's graph
    losses[epochi] = loss.detach()

    # backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [11]:
# compute train and test accuracies

def predictAndScore(model, X, y):
    """Run `model` on `X` and score its predictions against labels `y`.

    The predicted class of each row is the index of its largest model output.

    Returns:
        (predictions, accuracy): the raw model outputs and the percentage
        of rows whose predicted class equals `y`, as a 0-dim float tensor.
    """
    modelOutput = model(X)
    isCorrect = torch.argmax(modelOutput, axis = 1) == y
    return modelOutput, 100*torch.mean(isCorrect.float())

# evaluation only: no gradients are needed, so skip building the autograd graph
with torch.no_grad():

    # final forward pass using training data
    predictions, trainacc = predictAndScore(
        ANNiris, data[traintestBool, :], labels[traintestBool])

    # final forward pass using test data (the rows not selected for training)
    predictions, testacc = predictAndScore(
        ANNiris, data[~traintestBool,:], labels[~traintestBool])

In [12]:
# report the final accuracies (in percent), one per line
print('Final train accuracy : %g%%' % float(trainacc),
      'Final test accuracy : %g%%' % float(testacc),
      sep = '\n')

Final train accuracy : 97.5%
Final test accuracy : 96.6667%
