In [4]:
# Numerical / deep-learning libraries used throughout the notebook
import torch
import torch.nn as nn
import numpy as np
# Plotting libraries
import matplotlib.pyplot as plt
import matplotlib_inline
# Request SVG as the figure format for the inline matplotlib backend
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [2]:
# Fetch the iris dataset through seaborn
import ssl
import seaborn as sns

# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one for everything that runs afterwards in this kernel, and it
# relies on private `ssl` attributes. Confirm it is still required before
# sharing the notebook.
ssl._create_default_https_context = ssl._create_unverified_context

iris = sns.load_dataset(name='iris')

# preview the first five rows
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Turn the pandas frame into tensors

# features: the first four columns of the frame, cast to float
featureFrame = iris.iloc[:, 0:4]
data = torch.tensor(featureFrame.values).float()

# targets: one integer class per row. Every entry starts at 0, which is the
# code used for 'setosa', so only the other two species need to be written.
labels = torch.zeros(len(data), dtype=torch.long)
for classCode, speciesName in ((1, 'versicolor'), (2, 'virginica')):
    labels[iris.species == speciesName] = classCode

### Separate data into test and train sets

In [36]:
trainingProportion = 0.8 # fraction of the samples reserved for training
nSamples = len(labels)
nTraining = int(nSamples*trainingProportion) # number of training samples

# randomly draw nTraining distinct row indices from [0, 1, ..., nSamples-1]
items2use4train = np.random.choice(np.arange(nSamples), size=nTraining, replace=False)

# boolean mask over all samples: True -> training row, False -> testing row
trainTestSelector = np.zeros(nSamples, dtype=bool)
trainTestSelector[items2use4train] = True


#### Testing whether the selected data is balanced

In [37]:
# Mean label value per subset. The full set averages exactly 1 because it
# holds equally many 0s, 1s and 2s; a balanced split should stay close to 1.
balanceChecks = (
    ('Average of full data: ', labels),
    ('Average of training data: ', labels[trainTestSelector]),
    ('Average of testing data: ', labels[~trainTestSelector]),
)

for heading, subsetLabels in balanceChecks:
    print(heading)
    print(torch.mean(subsetLabels.float()))
    print(' ')

Average of full data: 
tensor(1.)
 
Average of training data: 
tensor(0.9583)
 
Average of testing data: 
tensor(1.1667)
 


In [38]:
# Build the network, the loss and the optimizer

nFeatures, nHidden, nClasses = 4, 64, 3

# input layer -> one hidden layer -> output layer, ReLU in between
layers = [
    nn.Linear(nFeatures, nHidden),
    nn.ReLU(),
    nn.Linear(nHidden, nHidden),
    nn.ReLU(),
    nn.Linear(nHidden, nClasses),
]
ANNiris = nn.Sequential(*layers)

# CrossEntropyLoss applies the softmax itself, so the model emits raw scores
lossFunction = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(params=ANNiris.parameters(), lr=0.01)

In [39]:
# train the model

epochs = 1000

# The training subset is the same in every epoch, so slice it once up front
# instead of re-indexing `data` / `labels` on each iteration.
trainData = data[trainTestSelector]
trainLabels = labels[trainTestSelector]

losses = torch.zeros(epochs)  # training loss recorded at each epoch
ongoingAcc = []               # training accuracy (in %) recorded at each epoch

for epoch in range(epochs):
    # forward pass on the training subset
    yHat = ANNiris(trainData)

    loss = lossFunction(yHat, trainLabels)
    # Record a detached copy: writing the graph-attached loss into `losses`
    # would make `losses` require grad and chain every epoch's autograd nodes.
    losses[epoch] = loss.detach()

    # backpropagation and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # accuracy of this epoch's forward pass (computed from yHat, i.e. with the
    # parameters as they were before the update above)
    ongoingAcc.append( torch.mean((torch.argmax(yHat, axis=1) == trainLabels).float()) * 100 )


In [40]:
# Evaluation only: no parameters are updated here, so run the forward passes
# under no_grad to avoid building autograd graphs that are never used.
with torch.no_grad():
    # final forward pass using training data
    predictions = ANNiris(data[trainTestSelector])
    predictedLabels = torch.argmax(predictions, axis=1)
    trainAcc = torch.mean((predictedLabels==labels[trainTestSelector]).float())*100

    # final forward pass using testing data
    # (predictions / predictedLabels now refer to the testing subset)
    predictions = ANNiris(data[~trainTestSelector])
    predictedLabels = torch.argmax(predictions, axis=1)
    testAcc = torch.mean((predictedLabels==labels[~trainTestSelector]).float())*100

# accuracies in percent: training first, then testing
print(trainAcc)
print(testAcc)

tensor(98.3333)
tensor(100.)
