In [16]:
# classify wheat seeds using pytorch

import torch
import pandas as pd
import numpy as np

from torch.autograd import Variable
from zipfile import ZipFile

# NOTE(review): X and Y are never read in this cell; kept in case another cell uses them.
X = []
Y = []

# Fixed seed so the train/test split is identical on every run.
SPLIT_SEED = 0

# Read the feature table. The CSV has no header row; the last column is the
# class label, one of three classes {1, 2, 3}.
with ZipFile('data/wheat-seeds.zip') as zf:
    # Close the archive member explicitly instead of leaving the handle open.
    with zf.open('wheat-seeds.csv') as csv_file:
        df = pd.read_csv(csv_file, header=None)

# Train on a random 90% of the rows; the remaining 10% are held out for testing.
train = df.sample(frac=0.9, random_state=SPLIT_SEED)
test = df.drop(train.index)

# Features are every column except the last, as float32.
trX = torch.tensor(train[train.columns[:-1]].values, dtype=torch.float32)
# Class labels should start at 0, so shift {1, 2, 3} to {0, 1, 2}; the explicit
# dtype also copes with a label column that was parsed as float.
trY = torch.tensor((train[train.columns[-1]] - 1).values, dtype=torch.long)


NUM_HIDDEN_1 = 64
NUM_HIDDEN_2 = 64
NUM_FEATURES = train.shape[1] - 1  # every column except the trailing label column
NUM_CLASSES = 3                    # one output score per class

# Seed PyTorch's RNG so the initial layer weights are the same on every run.
torch.manual_seed(0)

# Define the model: two sigmoid hidden layers, then a linear layer that emits
# one raw score per class. There is no softmax here because the loss below is
# applied directly to these raw scores.
model = torch.nn.Sequential(
    torch.nn.Linear(NUM_FEATURES, NUM_HIDDEN_1),
    torch.nn.Sigmoid(),
    torch.nn.Linear(NUM_HIDDEN_1, NUM_HIDDEN_2),
    torch.nn.Sigmoid(),
    torch.nn.Linear(NUM_HIDDEN_2, NUM_CLASSES),
)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)


BATCH_SIZE = 64
NUM_EPOCHS = 10000
LOG_EVERY = 500  # number of epochs between progress reports

# Start training: mini-batch SGD over the training set. Batches are taken in
# the same row order every epoch (no shuffling).
for epoch in range(NUM_EPOCHS):
    for start in range(0, len(trX), BATCH_SIZE):
        end = start + BATCH_SIZE  # slicing clamps the final, shorter batch
        batchX = trX[start:end]
        batchY = trY[start:end]

        y_pred = model(batchX)
        batch_loss = loss_fn(y_pred, batchY)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # Measure the loss on the whole training set only on the epochs that are
    # actually reported, and without recording an autograd graph.
    if epoch % LOG_EVERY == 0:
        with torch.no_grad():
            loss = loss_fn(model(trX), trY)
        print('Epoch: %d Loss: %f' % (epoch, loss.item()))

# Leave the full-training-set loss of the final model in `loss`.
with torch.no_grad():
    loss = loss_fn(model(trX), trY)


# Output now: score the held-out rows. This is inference only, so gradient
# tracking is switched off while the model runs.
testX = torch.tensor(test[test.columns[:-1]].values, dtype=torch.float32)
with torch.no_grad():
    testY = model(testX)  # raw per-class scores, one row per test sample

print(testY)
print(test[test.columns[-1]] - 1)  # true labels, shifted to start at 0


Epoch: 0 Loss: 1.095950
Epoch: 500 Loss: 0.292872
Epoch: 1000 Loss: 0.190853
Epoch: 1500 Loss: 0.180292
Epoch: 2000 Loss: 0.172462
Epoch: 2500 Loss: 0.164166
Epoch: 3000 Loss: 0.155220
Epoch: 3500 Loss: 0.145367
Epoch: 4000 Loss: 0.135075
Epoch: 4500 Loss: 0.125715
Epoch: 5000 Loss: 0.118258
Epoch: 5500 Loss: 0.112569
Epoch: 6000 Loss: 0.108106
Epoch: 6500 Loss: 0.104257
Epoch: 7000 Loss: 0.100796
Epoch: 7500 Loss: 0.097578
Epoch: 8000 Loss: 0.094564
Epoch: 8500 Loss: 0.091336
Epoch: 9000 Loss: 0.088143
Epoch: 9500 Loss: 0.085031
tensor([[  4.7732,  -2.7703,  -1.8277],
        [  5.1193,  -6.0309,   1.0951],
        [  5.4508,  -3.2130,  -2.0729],
        [  4.7763,  -5.1224,   0.5362],
        [  4.5404,  -4.8281,   0.4704],
        [  4.7421,  -3.2449,  -1.3225],
        [  4.1163,  -6.4945,   2.5940],
        [  2.8549,  -8.0360,   5.4112],
        [  4.7410,  -5.4546,   0.9074],
        [  3.8436,  -2.2795,  -1.3676],
        [  5.0630,  -4.1616,  -0.7222],
        [  4.8084,  -2.7

In [23]:
# Pair each held-out sample's true label (shifted to start at 0) with the
# index of the largest model score for that sample.
list(zip(test.iloc[:, -1].values - 1, torch.max(testY, 1)[1]))

[(0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(2)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(0)),
 (0, tensor(2)),
 (0, tensor(2)),
 (1, tensor(1)),
 (1, tensor(1)),
 (2, tensor(2)),
 (2, tensor(2)),
 (2, tensor(2)),
 (2, tensor(2)),
 (2, tensor(2))]