In [3]:
import numpy as np
import pandas as pd

# Read the CSV into a plain NumPy array (one row per sample).
data = pd.read_csv('data_3_1_1.csv').to_numpy()

# The last column is the label; every column before it is a feature.
x, y = data[:, :-1], data[:, -1]

# Quick shape check of the feature matrix and the label vector.
print('x', x.shape)
print('y', y.shape)

x (100, 2)
y (100,)


## Create a model

In [4]:
import torch
import torch.nn as nn

class MyNeuralNetworkModel(nn.Module):
  '''
  Small fully connected network for classification.

  Architecture: Linear -> sigmoid -> Linear -> sigmoid -> Linear.
  With the default arguments this is a 2 -> 5 -> 5 -> 2 network: the input
  is a pair (x1, x2) and the output is one raw score (logit) per class.
  No activation is applied to the last layer, so the output is meant to be
  fed to a loss that takes logits (e.g. nn.CrossEntropyLoss); take the
  argmax over dim=1 to turn the logits into a predicted label.

  Args:
    in_features: number of input features per sample (default 2).
    hidden_size: number of neurons in each of the two hidden layers (default 5).
    num_classes: number of output logits per sample (default 2).
  '''

  def __init__(self, in_features=2, hidden_size=5, num_classes=2):
    super().__init__()
    self.layer1 = nn.Linear(in_features, hidden_size)
    self.layer2 = nn.Linear(hidden_size, hidden_size)
    self.layer3 = nn.Linear(hidden_size, num_classes)

  def forward(self, x): # x: tensor (m, in_features)
    '''Return the raw class logits, tensor (m, num_classes), for a batch x.'''
    output = torch.sigmoid(self.layer1(x)) # output: tensor (m, hidden_size)
    output = torch.sigmoid(self.layer2(output)) # output: tensor (m, hidden_size)
    # No sigmoid/softmax on the last layer: CrossEntropyLoss expects
    # unnormalized logits and applies the normalization itself.
    return self.layer3(output) # tensor (m, num_classes)


# Sanity check: push a random batch of 100 two-feature samples through a
# freshly initialised model and confirm the output has one row per sample
# and one column per class.
tmp = torch.randn((100, 2))
model = MyNeuralNetworkModel()
output = model(tmp)

print('output', output.size())

output torch.Size([100, 2])


## Training

In [14]:
import torch.optim as optim

model = MyNeuralNetworkModel()
opt = optim.SGD(params=model.parameters(), lr=0.9)

# The network emits one logit per class (2 columns), so the matching loss is
# CrossEntropyLoss. A network with a single output would use BCELoss or,
# preferably, BCEWithLogitsLoss instead.
cost_func = nn.CrossEntropyLoss()

# Features go in as float32. Labels are converted to long because
# CrossEntropyLoss expects class-index targets of that dtype.
tx = torch.tensor(x, dtype=torch.float32) # tx: tensor (m, 2)
ty = torch.tensor(y, dtype=torch.long) # ty: tensor (m,)

# Full-batch gradient descent for a fixed number of iterations.
for i in range(2000):
  tz = model(tx) # tz: logits, tensor (m, 2)
  J = cost_func(tz, ty) # J: scalar loss tensor

  print('iter: %d, J: %f' % (i, J.item()))

  J.backward() # compute gradients of J w.r.t. the model parameters
  opt.step() # update the parameters using those gradients
  opt.zero_grad() # clear the gradients before the next iteration

iter: 0, J: 0.698558
iter: 1, J: 0.693139
iter: 2, J: 0.692604
iter: 3, J: 0.692518
iter: 4, J: 0.692471
iter: 5, J: 0.692425
iter: 6, J: 0.692375
iter: 7, J: 0.692321
iter: 8, J: 0.692263
iter: 9, J: 0.692198
iter: 10, J: 0.692127
iter: 11, J: 0.692047
iter: 12, J: 0.691958
iter: 13, J: 0.691858
iter: 14, J: 0.691744
iter: 15, J: 0.691614
iter: 16, J: 0.691463
iter: 17, J: 0.691285
iter: 18, J: 0.691075
iter: 19, J: 0.690823
iter: 20, J: 0.690513
iter: 21, J: 0.690128
iter: 22, J: 0.689637
iter: 23, J: 0.688999
iter: 24, J: 0.688147
iter: 25, J: 0.686986
iter: 26, J: 0.685378
iter: 27, J: 0.683153
iter: 28, J: 0.680181
iter: 29, J: 0.676522
iter: 30, J: 0.672512
iter: 31, J: 0.668481
iter: 32, J: 0.664425
iter: 33, J: 0.660094
iter: 34, J: 0.655230
iter: 35, J: 0.649612
iter: 36, J: 0.643020
iter: 37, J: 0.635183
iter: 38, J: 0.625754
iter: 39, J: 0.614363
iter: 40, J: 0.600858
iter: 41, J: 0.585580
iter: 42, J: 0.568988
iter: 43, J: 0.551049
iter: 44, J: 0.531506
iter: 45, J: 0.51029

## Evaluating the model

In [15]:
# Evaluate on the same data the model was trained on, so this is the
# training accuracy, not a held-out estimate.
# Inference needs no gradients: run the forward pass under no_grad so
# autograd does not build a graph that is never used.
with torch.no_grad():
  tz = model(tx) # tz: tensor (m, 2)
  # argmax returns the index of the largest logit, i.e. the predicted class
  predict = torch.argmax(tz, dim=1) # predict: tensor (m,) of type long

# Fraction of samples whose predicted class matches the label.
accuracy = (predict == ty).sum().item() / len(ty)
print('Accuracy rate: %.4f' % accuracy)

Accuracy rate: 0.9800
