In [1]:
import torch
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits

In [2]:
X,y = load_digits(return_X_y=True)
X=X.flatten().reshape(-1,64)
X,y,X.shape,y.shape

(array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 array([0, 1, 2, ..., 8, 9, 8]),
 (1797, 64),
 (1797,))

In [3]:
s = StandardScaler()
X = s.fit_transform(X)
X

array([[ 0.        , -0.33501649, -0.04308102, ..., -1.14664746,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -1.09493684, ...,  0.54856067,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -1.09493684, ...,  1.56568555,
         1.6951369 , -0.19600752],
       ...,
       [ 0.        , -0.33501649, -0.88456568, ..., -0.12952258,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -0.67419451, ...,  0.8876023 ,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649,  1.00877481, ...,  0.8876023 ,
        -0.26113572, -0.19600752]])

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [5]:
X_train = torch.tensor(X_train,dtype=torch.float)
X_test = torch.tensor(X_test,dtype=torch.float)
y_train = torch.tensor(y_train,dtype=torch.long)
y_test = torch.tensor(y_test,dtype=torch.long)

In [6]:
input_dim = 64
hidden_dim = 16
output_dim = 10

In [7]:
w1 = torch.randn(input_dim,hidden_dim,dtype=torch.float32,requires_grad=True)
b1 = torch.zeros(hidden_dim,dtype=torch.float32,requires_grad=True)
w2 = torch.randn(hidden_dim,output_dim,dtype=torch.float32,requires_grad=True)
b2 = torch.zeros(output_dim,dtype=torch.float32,requires_grad=True)

w1,b1,w2,b2,w1.shape,b1.shape,w2.shape,b2.shape

(tensor([[ 0.4224, -0.3956, -0.3988,  ...,  0.3260, -0.5974,  0.8860],
         [ 0.8344,  1.6975, -0.6181,  ...,  1.5176, -1.3268,  0.2486],
         [ 0.0746,  0.2255,  0.3638,  ...,  1.2983, -1.7839, -0.0786],
         ...,
         [-1.0719, -0.4248, -0.2197,  ..., -0.6485,  0.6386,  0.0296],
         [-0.5586, -0.0814, -0.7857,  ..., -0.3541,  0.8976,  0.7863],
         [ 1.8987,  0.0705, -0.1088,  ...,  1.7369, -0.4838,  1.7236]],
        requires_grad=True),
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        requires_grad=True),
 tensor([[ 7.7723e-01, -1.7590e-01,  5.1182e-02, -2.8055e-01, -3.6533e-01,
          -1.1593e+00,  5.9089e-01,  7.3143e-01,  4.2362e-01, -5.8089e-01],
         [-1.0640e-01, -3.7338e-01,  2.6820e+00, -5.7596e-02,  2.1285e+00,
          -6.3923e-01,  1.5468e-01, -1.1450e+00, -3.0831e-01,  9.2964e-01],
         [ 1.1661e-01, -4.5583e-01,  4.1474e-01, -8.2137e-01,  1.2185e+00,
          -2.1218e+00,  2.5652e-01, -2.5174e+00,  

In [8]:
def relu(x):
  return torch.maximum(x,torch.zeros_like(x))

def softmax(x):
  ex = torch.exp(x-torch.max(x,dim=1,keepdim=True)[0])
  return ex/ex.sum(dim=1,keepdim=True)



In [9]:
def forward(x):
  z1 = torch.matmul(x,w1)+b1
  a1 = relu(z1)
  z2 = torch.matmul(a1,w2)+b2
  a2 = softmax(z2)
  return a2

def predict(x):
  with torch.no_grad():
    y_pred = forward(x)
    _,prediction=torch.max(y_pred,dim=1)
  return prediction

In [10]:
def CE_loss(y_pred,y_true):
  y_one_hot = torch.zeros_like(y_pred)
  y_one_hot[torch.arange(y_pred.shape[0]),y_true] = 1
  return -torch.mean(torch.sum(y_one_hot*torch.log(y_pred+1e-2),dim=1))

In [11]:
lr = 1e-2
epochs = 100
for epoch in range(epochs):
  y_pred = forward(X_train)
  loss = CE_loss(y_pred,y_train)
  loss.backward()

  with torch.no_grad():
    w1 -= lr*w1.grad
    b1 -= lr*b1.grad
    w2 -= lr*w2.grad
    b2 -= lr*b2.grad

    w1.grad.zero_()
    b1.grad.zero_()
    w2.grad.zero_()
    b2.grad.zero_()

  print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item()}")

Epoch 1/100 Loss: 4.044790744781494
Epoch 2/100 Loss: 4.044158458709717
Epoch 3/100 Loss: 4.043522357940674
Epoch 4/100 Loss: 4.042882442474365
Epoch 5/100 Loss: 4.042239665985107
Epoch 6/100 Loss: 4.041592597961426
Epoch 7/100 Loss: 4.040943145751953
Epoch 8/100 Loss: 4.040288925170898
Epoch 9/100 Loss: 4.039631366729736
Epoch 10/100 Loss: 4.038969993591309
Epoch 11/100 Loss: 4.03830623626709
Epoch 12/100 Loss: 4.037639617919922
Epoch 13/100 Loss: 4.036968231201172
Epoch 14/100 Loss: 4.036293029785156
Epoch 15/100 Loss: 4.035613059997559
Epoch 16/100 Loss: 4.0349297523498535
Epoch 17/100 Loss: 4.034241676330566
Epoch 18/100 Loss: 4.033548831939697
Epoch 19/100 Loss: 4.03285026550293
Epoch 20/100 Loss: 4.0321478843688965
Epoch 21/100 Loss: 4.031440258026123
Epoch 22/100 Loss: 4.030726909637451
Epoch 23/100 Loss: 4.030007839202881
Epoch 24/100 Loss: 4.0292840003967285
Epoch 25/100 Loss: 4.0285539627075195
Epoch 26/100 Loss: 4.027818202972412
Epoch 27/100 Loss: 4.02707576751709
Epoch 28/

In [12]:
y_pred = predict(X_test)
accuracy = (y_pred==y_test).sum().item()/y_test.size(0)
print(f"Accuracy: {accuracy}")

Accuracy: 0.14722222222222223
