In [25]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np


In [39]:

data = load_digits()
print(data)

X = data.data
y = data.target

device = torch.device("cpu")
X = torch.from_numpy(X).to(device)
y = torch.from_numpy(y).to(device)

IN_FEATURES = len(X[0]) # in feature 64 neurons
OUT_FEATURES = 10 # out feature 10 neurons
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]]), 'target': array([0, 1, 2, ..., 8, 9, 8]), 'frame': None, 'feature_names': ['pixel_0_0', 'pixel_0_1', 'pixel_0_2', 'pixel_0_3', 'pixel_0_4', 'pixel_0_5', 'pixel_0_6', 'pixel_0_7', 'pixel_1_0', 'pixel_1_1', 'pixel_1_2', 'pixel_1_3', 'pixel_1_4', 'pixel_1_5', 'pixel_1_6', 'pixel_1_7', 'pixel_2_0', 'pixel_2_1', 'pixel_2_2', 'pixel_2_3', 'pixel_2_4', 'pixel_2_5', 'pixel_2_6', 'pixel_2_7', 'pixel_3_0', 'pixel_3_1', 'pixel_3_2', 'pixel_3_3', 'pixel_3_4', 'pixel_3_5', 'pixel_3_6', 'pixel_3_7', 'pixel_4_0', 'pixel_4_1', 'pixel_4_2', 'pixel_4_3', 'pixel_4_4', 'pixel_4_5', 'pixel_4_6', 'pixel_4_7', 'pixel_5_0', 'pixel_5_1', 'pixel_5_2', 'pixel_5_3', 'pixel_5_4', 'pixel_5_5', 'pixel_5_6', 'pixel_5_7', 'pixel_6_0', '

In [40]:
class LoadDigitsModel(nn.Module):
    def __init__(self, in_features, out_features, hidden_layer=256):
        super().__init__()
        self.net_layers = nn.Sequential(
            nn.Linear(in_features=in_features,
                      out_features=hidden_layer),
            nn.ReLU(),
            nn.Linear(in_features=hidden_layer,
                      out_features=hidden_layer),
            nn.ReLU(),
            nn.Linear(in_features=hidden_layer,
                      out_features=out_features)
        )

    def forward(self, x: torch.Tensor)-> torch.Tensor :
        return self.net_layers(x)

In [41]:
model = LoadDigitsModel(IN_FEATURES, OUT_FEATURES)

In [42]:
# this is multi classification dataset should use CrossEntropyLoss
loss_fn = nn.CrossEntropyLoss()
# optimizer we can use Stochastic Gradient Descent
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [43]:
model.state_dict()

OrderedDict([('net_layers.0.weight',
              tensor([[ 0.0045,  0.0512, -0.0975,  ...,  0.0929, -0.0562, -0.1146],
                      [ 0.0643,  0.0723,  0.0638,  ...,  0.0479, -0.0859, -0.0221],
                      [-0.1207,  0.0423,  0.0467,  ...,  0.0962, -0.0574, -0.0862],
                      ...,
                      [-0.1130, -0.0873,  0.1070,  ...,  0.0027, -0.0237,  0.0032],
                      [ 0.0899,  0.1021, -0.1095,  ..., -0.0595,  0.1108,  0.0241],
                      [-0.0909, -0.1213, -0.1035,  ...,  0.0373, -0.0848,  0.0414]])),
             ('net_layers.0.bias',
              tensor([ 8.1947e-02, -8.3886e-02,  8.5035e-02, -6.1681e-02, -8.6409e-02,
                       5.0124e-02,  3.9250e-02,  6.9183e-02,  3.6436e-02,  7.5826e-03,
                      -6.3877e-02,  5.8711e-02, -1.1842e-01, -8.9530e-02,  5.0795e-02,
                      -9.8491e-02, -1.1038e-01,  8.4687e-02,  4.8554e-02, -8.4487e-02,
                       5.1227e-02, -8.7931e-02

In [44]:
def acc(y_pred, y_value):
    true_value = torch.eq(y_pred, y_value).sum().item() # compare values that are the same and sum them up and get the int value
    return 100*true_value/len(y_value)

In [45]:
epochs = 100
epoch_count=[]
training_loss=[]
testing_loss=[]
training_acc=[]
testing_acc=[]
for epoch in range(epochs):
    # put model in training mode
    model.train() 

    #1. do forward pass, output of CrossEntropyLoss output is logits
    logits = model(X_train)

    #2. calculate the loss
    train_loss = loss_fn(logits, y_train.type(torch.LongTensor))
    # since output of crossentropyLoss has a softmax activation, the output is already in prob, just grab the max value
    y_pred = logits.argmax(dim=1)

    #3. zero gradient since calculation accumulates
    optimzer.zero_grad()

    #4. do back propagation
    train_loss.backwards()

    #5. do gradient descent
    optimizer.step()

    # Testing
    model.eval()
    with torch.inference_mod():
        test_logits = model(X_test)
        test_loss = loss_fn(test_logits, y_test.type(torch.LongTensor))
        test_y_pred = test_logits.argmax(dim=1)
        
        train_acc = acc(y_pred, y_train)
        test_acc = acc(test_y_pred, y_test)

        epoch_count.append(epoch)
        training_loss.append(train_loss)
        testing_loss.append(test_loss)
        training_acc.append(train_acc)
        testing_acc.append(test_acc)

        print(f"Epoch: {epoch}, Train Loss: {train_loss}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss}, Test Acc: {test_acc:.2f}%")

    

RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float