In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
# Fashion-MNIST train and test splits. Files are fetched into ./data when
# they are not already present; ToTensor() turns each image into a tensor.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())

test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

In [3]:
# Mini-batch size: every item yielded by a loader holds this many
# features and labels.
batch_size = 64

# A DataLoader wraps an iterable around a dataset and supports automatic
# batching, sampling, shuffling and multi-process data loading.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Look at the first test batch only, to confirm tensor shapes and label dtype.
X, y = next(iter(test_dataloader))
print(f"Shape of X [N,C,H,W]:{X.shape}")
print(f"Shape of y:{y.shape} {y.dtype}")

Shape of X [N,C,H,W]:torch.Size([64, 1, 28, 28])
Shape of y:torch.Size([64]) torch.int64


#### creating models

In [4]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# and finally the CPU when no accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"using {device} device")


class NeuralNetwork(nn.Module):
    """Fully connected classifier: 784 inputs -> 512 -> 512 -> 10 logits."""

    def __init__(self):
        super().__init__()
        in_features, hidden, n_outputs = 28 * 28, 512, 10

        # Collapses every dimension after the first into a single vector.
        self.flatten = nn.Flatten()

        # The attribute name and the position of each layer determine the
        # state_dict keys, so both are kept stable for saved checkpoints.
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_outputs),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for input ``x``."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)


using mps device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Cross-entropy on the raw logits, minimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [10]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Every batch is moved to the device that holds the model's parameters,
    pushed through a forward and backward pass, and followed by one
    optimizer step. The loss and the number of samples seen so far are
    printed every 100 batches.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches; its ``dataset``
            attribute must support ``len()``.
        model: the ``nn.Module`` being optimised (updated in place).
        loss_fn: callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: optimizer built over ``model``'s parameters.

    Returns:
        None.
    """
    size = len(dataloader.dataset)

    # Take the device from the model itself rather than from a notebook-level
    # variable: a later cell rebinding that variable can no longer put the
    # batch and the weights on different devices.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagate, apply the update, then clear gradients so they do
        # not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            current = (batch + 1) * len(X)
            print(f"loss:{loss.item():>7f} [{current:>5d}/{size:>5d}]")

In [14]:
def test(dataloader,model,loss_fn):
    size=len(dataloader.dataset)
    num_batches=len(dataloader)
    model.eval()
    test_loss, correct=0,0
    with torch.no_grad():
        for X,y in dataloader:
            X,y=X.to(device),y.to(device)
            pred=model(X)
            test_loss+=loss_fn(pred,y).item()
            correct+=(pred.argmax(1)==y).type(torch.float).sum().item()
    test_loss/=num_batches
    correct/=size

    print(f"Test Error: \n Accuracy{(100*correct):>0.1f}%,Avg loss:{test_loss:>8f}\n")

In [15]:
# Alternate one training pass and one evaluation pass for each epoch.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n.......")
    train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)
print("Done")

Epoch 1
.......
loss:2.167007 [   64/60000]
loss:2.162381 [ 6464/60000]
loss:2.105381 [12864/60000]
loss:2.122272 [19264/60000]
loss:2.086226 [25664/60000]
loss:2.012171 [32064/60000]
loss:2.061523 [38464/60000]
loss:1.971271 [44864/60000]
loss:1.963694 [51264/60000]
loss:1.913458 [57664/60000]
Test Error: 
 Accuracy49.8%,Avg loss:1.907460

Epoch 2
.......
loss:1.929211 [   64/60000]
loss:1.902734 [ 6464/60000]
loss:1.789418 [12864/60000]
loss:1.835364 [19264/60000]
loss:1.737037 [25664/60000]
loss:1.671168 [32064/60000]
loss:1.717328 [38464/60000]
loss:1.602671 [44864/60000]
loss:1.621260 [51264/60000]
loss:1.530131 [57664/60000]
Test Error: 
 Accuracy62.3%,Avg loss:1.541269

Epoch 3
.......
loss:1.599495 [   64/60000]
loss:1.561600 [ 6464/60000]
loss:1.420911 [12864/60000]
loss:1.491156 [19264/60000]
loss:1.375395 [25664/60000]
loss:1.363388 [32064/60000]
loss:1.390612 [38464/60000]
loss:1.302916 [44864/60000]
loss:1.334242 [51264/60000]
loss:1.239796 [57664/60000]
Test Error: 
 Accu

In [16]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(obj=model.state_dict(), f="model.pth")
print("Saved pytorch model to state model.pth")

Saved pytorch model to state model.pth


In [17]:
# Loading a model: rebuild the architecture first, then restore the saved
# parameters into it. The load_state_dict result is the cell's output.
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load(f="model.pth", weights_only=True))

<All keys matched successfully>

In [18]:
# Human-readable name for each label; the list is indexed by the label value.
classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

# Classify the first test sample with the model in evaluation mode.
model.eval()
x, y = test_data[0]

with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted = classes[pred[0].argmax(dim=0)]
    actual = classes[y]
    print(f'Predicted "{predicted}", Actual:{actual}')

Predicted "Ankle boot", Actual:Ankle boot


## Tensors

Tensors are similar to NumPy's ndarrays, except that tensors can run on GPUs or other hardware accelerators.  
Tensors are also optimized for automatic differentiation.

In [19]:
import torch
import numpy as np

In [20]:
# Build a tensor directly from nested Python lists.
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)

In [21]:
# Go through NumPy: convert the nested list to an ndarray, then wrap that
# array as a tensor.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [22]:
# The *_like constructors take their shape from an existing tensor.
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor {x_ones}")

# An explicit float dtype overrides the dtype of x_data for the random values.
x_rand = torch.rand_like(x_data, dtype=torch.float)
print(f"x rand {x_rand}")

Ones Tensor tensor([[1, 1],
        [1, 1]])
x rand tensor([[0.5781, 0.5598],
        [0.7641, 0.7015]])


In [25]:
# A single shape tuple drives all three factory functions.
shape = (2, 3)
rand_tensor, ones_tensor, zeros_tensor = (
    torch.rand(shape),
    torch.ones(shape),
    torch.zeros(shape),
)

print(f"Random tensor{rand_tensor}")
print(f"ones tensor {ones_tensor}")
print(f"zeros tensor:{zeros_tensor}")

Random tensortensor([[0.9384, 0.6160, 0.9057],
        [0.3948, 0.9221, 0.6761]])
ones tensor tensor([[1., 1., 1.],
        [1., 1., 1.]])
zeros tensor:tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [27]:
# Every tensor carries its shape, its element dtype and the device it lives on.
tensor = torch.rand(3, 4)

print(tensor.shape, tensor.dtype, tensor.device, sep="\n")

torch.Size([3, 4])
torch.float32
cpu


In [28]:
# Move the tensor to an accelerator only when one is actually available.
# A hard-coded "mps" makes this cell raise on any machine without Apple's
# Metal backend, so fall back through CUDA -> MPS -> CPU instead.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

tensor = tensor.to(device)

In [33]:
# NumPy-style indexing: pick out a row, the first column and the last column.
tensor = torch.ones(4, 4)

print(f"first row{tensor[0, :]}")
print(f"first col {tensor[:, 0]}")
print(f"last col {tensor[..., -1]}")


first rowtensor([1., 1., 1., 1.])
first col tensor([1., 1., 1., 1.])
last col tensor([1., 1., 1., 1.])


In [30]:
# Show the values, then leave the shape as the cell's displayed result.
print(tensor)
tensor.size()

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


torch.Size([4, 4])

In [31]:
# Number of dimensions (rank) of the tensor.
tensor.dim()

2

In [34]:
# Join four copies side by side along the column dimension (dim=1).
torch.cat([tensor] * 4, dim=1)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [35]:
# Matrix product, three equivalent spellings: the @ operator, the method,
# and the function writing into a pre-allocated output tensor.
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

y3 = torch.rand_like(y1)
torch.matmul(tensor, tensor.T, out=y3)

# Element-wise product, again as operator, method and out= function.
z1 = tensor * tensor
z2 = tensor.mul(tensor)

z3 = torch.rand_like(tensor)

# Last expression of the cell, so its value is what gets displayed.
torch.mul(tensor, tensor, out=z3)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [36]:
# Reduce to a single-element tensor, then pull the value out as a Python number.
agg = torch.sum(tensor)
agg_item = agg.item()
print(agg, agg_item, type(agg_item))

tensor(16.) 16.0 <class 'float'>
