In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np

Every TorchVision Dataset includes two arguments: transform and target_transform to modify the samples and labels respectively.

In [3]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [5]:
training_data

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [4]:
test_data[0][1]

9

Here we define a batch size of 64, i.e. each element in the dataloader iterable will return a batch of 64 features and labels.

In [19]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle= True, drop_last= True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

#X is 64 pictures, 1 channel, 28X28
for picture, label in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {picture.shape}")
    print(f"Shape of y: {label.shape} {label.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [7]:
len(train_dataloader)

937

**Creating Models**

In [20]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


**Optimizing the Model Parameters**

To train a model, we need a loss function and an optimizer.

In [21]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and backpropagates the prediction error to adjust the model’s parameters.

In [22]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward() #compute the loss function's gradients
        optimizer.step() #update the parameters
        optimizer.zero_grad() #clear the model's gradient to avoid gradient accumulation

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [23]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [1]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


NameError: name 'train' is not defined

In [25]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1] #first 2 pictures in the data set


In [9]:
test_data[1][0] 

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0510, 0.2627, 0.0000, 0.0000, 0.0000, 0.0000,
          0.1961, 0.1490, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0314,
          0.4706, 0.8196, 0.8863, 0.9686, 0.9294, 1.0000, 1.0000, 1.0000,
          0.9686, 0.9333, 0.9216, 0.6745, 0.2824, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5373, 0.9373,
          0.9882, 0.9529, 0.9176, 0.8980, 0.9333, 0.9569, 0.9647, 0.9412,
          0.9020, 0.9098, 0.9373, 0.9725, 0.9843, 0.7608, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4000, 1.0000, 0.9059,
          0.8941, 0.8902, 0.8941, 0.9137, 0.9020, 0.9020, 0.8980, 0.8941,
          0.9098, 0.9098, 0.9059, 0.8902, 0.8784, 0.9882,

**Showing a picture in the data set**

In [101]:
#2x2 matrix where the first column is the tensor of the picture and the second column is the label of the picture
#reshape to match opencv (w, h, c)
pic = test_data[1][0].reshape(28,28,1)

In [103]:
#scale color from 0-1 to 0-255
pic = np.uint8(pic * 255)

In [100]:
#convert to gray scale
image_bgr = cv.cvtColor(pic, cv2.COLOR_BGR2GRAY)
image_bgr[:, : , 0]


error: OpenCV(4.7.0) d:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.simd_helpers.hpp:92: error: (-2:Unspecified error) in function '__cdecl cv::impl::`anonymous-namespace'::CvtHelper<struct cv::impl::`anonymous namespace'::Set<3,4,-1>,struct cv::impl::A0x981fb336::Set<1,-1,-1>,struct cv::impl::A0x981fb336::Set<0,2,5>,2>::CvtHelper(const class cv::_InputArray &,const class cv::_OutputArray &,int)'
> Invalid number of channels in input image:
>     'VScn::contains(scn)'
> where
>     'scn' is 1


In [98]:
# Reshape the grayscale image to (28, 28, 1)
gray_image = image_bgr[:, :, np.newaxis]
gray_image.shape

(28, 28, 1, 3)

we use the cv2.resize() function to upscale the image. The resize() function takes the following arguments:

The image to be resized (image in this case).
The target size, specified by None in this example, as we want to specify the scale factors instead.

The scaling factors fx and fy, which determine the amount of upscaling. Here, we've set both factors to 2 to double the size of the image.

The interpolation method, which determines how the new pixels are generated. In this case, we've used cv2.INTER_CUBIC, which provides a higher-quality upscaling.

In [8]:
upscaled_image = cv.resize(pic, None, fx=2, fy=2, interpolation=cv.INTER_CUBIC)

In [10]:
cv.imshow("photo", upscaled_image)
cv.waitKey(0)

-1

**Turn a photo up side down and feed it to the model XDDDD**

*Turning an image up side down*

In [49]:
random_pic = test_data[5][0].reshape(28,28,1).numpy()

In [50]:
random_pic.shape

(28, 28, 1)

In [51]:
upsidedown = cv.flip(random_pic, 0)

In [52]:
upsidedown.shape

(28, 28)

*Convert it back to tensor*

In [55]:
upsidedown = torch.from_numpy(upsidedown)

In [56]:
upsidedown.shape

torch.Size([28, 28])

In [57]:
pic_label = test_data[5][1]
pic_label

1

In [63]:
#flatten the image
upsidedown = upsidedown.reshape(1,28,28)

In [64]:
upsidedown.shape

torch.Size([1, 28, 28])

**Turning an image 90 degrees and feed it to the model**

In [73]:
piccc = test_data[28][0].reshape(28, 28, 1).numpy()

In [74]:
# Rotate the image 90 degrees clockwise using rotate() function
rotated_image = cv.rotate(piccc, cv.ROTATE_90_CLOCKWISE)


In [75]:
rotated_image = rotated_image.reshape(1, 28, 28)

In [76]:
rotated_image = torch.from_numpy(rotated_image)

In [77]:
rotate_label = test_data[28][1]

In [108]:
testImg = cv.imread("seventythree.PNG")
testImg = cv.cvtColor(testImg, cv.COLOR_BGR2GRAY)
testImg

array([[ 18,  18,  18, ..., 245, 245, 246],
       [ 18,  18,  18, ..., 245, 245, 246],
       [ 18,  18,  18, ..., 245, 245, 246],
       ...,
       [ 16,  16,  16, ..., 136, 136, 134],
       [ 16,  16,  16, ..., 133, 133, 133],
       [ 16,  16,  16, ..., 125, 125, 125]], dtype=uint8)

**A function that take in any image and resize it to 28, 28 gray scale to pass into the model**

In [115]:
def reformat(img_path):
    '''This function take in a image path and return it as a (1, 28, 28) tensor to pass into the NN Model
    
    Attribute:
    ---------
    img_path : the path to the image on the computer
    '''

    img = cv.imread(img_path)
    

    #displaying the image
    cv.imshow("img", img)
    cv.waitKey(0) 
    
    if (img.shape == (28, 28, 1)):
        return torch.from_numpy(img).reshape(1, 28, 28)

    #resize to 28x28
    img = cv.resize(img, (28, 28)) 

    #convert to gray scale
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY) #note that color is still from 0 to 255


    #Making the color to be between 0 and 255
    img = np.float32(img / 255)

    #reshape and convert to tensor
    img_ = torch.from_numpy(img).reshape(1, 28, 28)
    return img_


testtt = reformat("seventythree.PNG")
testtt.shape

torch.Size([1, 28, 28])

'with torch.no_grad' : disables gradient calculation during the forward pass to reduce memory usage and speed up computation when we don't need to compute gradients. It's typically used during inference or evaluation when we don't need to update the model's parameters.

In [82]:

with torch.no_grad():
    rotated_image = rotated_image.to(device)# This line of code moves the input tensor x to the specified device (e.g., CPU or GPU) for computation. 
                    #The to() method is used to transfer tensors between devices.
    pred = model(rotated_image) # a 1 x 10 row vector so have to use index 0 to access the row. 
    predicted, actual = classes[pred[0].argmax(0)], classes[rotate_label] #The element with the highest value is the resulting class
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Shirt", Actual: "Ankle boot"
