In [83]:
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
import torch

In [119]:
data_path = "/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS/data/raw/corruptmnist"

train_files, test_files = [], []

for root, dirs, files in os.walk(data_path):
    for file in files:
        if file[:5] == "train":
            train_files.append(np.load(os.path.join(root,file)))
        elif file[:4] == "test":
            test_files.append(np.load(os.path.join(root,file)))

# Extract training images and concatenate these into a [25000, 28, 28] numpy ndarray
train_images = [f["images"] for f in train_files]
train_images = np.concatenate(train_images)

scaler = StandardScaler()

# This normalization is performed on a reshaped array of size [25000, 784] such that each pixel feature is normalized cf. the feature mean and standard deviation
train_images = scaler.fit_transform(train_images.reshape(train_images.shape[0],
                                    train_images.shape[1]*train_images.shape[2])).reshape(train_images.shape)

# Add the channel dimension. The resulting dimensions are (25000, 1, 28, 28)
train_images = torch.from_numpy(train_images).unsqueeze_(1)

# Extract training labels and concatenate these into a [25000,] numpy ndarray
train_labels = [f["labels"] for f in train_files]
train_labels = np.concatenate(train_labels)
train_labels = torch.from_numpy(train_labels)

# Extract test images and concatenate these into a [25000, 28, 28] numpy ndarray
test_images = test_files[0]["images"]
test_images = scaler.transform(test_images.reshape(test_images.shape[0], test_images.shape[1]*test_images.shape[2])).reshape(test_images.shape)
# Add the channel dimension. The resulting dimensions are (5000, 1, 28, 28)
test_images = torch.from_numpy(test_images).unsqueeze_(1)
# Extract test labels and concatenate these into a [25000,] numpy ndarray
test_labels = test_files[0]["labels"]
test_labels = torch.from_numpy(test_labels)

train = {"images": train_images, "labels": train_labels}
test = {"images": test_images, "labels": test_labels}

torch.save(train, "/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS/data/processed/train.pt")
torch.save(test, "/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS/data/processed/test.pt")


In [114]:
train = torch.load("/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS/data/processed/test.pt")

In [118]:
train["labels"].shape

torch.Size([25000])

In [111]:
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)

torch.Size([25000, 1, 28, 28])
torch.Size([25000])
torch.Size([5000, 1, 28, 28])


In [106]:
torch.stack((train_images, train_labels))

RuntimeError: stack expects each tensor to be equal size, but got [25000, 1, 28, 28] at entry 0 and [25000] at entry 1

In [18]:
import torch 
import sys
sys.path.append("/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS")

from src.models.model import MyAwesomeModel

In [19]:
model = MyAwesomeModel()
state_dict = torch.load("/mnt/c/Users/Lasse/Desktop/DTU/7. semester/MLOps/MLOPS/models/checkpoint.pth")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [20]:
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        print(name, "is a convolutional layer")
    if isinstance(module, torch.nn.Linear):
        print(name, "is a linear layer")

layer1.0 is a convolutional layer
layer2.0 is a convolutional layer
fc is a linear layer


In [21]:
model.layer1

Sequential(
  (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)