<a href="https://colab.research.google.com/github/mgetsova/GoogleCollab/blob/main/CNN_smaller_cifar10_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Only a CPU runtime was available when this notebook was written, so the
# cells below work with a reduced amount of data for instructive purposes.

# choose where training runs: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [3]:
'''
step 1: download and initialize dataset
'''
# import CIFAR-10 dataset from huggingface
from datasets import load_dataset
dataset_train = load_dataset(
    'cifar10',
    split = 'train', # training dataset
    # skip checksum / split-size verification (use if seeing a splits error);
    # this replaces the deprecated `ignore_verifications = True` argument,
    # which newer `datasets` releases no longer accept
    verification_mode = 'no_checks'
)
dataset_train

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/5.16k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/120M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/23.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'label'],
    num_rows: 50000
})

In [10]:
# inspect which class load_dataset returned for the training split
type(dataset_train)

In [13]:
'''
the dataset is an arrow type object and you can see how you can
subset based on row indicies or label filters (which can be useful)
here: https://stackoverflow.com/questions/72891947/how-do-you-get-a-subset-of-a-python-arrow-dataset-dataset-based-on-a-list-of-ind
'''

# keep only rows 0..4999, i.e. the first 5000 training examples
subset_dataset_train = dataset_train.select(range(5000))

In [15]:
# sanity check: shape of the subset (the recorded output is (5000, 2))
print(subset_dataset_train.shape)

(5000, 2)


In [18]:
# get validation dataset (the CIFAR-10 'test' split is used for validation)
dataset_val = load_dataset(
    'cifar10',
    split = 'test', # test dataset
    # skip checksum / split-size verification (use if seeing a splits error);
    # this replaces the deprecated `ignore_verifications = True` argument,
    # which newer `datasets` releases no longer accept
    verification_mode = 'no_checks'
)
dataset_val



Dataset({
    features: ['img', 'label'],
    num_rows: 10000
})

In [19]:
# use only the first 1000 images (rows 0..999) for the validation set
subset_dataset_val = dataset_val.select(range(1000))

In [20]:
# side length (in pixels) that every image is resized to
img_size = 32

# base preprocessing pipeline used by the following cell:
# resize to img_size x img_size, then convert the image to a tensor
preprocess = transforms.Compose(
    [transforms.Resize((img_size,) * 2), transforms.ToTensor()]
)


In [21]:
# Build the training inputs from the training subset: every record becomes a
# [tensor, label] pair, with greyscale images converted to RGB first.
from tqdm.auto import tqdm

inputs_train = []

for sample in tqdm(subset_dataset_train):
  picture = sample['img']

  # greyscale (mode 'L') images are expanded to three RGB channels
  if picture.mode == 'L':
    picture = picture.convert("RGB")

  # run the preprocessing pipeline and store the result next to its label
  inputs_train.append([preprocess(picture), sample['label']])

# each entry now holds a 32x32 pixel RGB image as a 3d tensor with dims (3, 32, 32)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [22]:
# get mean and std values for each of the RGB channels across a random sample
# of training images, then normalize every training tensor with them
import numpy as np

np.random.seed(0)

# choose a random sample of 512 indices into inputs_train (drawn with
# replacement) to estimate the statistics from
idx = np.random.randint(0, len(inputs_train), 512)
# concatenate this subset of image tensors along the height axis; each tensor
# is (3, 32, 32), so the result is (3, 512 * 32, 32) with channels on axis 0
tensors = torch.concat([inputs_train[i][0] for i in idx], axis = 1)

# Flatten everything except the channel axis -> (3, 512 * 32 * 32), so that each
# row holds the pixels of exactly one channel.
# (Fix: the previous `swapaxes(0, 1).reshape(3, -1).T` flattened a
# (rows, channel, width) layout in row-major order, which mixed pixels from all
# three channels into every column, so the statistics were not per-channel.)
tensors = tensors.reshape(3, -1)
mean = torch.mean(tensors, axis = 1)
std = torch.std(tensors, axis = 1)
del tensors


# now use the per-channel mean and std computed above
# to normalize via another preprocessing step

preprocess = transforms.Compose([transforms.Normalize(mean=mean, std=std)])
# NOTE: this loop overwrites the tensors stored in inputs_train, so running
# this cell a second time would normalize already-normalized data
for i in tqdm(range(len(inputs_train))):
  # preprocessing
  input_tensor = preprocess(inputs_train[i][0])

  # replace the stored tensor with its normalized version
  inputs_train[i][0] = input_tensor

  0%|          | 0/5000 [00:00<?, ?it/s]

In [23]:
# Preprocess the validation subset the same way as the training data.
# Here the steps are merged into one pipeline: resize, convert to tensor, and
# normalize with the mean / std computed from the training sample, so no
# separate normalization pass is needed afterwards.
preprocess = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

inputs_val = []

for sample in tqdm(subset_dataset_val):
  picture = sample['img']

  # greyscale (mode 'L') images are expanded to three RGB channels
  if picture.mode == 'L':
    picture = picture.convert("RGB")

  # run the merged pipeline and store the result next to its label
  inputs_val.append([preprocess(picture), sample['label']])

  0%|          | 0/1000 [00:00<?, ?it/s]

In [24]:
# number of samples per batch for both loaders
batch_size = 64

# Use DataLoader to split both the training and validation data into batches.
# The training loader shuffles the samples (shuffle = True); the validation
# loader keeps them in their original order (shuffle = False).
dloader_train = torch.utils.data.DataLoader(
    inputs_train, batch_size = batch_size, shuffle = True
)
dloader_val = torch.utils.data.DataLoader(
    inputs_val, batch_size = batch_size, shuffle = False
)


# Note: everything preprocessed up to this point derives from dataset_train and
# dataset_val. To speed up experimentation we may want to use only a fraction
# of each (e.g. every 10th image).


**Building the CNN's Architecture**

In [25]:
# creating a CNN class:
class ConvNeuralNet(nn.Module):
  """Convolutional image classifier: five conv layers and three linear layers.

  Layout: conv1 -> ReLU -> max-pool, conv2 -> ReLU -> max-pool,
  conv3 -> ReLU, conv4 -> ReLU, conv5 -> ReLU -> max-pool, then
  dropout -> fc6 -> ReLU -> dropout -> fc7 -> ReLU -> fc8.

  fc6 takes 1024 flattened features, which is what a 3 x 32 x 32 input yields
  after the conv / pool stack (256 channels x 2 x 2).

  Args:
    num_classes: number of output logits produced by the last linear layer.
  """

  def __init__(self, num_classes):
    super().__init__()

    # stage 1: 3 -> 64 channels, followed by 3x3 max-pooling with stride 2
    self.conv_layer1 = nn.Conv2d(3, 64, kernel_size = 4, padding = 1)
    self.relu1 = nn.ReLU()
    self.max_pool1 = nn.MaxPool2d(kernel_size = 3, stride = 2)

    # stage 2: 64 -> 192 channels, followed by 3x3 max-pooling with stride 2
    self.conv_layer2 = nn.Conv2d(64, 192, kernel_size = 4, padding = 1)
    self.relu2 = nn.ReLU()
    self.max_pool2 = nn.MaxPool2d(kernel_size = 3, stride = 2)

    # stages 3-5: 3x3 convolutions with padding 1; only stage 5 is pooled
    self.conv_layer3 = nn.Conv2d(192, 384, kernel_size = 3, padding = 1)
    self.relu3 = nn.ReLU()

    self.conv_layer4 = nn.Conv2d(384, 256, kernel_size = 3, padding = 1)
    self.relu4 = nn.ReLU()

    self.conv_layer5 = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
    self.relu5 = nn.ReLU()
    self.max_pool5 = nn.MaxPool2d(kernel_size = 3, stride = 2)

    # classifier head: dropout before each of the first two linear layers
    self.dropout6 = nn.Dropout(p = 0.5)
    self.fc6 = nn.Linear(1024, 512)
    self.relu6 = nn.ReLU()
    self.dropout7 = nn.Dropout(p = 0.5)
    self.fc7 = nn.Linear(512, 256)
    self.relu7 = nn.ReLU()
    self.fc8 = nn.Linear(256, num_classes)

  def forward(self, x):
    """Run a batch through the layers in order and return the final logits."""
    # convolutional feature extractor
    feats = self.max_pool1(self.relu1(self.conv_layer1(x)))
    feats = self.max_pool2(self.relu2(self.conv_layer2(feats)))
    feats = self.relu3(self.conv_layer3(feats))
    feats = self.relu4(self.conv_layer4(feats))
    feats = self.max_pool5(self.relu5(self.conv_layer5(feats)))

    # flatten everything except the batch dimension
    flat = feats.reshape(feats.size(0), -1)

    # fully connected classifier
    hidden = self.relu6(self.fc6(self.dropout6(flat)))
    hidden = self.relu7(self.fc7(self.dropout7(hidden)))
    return self.fc8(hidden) # final logits


In [28]:
import torch

# same device choice as at the top of the notebook, as a torch.device object
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of output classes. Prefer the class count declared by the dataset's
# label feature, so a small subset that happens to miss a class cannot shrink
# the output layer; fall back to counting the distinct labels in the subset
# when the feature does not expose `num_classes`.
label_feature = subset_dataset_train.features['label']
num_classes = (
    getattr(label_feature, 'num_classes', None)
    or len(set(subset_dataset_train['label']))
)

# build the network and move its parameters to the chosen device
model = ConvNeuralNet(num_classes).to(device)

In [29]:
# set loss function with criterion
# (cross-entropy, applied to the logits returned by the model)
loss_func = nn.CrossEntropyLoss()

# set learning rate
lr = 0.008

# set optimizer: SGD over all model parameters; only the learning rate is set
# NOTE(review): in the recorded run below Val-acc stays at 10.60 for all 12
# logged epochs; the learning rate / optimizer settings may need revisiting,
# to be confirmed by experiment
optimizer = torch.optim.SGD(model.parameters(), lr = lr)

See how long each epoch takes on the reduced dataset sizes below:

Not sure if both models can be run at the same time without running out of RAM...
Update: yes, they can, but the validation accuracy was not changing, so something about the way we selected the first tenth of each dataset may be problematic.

Also, training still takes over an hour; try a larger batch size and/or fewer epochs. (Note: `batch_size = 64` means each batch holds 64 images, so one epoch over the 5,000 training images is ceil(5000 / 64) = 79 shuffled batches, not 64 batches.)

In [30]:
# train and validate the network:
# each epoch runs one pass over the training batches, then measures loss and
# accuracy on the validation batches and prints a one-line summary
num_epochs = 50
for epoch in range(num_epochs):
  # switch to training mode before the training pass
  model.train()
  # load in the data in batches
  for images, labels in dloader_train:
    # move tensors to the configured device
    images = images.to(device)
    labels = labels.to(device)

    # forward propagation
    outputs = model(images)
    loss = loss_func(outputs, labels)

    # backward propagation and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # at the end of the epoch, check loss and accuracy on the validation set
  # switch to evaluating the model and not training it
  model.eval()
  with torch.no_grad():
    correct = 0
    total = 0
    all_val_loss = []
    for images, labels in dloader_val:
      # fix: the result was previously assigned to a misspelled name
      # (`iamges`), so the validation images were never moved to the device
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      total += labels.size(0)
      # calculate predictions (index of the largest logit per sample)
      predicted = torch.argmax(outputs, dim = 1)
      # count predictions that match the actual labels
      correct += (predicted == labels).sum().item()
      # calculate the loss of this validation batch
      all_val_loss.append(loss_func(outputs, labels).item())
    # calculate val-loss as the mean of the per-batch losses
    mean_val_loss = sum(all_val_loss) / len(all_val_loss)
    # calculate val-accuracy in percent
    mean_val_acc = 100 * (correct / total)

  # `loss` is the loss of the last training batch of this epoch
  print(
      'Epoch [{}/{}], Loss: {:.4f}, Val-loss: {:.4f}, Val-acc:{:.2f}'
      .format(epoch + 1, num_epochs, loss.item(), mean_val_loss, mean_val_acc)
  )

Epoch [1/50], Loss: 2.2954, Val-loss: 2.3015, Val-acc:10.60
Epoch [2/50], Loss: 2.3066, Val-loss: 2.3016, Val-acc:10.60
Epoch [3/50], Loss: 2.2959, Val-loss: 2.3017, Val-acc:10.60
Epoch [4/50], Loss: 2.2986, Val-loss: 2.3017, Val-acc:10.60
Epoch [5/50], Loss: 2.2916, Val-loss: 2.3018, Val-acc:10.60
Epoch [6/50], Loss: 2.2914, Val-loss: 2.3018, Val-acc:10.60
Epoch [7/50], Loss: 2.3138, Val-loss: 2.3019, Val-acc:10.60
Epoch [8/50], Loss: 2.3064, Val-loss: 2.3019, Val-acc:10.60
Epoch [9/50], Loss: 2.3070, Val-loss: 2.3019, Val-acc:10.60
Epoch [10/50], Loss: 2.3013, Val-loss: 2.3020, Val-acc:10.60
Epoch [11/50], Loss: 2.2807, Val-loss: 2.3020, Val-acc:10.60
Epoch [12/50], Loss: 2.3044, Val-loss: 2.3020, Val-acc:10.60


KeyboardInterrupt: 