In [None]:
# !wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
# !unzip nature_12K.zip

In [1]:
import yaml
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset,DataLoader, random_split

from torchvision import transforms
#from torchvision.io import read_image

In [2]:
# Load the experiment configuration. The file is opened in a `with` block so
# the handle is closed as soon as parsing finishes (a bare `open(...)` passed
# straight to `safe_load` is never explicitly closed).
with open("config/default.yaml") as config_file:
    config = yaml.safe_load(config_file)

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [3]:
class CustomDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``path`` per class.

    Args:
        path: Root directory that contains one folder per class.
        class_names: Ordered list of class folder names; the position of a
            name in this list is the integer label of its images. When
            ``None``, every sub-directory of ``path`` is used, in sorted order.
        transform: Optional callable applied to the PIL image in
            ``__getitem__``.
        target_transform: Optional callable applied to the integer label in
            ``__getitem__``.

    Attributes:
        images: File paths of all indexed images.
        labels: Integer class index for each entry of ``images``.
    """

    def __init__(self, path, class_names = None, transform=None, target_transform=None):
        super().__init__()
        self.path = path
        self.class_names = class_names
        self.transform = transform
        self.target_transform = target_transform

        if self.class_names is None:
            # os.listdir returns entries in arbitrary order; sort so that the
            # class -> index mapping is the same on every machine and run.
            self.class_names = sorted(
                x for x in os.listdir(path) if os.path.isdir(os.path.join(path, x))
            )

        self.images = []
        self.labels = []

        for i, cls in enumerate(self.class_names):
            img_dir = os.path.join(self.path, cls)
            # Sorted for a reproducible sample order (seeded splits rely on it).
            for f in sorted(os.listdir(img_dir)):
                img_path = os.path.join(img_dir, f)
                # Skip nested directories: they cannot be opened as images.
                if not os.path.isfile(img_path):
                    continue
                self.images.append(img_path)
                self.labels.append(i)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at ``index``.

        The image is decoded and converted to RGB so every sample has three
        channels regardless of how the file was stored (grayscale, palette,
        RGBA), then the optional transforms are applied.
        """
        # Image.open is lazy and keeps the file open; convert() forces the
        # decode and returns an independent image, so the handle can be closed.
        with Image.open(self.images[index]) as img:
            image = img.convert("RGB")
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


### DataLoader Creation

In [4]:
# Resize every image to the configured size and convert it to a float tensor.
transform = transforms.Compose([
        transforms.Resize(tuple(config['dataset']['img_size'])),
        transforms.ToTensor()
    ])
target_transform = None

# os.path.join works whether or not the configured root ends with a separator.
train_path = os.path.join(config['dataset']['path'], "train")
test_path = os.path.join(config['dataset']['path'], "val")

batch_size = config['dataset']['batch_size']
SPLIT_SEED = 42  # fixed so the train/validation split is identical across runs

# The on-disk "train" folder is split 80/20 into training and validation
# subsets; the on-disk "val" folder is held out as the test set.
full_trainds = CustomDataset(path=train_path,
                             class_names=config['dataset']['class_names'],
                             transform=transform,
                             target_transform=target_transform
                             )

val_split = int(0.2 * len(full_trainds))
trainds, valds = random_split(
    full_trainds,
    [len(full_trainds) - val_split, val_split],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
testds = CustomDataset(path=test_path,
                        class_names=config['dataset']['class_names'],
                        transform=transform,
                        target_transform=target_transform
                        )

# Only the training loader is shuffled; evaluation loaders keep a fixed order.
traindl = DataLoader(trainds, batch_size=batch_size, shuffle=True)
valdl = DataLoader(valds, batch_size=batch_size)
testdl = DataLoader(testds, batch_size=batch_size)
print(len(trainds), len(valds), len(testds))

# Pull a single batch to sanity-check tensor shapes; `x` and `y` stay bound
# for later cells.
x, y = next(iter(traindl))
print(x.shape, y.shape)

8000 2000 2000
torch.Size([500, 3, 256, 256]) torch.Size([500])


In [5]:
# Build the CNN from the `model` section of the loaded config and print its
# layer structure.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so all dependencies are visible in one place.
from models.simplemodel import SmallCNN
model = SmallCNN(config['model'])
print(model)

SmallCNN(
  (conv_blocks): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=32, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
)


In [None]:
# Smoke-test the forward pass; `x` is the training batch bound in the
# DataLoader-creation cell.
# NOTE(review): the printed model shows fc1 with in_features=32, while one
# conv(pad=1) + 2x2 max-pool block on a 256x256 input yields 32x128x128 values
# per sample if flattened directly -- confirm that SmallCNN.forward reduces the
# spatial dimensions before `flatten`, otherwise this call raises a shape error.
out = model(x)
out.shape