In [2]:
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [3]:
def set_seed(seed=59):
    """Seed the random number generators used by this notebook.

    The same value is handed, in turn, to Python's ``random`` module, NumPy's
    global generator and PyTorch (the default generator, the current CUDA
    device and all CUDA devices). cuDNN is then put into deterministic mode
    with benchmarking switched off.

    Args:
        seed: Integer seed applied to every generator (default 59).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed every generator once, before any data handling, and echo the value
# into the cell output so the run can be identified later.
seed = 59
set_seed(seed)
print(f"Using seed {seed}")

Using seed 59


### Dataset structure and location

In [4]:
# Dataset root and its two split folders.
root_dir = Path('./data/scenes_classification')
train_dir = root_dir / 'train'
val_dir = root_dir / 'val'

# Map label index -> class name. Class names are the sub-directories of the
# training split, sorted by name so the indices are stable between runs.
# Only directories are considered: a stray file in the split folder
# (e.g. .DS_Store or a README) must not become a class, otherwise every
# following label index shifts and the class count is wrong.
classes = {
    label_idx: class_name
    for label_idx, class_name in enumerate(
        sorted(entry.name for entry in train_dir.iterdir() if entry.is_dir())
    )
}

X_train, y_train = [], []
X_test, y_test = [], []

# Pair each split directory with the lists it fills. The destination of an
# image is decided by which folder is being walked, not by searching the path
# text for 'train' -- a substring test misroutes the val split as soon as any
# parent folder name happens to contain "train".
split_targets = [
    (train_dir, X_train, y_train),
    (val_dir, X_test, y_test),
]

for dataset_path, split_paths, split_labels in split_targets:
    for label_idx, class_name in classes.items():
        class_dir = dataset_path / class_name
        # Skip classes that are absent from this split, and anything with a
        # class's name that is not a directory (iterdir() would raise on it).
        if not class_dir.is_dir():
            continue
        # Sorted traversal keeps the sample order identical between runs.
        for img_filename in sorted(class_dir.iterdir()):
            if not img_filename.is_file():
                continue
            img_path = str(img_filename)
            split_paths.append(img_path)
            split_labels.append(label_idx)

print('Train images:', len(X_train))
print('Validation/Test candidates (from val folder):', len(X_test))

Train images: 14034
Validation/Test candidates (from val folder): 3000
