### Importing Libraries and Checking the Environment


In [1]:
import os
import numpy as np
import shutil
import random
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torchvision
import psutil
import tensorflow as tf

# Report the CPU count and available memory reported by psutil, followed by
# the installed torch and torchvision versions, one item per line.
cpus = psutil.cpu_count()
print(
    f"Available CPUs: {cpus}",
    f"Available memory: {psutil.virtual_memory().available} bytes",
    torch.__version__,
    torchvision.__version__,
    sep="\n",
)

2024-05-28 16:54:09.903873: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Available CPUs: 8
Available memory: 10800119808 bytes
2.3.0+cu121
0.18.0+cu121


### Paths to Directories

In [3]:
# raw_data_dir is the source passed to copy_and_shuffle_files; the split
# copies are written under processed_data_dir. Both paths are relative, so
# they resolve against the current working directory.
raw_data_dir = "../data/raw"
processed_data_dir = "../data/processed"

In [4]:
# Create the processed-data directory tree up front. exist_ok=True keeps the
# cell safe to re-run and avoids the check-then-create race of calling
# os.path.exists() before os.makedirs().
os.makedirs(processed_data_dir, exist_ok=True)

train_dir = os.path.join(processed_data_dir, "train")
val_dir = os.path.join(processed_data_dir, "val")
test_dir = os.path.join(processed_data_dir, "test")

# "val" is created as well because copy_and_shuffle_files writes a
# train/val/test split under processed_data_dir.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

### Copying and Shuffling Files

In [5]:
sub_dirs = ["bird", "nonbird"]


def copy_and_shuffle_files(
    src_dir, dest_dir, train_ratio=0.7, val_ratio=0.15, seed=42
):
    """Split each class folder of ``src_dir`` into train/val/test copies.

    For every class name in the module-level ``sub_dirs`` list, the regular
    files directly inside ``src_dir/<class>`` are shuffled and copied into
    ``dest_dir/train/<class>``, ``dest_dir/val/<class>`` and
    ``dest_dir/test/<class>``. The test split receives whatever remains after
    the train and validation shares have been taken.

    Args:
        src_dir: Directory containing one sub-directory per class.
        dest_dir: Directory under which the split folders are created.
        train_ratio: Fraction of each class assigned to the training split.
        val_ratio: Fraction of each class assigned to the validation split.
        seed: Seed for the shuffle. With a fixed seed and unchanged source
            files, repeated calls produce the same split, so re-running the
            cell cannot leave one file in two different splits. Pass ``None``
            for a different shuffle on every call.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
        FileNotFoundError: If a class sub-directory is missing from
            ``src_dir`` (raised by ``os.listdir``).
    """
    # Small tolerance so float sums such as 0.9 + 0.1 are never rejected.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    # A private generator keeps the split reproducible without touching the
    # global `random` state that other cells may rely on.
    rng = random.Random(seed)

    for sub_dir in sub_dirs:
        full_sub_dir = os.path.join(src_dir, sub_dir)
        # os.listdir returns entries in arbitrary order; sorting first makes
        # the seeded shuffle deterministic across machines and runs.
        files = sorted(
            os.path.join(full_sub_dir, f)
            for f in os.listdir(full_sub_dir)
            if os.path.isfile(os.path.join(full_sub_dir, f))
        )
        rng.shuffle(files)

        n_train = int(len(files) * train_ratio)
        n_val = int(len(files) * val_ratio)

        splits = {
            "train": files[:n_train],
            "val": files[n_train : n_train + n_val],
            "test": files[n_train + n_val :],
        }

        for phase, file_set in splits.items():
            dest_sub_dir = os.path.join(dest_dir, phase, sub_dir)
            os.makedirs(dest_sub_dir, exist_ok=True)
            for file in file_set:
                shutil.copy(file, dest_sub_dir)


# Populate processed_data_dir with train/val/test copies of the raw files,
# using the function's default split ratios.
copy_and_shuffle_files(raw_data_dir, processed_data_dir)

### Rescaling Images

In [7]:
def rescale_images_in_dir(src_dir, size):
    """Resize every image under ``src_dir`` to ``size``, overwriting in place.

    The directory tree is walked recursively. Images that already have the
    requested size are left untouched, so re-running the cell does not
    re-encode (and, for lossy formats, further degrade) files that were
    processed earlier. Files Pillow cannot identify as images are reported
    and skipped instead of aborting the whole run.

    Args:
        src_dir: Root directory to walk.
        size: Target ``(width, height)`` in pixels, as accepted by
            ``PIL.Image.Image.resize``.
    """
    # Imported locally because only `Image` is imported at notebook level.
    from PIL import UnidentifiedImageError

    target_size = tuple(size)
    for root, _dirs, files in os.walk(src_dir):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            if not os.path.isfile(file_path):
                continue
            try:
                with Image.open(file_path) as img:
                    if img.size == target_size:
                        continue
                    img_rescaled = img.resize(target_size)
            except UnidentifiedImageError:
                print(f"Skipping non-image file: {file_path}")
                continue
            # Save only after the source handle is closed, so the file is not
            # overwritten while it is still open for reading.
            img_rescaled.save(file_path)


# Target (width, height) in pixels; every file found under processed_data_dir
# is resized to this and overwritten in place.
rescaled_size = (224, 224)
rescale_images_in_dir(processed_data_dir, rescaled_size)