In [2]:
# In this notebook we will learn how to work on our own datasets, instead of using Pytorch datasets
import torch
from torch import nn

In [3]:
# we will make our code device-agnostic
device = 'cuda' if torch.cuda.is_available() else 'cup' 
device

'cup'

In [7]:
# we will download a subset of food 101 for this project
import requests
import zipfile
from pathlib import Path

# setup a path to the datafolder
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"

# check if image folder exists, if not, download and prepare it
if image_path.is_dir():
    print(f"{image_path} directory already exists.")
else:
    print(f"didn't find {image_path} directory. creating one ...")
    image_path.mkdir(parents=True, exist_ok=True)

    # after creating the directory, we should download data to it
    with open(data_path / "pizza_steak_suchi.zip", "wb") as f:
        request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
        print("downloading pizza, steak, sushi data")
        f.write(request.content)


    # unzip the downloaded data
    with zipfile.ZipFile(data_path / "pizza_steak_suchi.zip", "r") as zip_ref:
        print("unzipping the data")
        zip_ref.extractall(image_path)


didn't find data/pizza_steak_sushi directory. creating one ...
downloading pizza, steak, sushi data
unzipping the data


In [8]:
# now we should prepare and explore our data
# take this data storage structure and turn it into a dataset usable with PyTorch.
# in order to loop through the directory we use os.walk

import os
def walk_through_dir(dir_path):
    """walks through dir_path and returns its content
    
    Args:
        dir_path (str or pathlib.Path): target directory
        
    Returns:
        number of subdirectories in dir_path
        number of images(files) in each subdirectory
        name of each subdirectory
    """

    for dirpath, dirnames, filenames in os.walk(dir_path):
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in {dirpath}. ")

walk_through_dir(image_path)

There are 2 directories and 0 images in data/pizza_steak_sushi. 
There are 3 directories and 0 images in data/pizza_steak_sushi/test. 
There are 0 directories and 19 images in data/pizza_steak_sushi/test/steak. 
There are 0 directories and 31 images in data/pizza_steak_sushi/test/sushi. 
There are 0 directories and 25 images in data/pizza_steak_sushi/test/pizza. 
There are 3 directories and 0 images in data/pizza_steak_sushi/train. 
There are 0 directories and 75 images in data/pizza_steak_sushi/train/steak. 
There are 0 directories and 72 images in data/pizza_steak_sushi/train/sushi. 
There are 0 directories and 78 images in data/pizza_steak_sushi/train/pizza. 


In [9]:
# now we can set up train path and test path
train_dir = image_path / "train"
test_dir = image_path / "test" 

test_dir, train_dir

(PosixPath('data/pizza_steak_sushi/test'),
 PosixPath('data/pizza_steak_sushi/train'))