<a href="https://colab.research.google.com/github/lanehale/airline-chatbot/blob/main/pytorch06_ex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
""" We're well beyond those versions now, no need to download nightly versions
try:
  import torch
  import torchvision
  assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
  assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
except:
  print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
  !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
  import torch
  import torchvision
"""
import torch
import torchvision
# Report the installed builds so results can be tied to specific torch/torchvision releases
version_report = [
    f"torch version: {torch.__version__}",
    f"torchvision version: {torchvision.__version__}",
]
print("\n".join(version_report))

In [None]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
  from torchinfo import summary
except:
  print("[INFO] Couldn't find torchinfo... installing it.")
  !pip install -q torchinfo
  from torchinfo import summary

In [None]:
# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
  from going_modular import data_setup, engine
except:
  """
  This block attempts to download a GitHub repository,
  move a specific directory from the downloaded repository to the current working directory,
  and then remove the downloaded repository.
  """
  # Get the going_modular scripts
  print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")

  # Clone the git repository
  !git clone https://github.com/lanehale/pytorch-deep-learning

  # When cloning a GitHub repository, the directory structure on your local machine doesn't include /tree/main/, so it shouldn't be included in the mv command.
  # The . at the end of the command tells mv to move the specified directory into the current working directory.
  !mv pytorch-deep-learning/going_modular .

  # remove the downloaded repository
  !rm -rf pytorch-deep-learning

  from going_modular import data_setup, engine

In [None]:
# List the working directory (going_modular/ should appear here once the previous cell has run)
!ls

In [None]:
# Move train.py and predict.py out of going_modular/ into the working directory, then list it to verify.
# NOTE(review): not idempotent - on a re-run the files are no longer in going_modular/, so mv reports an error.
!mv going_modular/train.py .
!mv going_modular/predict.py .
!ls

In [None]:
# Show which scripts remain inside going_modular/ after the move above
!ls going_modular/

In [None]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU
device = "cpu"
if torch.cuda.is_available():
  device = "cuda"
device

In [None]:
"""
Ex 5. Train the model with more data, say 20% of the images from Food101 of Pizza, Steak and Sushi images
      (My get_data.py already uses the larger, 20% dataset)
"""
# Get images
# Runs the download script as a separate process.
# NOTE(review): presumably it populates data/pizza_steak_sushi, which the next cell points at - confirm in get_data.py.
!python going_modular/get_data.py

In [None]:
from pathlib import Path
# Root folder of the downloaded dataset
image_path = Path("data/pizza_steak_sushi")

# The train/test splits are sibling sub-folders of the dataset root
train_dir, test_dir = (image_path / split for split in ("train", "test"))

train_dir, test_dir

In [None]:
# Build the preprocessing pipeline by hand (the only option for torchvision < 0.13)
channel_mean = [0.485, 0.456, 0.406]  # per-color-channel mean used for normalization
channel_std = [0.229, 0.224, 0.225]   # per-color-channel standard deviation used for normalization

manual_transforms = transforms.Compose([
    # 1. Reshape all images to 224x224 (though some models may require different sizes)
    transforms.Resize((224,224)),
    # 2. Turn image values to between 0 & 1
    transforms.ToTensor(),
    # 3. Normalize each color channel with the mean/std above
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

In [None]:
# Build DataLoaders for both splits; the helper also hands back the list of class names
loaders_and_classes = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,  # resize, convert images to between 0 & 1 and normalize them
    batch_size=32,
)
train_dataloader, test_dataloader, class_names = loaders_and_classes
train_dataloader, test_dataloader, class_names

In [None]:
""" As of torchvision v0.13+, an automatic transform creation feature has been added. """
# Get a set of pretrained model weights
# (this is a weights descriptor; the model itself is instantiated from it in a later cell)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT  # .DEFAULT = best available weights from pretraining on ImageNet
weights

In [None]:
"""
And now to access the transforms associated with our weights, we can use the transforms() method.
This is essentially saying "get the data transforms that were used to train the EfficientNet_B0_Weights on ImageNet".
"""
# Get the transforms used to create our pretrained weights
# (displayed below so it can be compared with the hand-built manual_transforms pipeline)
auto_transforms = weights.transforms()
auto_transforms

In [None]:
# Display the hand-built pipeline for comparison with auto_transforms
manual_transforms

In [None]:
# Rebuild the DataLoaders, this time with the transforms that ship with the pretrained weights
loaders_and_classes = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=auto_transforms,  # perform the same data transforms on our training data as the pretrained model
    batch_size=32,
)
train_dataloader, test_dataloader, class_names = loaders_and_classes
train_dataloader, test_dataloader, class_names

In [None]:
# OLD: Set up the model with pretrained weights and send it to the target device (this was prior to torchvision v0.13)
# model = torchvision.models.efficientnet_b0(pretrained=True).to(device) # OLD method (with pretrained=True)

# NEW: Set up the model with pretrained weights and send it to the target device (torchvision v0.13+)
# `weights` is re-assigned to the same value as in the earlier cell, so this cell does not depend on that cell having run.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT  # .DEFAULT = best available weights
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

#model # uncomment to output (it's very long) and see dropout rate and number of in_features

In [None]:
# Print a summary using torchinfo
# (taken *before* freezing layers / swapping the classifier, as a baseline for the later summary)
summary(model=model,
        input_size=(32, 3, 224, 224),  # [batch_size, color_channels, height, width]
        # col_names=["input_size"], # uncomment for smaller output
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
# Freeze the feature extractor: requires_grad_(False) switches requires_grad off
# for every parameter under model.features (trailing ';' hides the returned module's repr)
model.features.requires_grad_(False);

In [None]:
""" Adjust the output layer or the classifier portion of our pretrained model to our needs (out_features=3). """
# Seed the CPU and CUDA generators so the new layer's initial weights are reproducible
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# One output unit per class
output_shape = len(class_names)

# Build the replacement head layer by layer, then send it to the target device
head_dropout = nn.Dropout(p=0.2, inplace=True)
head_linear = nn.Linear(in_features=1280,
                        out_features=output_shape,  # same number of output units as our number of classes
                        bias=True)
model.classifier = nn.Sequential(head_dropout, head_linear).to(device)

In [None]:
# Do a summary *after* freezing the features and changing the output classifier layer
# (the "trainable" column shows which layers still receive gradient updates)
summary(model=model,
        input_size=(32, 3, 224, 224),  # [batch_size, color_channels, height, width]
        verbose=0,
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
""" Because we're still working with multi-class classification, we'll use "CrossEntropyLoss" as our loss function. """
# Define loss and optimizer
# loss_fn is reused later when training the EfficientNet-B2 model.
loss_fn = nn.CrossEntropyLoss()
# All parameters are handed to Adam, including the frozen feature-extractor ones (requires_grad=False).
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
"""
Ex 4. Train the model for longer (10 epochs should do)
"""
# Set the random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

""" Note: We're only going to be training the parameters classifier here as all of the other parameters in our model have been frozen. """
# Set up training and save the results (The train() function is in the engine.py script inside the going_modular directory.)
# `results` is indexed by "test_acc" and "test_loss" later in this notebook and is passed to plot_loss_curves().
results = engine.train(model=model,
                       train_dataloader=train_dataloader,
                       test_dataloader=test_dataloader,
                       optimizer=optimizer,
                       loss_fn=loss_fn,
                       epochs=10,
                       device=device)

# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

In [None]:
# Get the plot_loss_curves() function from helper_functions.py, download the file if we don't have it
try:
  from helper_functions import plot_loss_curves
except ImportError:
  # Only a failed import triggers the download; other exceptions are left to surface.
  print("[INFO] Couldn't find helper_functions.py, downloading...")
  import importlib
  import requests

  # Fetch and validate the response *before* creating the file, so a failed
  # download cannot leave an empty or error-page helper_functions.py behind.
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

  # The module file was created after the interpreter started; refresh the
  # import system's directory caches so the new file is guaranteed to be seen.
  importlib.invalidate_caches()
  from helper_functions import plot_loss_curves

# Plot the loss curves of our model
plot_loss_curves(results)

In [None]:
from typing import List, Tuple
from PIL import Image

# 1. Take in a trained model, class names, image path, image size, a transform and target device
def pred_and_plot_image(model: torch.nn.Module,
                        image_path: str,
                        class_names: List[str],
                        image_size: Tuple[int, int] = (224, 224),
                        transform: torchvision.transforms = None,
                        device: torch.device=device):
  """Predict the class of one image file with `model` and plot the image with the result.

  Args:
    model: Trained classifier; it is moved to `device` and switched to eval mode.
    image_path: Path of the image file to open.
    class_names: Class labels, indexed by the position of the model's outputs.
    image_size: Size used by the Resize step of the fallback transform.
    transform: Optional callable applied to the PIL image. When None, a default
      Resize -> ToTensor -> Normalize pipeline is built.
    device: Device the model and the image tensor are sent to. The default is
      whatever the notebook-level `device` variable held when this cell was run.

  Returns:
    None. A matplotlib figure titled with the predicted label and its
    probability is drawn.
  """
  # 2. Open image. Force 3-channel RGB so grayscale / RGBA / palette files still
  #    match the 3-value Normalize stats and the model's expected input channels.
  #    The context manager closes the underlying file once the pixels are copied.
  with Image.open(image_path) as opened_image:
    img = opened_image.convert("RGB")

  # 3. Create transformation for image (if one doesn't exist)
  if transform is not None:
    image_transform = transform
  else:
    image_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

  ### Predict on image ###

  # 4. Make sure the model is on the target device
  model.to(device)

  # 5. Turn on model evaluation mode and inference mode
  model.eval()
  with torch.inference_mode():
    # 6. Transform and add an extra dimension to image (model requires samples in [batch_size, color_channels, height, width])
    transformed_image = image_transform(img).unsqueeze(dim=0)

    # 7. Make a prediction on image with an extra dimension and send it to the target device
    target_image_pred = model(transformed_image.to(device))

  # 8. Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
  target_image_pred_probs = torch.softmax(target_image_pred, dim=1)

  # 9. Convert prediction probabilities -> prediction labels
  target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1)

  # Pull plain Python numbers out of the tensors so the list lookup and the
  # string formatting below do not depend on implicit tensor conversions.
  pred_index = int(target_image_pred_label.item())
  pred_prob = target_image_pred_probs.max().item()

  # 10. Plot image with predicted label and probability
  plt.figure()
  plt.imshow(img)
  plt.title(f"Pred: {class_names[pred_index]} | Prob: {pred_prob:.3f}")
  plt.axis(False)

In [None]:
# Get a random list of image paths from test set
import random
num_images_to_plot = 5
test_image_path_list = list(Path(test_dir).glob("*/*.jpg"))              # get list of all image paths from test data
# random.sample raises ValueError when k exceeds the population size, so cap k at the number of images found
test_image_path_sample = random.sample(population=test_image_path_list,  # go through all of the test image paths
                                       k=min(num_images_to_plot, len(test_image_path_list)))  # randomly select 'k' image paths to pred and plot

# Make predictions on and plot the images
# (the loop variable is deliberately not called `image_path`: that name holds the dataset root folder)
for test_image_path in test_image_path_sample:
  pred_and_plot_image(model=model,
                      image_path=test_image_path,
                      class_names=class_names,
                      # transform=weights.transforms(),  # optionally pass in a specified transform from our pretrained model weights
                      image_size=(224, 224))

In [None]:
"""
Ex 3. Predict on your own image of pizza/steak/sushi
"""
# Get custom images
# NOTE(review): presumably this places the custom .jpeg files read two cells below directly under data/ - confirm in get_custom_data.py.
!python going_modular/get_custom_data.py

In [None]:
# List data/ to check which custom images are available
!ls data

In [None]:
data_path = Path("data")

# Custom photos to classify (expected to sit directly inside data/)
filenames = [
    "cheese-pizza.jpeg",
    "pizza-slice.jpeg",
    "pizza-slice2.jpeg",
    "pizza-sliced.jpeg",
    "pizza-sliced2.jpeg",
    "pizza-partial-view.jpeg",
    "pizza-partial-view2.jpeg",
    "pizza-side-view.jpeg"
]

# Build each full path on the fly, then predict on and plot that image
for custom_image_path in (data_path / name for name in filenames):
  pred_and_plot_image(model=model,
                      image_path=custom_image_path,
                      class_names=class_names,
                      image_size=(224, 224))

In [None]:
"""
The length of a DataLoader in Python is determined by the number of batches it will produce from the dataset.
This number is calculated by dividing the total number of samples in the dataset by the batch size and potentially
applying a rounding operation depending on the drop_last argument.
"""
len(test_dataloader)  # ceil(150 / 32) = 5 batches: 4 full ones plus a final partial batch (assumes 150 test images and drop_last=False - TODO confirm)

In [None]:
"""
Ex 1. Make predictions on the entire test dataset and plot a confusion matrix for the results of our model compared to the truth labels.
"""
# Import tqdm for progress bar
from tqdm.auto import tqdm

# 1. Make predictions with trained model
y_preds = []
model.eval()
with torch.inference_mode():
  for X, y in tqdm(test_dataloader, desc="Making predictions"):
    # Send data and targets to target device
    # NOTE(review): y is moved to the device but is not used anywhere else in this loop.
    X, y = X.to(device), y.to(device)

    # Do the forward pass
    y_logit = model(X)

    # Turn predictions from logits to probabilities to labels
    y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)  # note: perform softmax on the "logits" dimension, not "batch" dimension
                                                          # (in this case we have a batch size of 32, so can perform on dim=1)
    # Put predictions on CPU for evaluation
    y_preds.append(y_pred.cpu())

  # Concatenate list of predictions into a tensor
  # (one predicted label per test sample, in the order the DataLoader yielded them)
  y_pred_tensor = torch.cat(y_preds)

print(y_pred_tensor)
# Number of predictions next to number of test samples
y_pred_tensor.shape, len(test_dataloader.dataset)

In [None]:
# See if torchmetrics exists, if not, install it
try:
  import torchmetrics, mlxtend
except:
  !pip install -q torchmetrics -U mlxtend
  import torchmetrics, mlxtend
print(f"mlxtend version: {mlxtend.__version__}")

In [None]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

# 2. Setup confusion matrix instance and compare predictions to targets
confmat = ConfusionMatrix(num_classes=len(class_names), task='multiclass')
print(confmat)

# Get truth labels for test dataset
# (iterates the whole DataLoader, so every test image is loaded and transformed just to read its label)
test_truth = torch.cat([y for X, y in test_dataloader])
print(test_truth)

# Convert the target list to a tensor
# (the same labels read straight from the dataset's `targets` attribute, without loading any images)
target_tensor = torch.tensor(test_dataloader.dataset.targets)

confmat_tensor = confmat(preds=y_pred_tensor,
                         target=target_tensor)  # Use the converted tensor
print(target_tensor)
# Sanity check: the two label sources only match if the DataLoader yields samples in dataset order
s = "is" if torch.equal(test_truth, target_tensor) else "is NOT"
print(f"test_truth {s} equal to target_tensor")
#print(torch.eq(test_truth, target_tensor))  # this compares each element in the two tensors
print(target_tensor.shape)
print(confmat_tensor)

# 3. Plot the confusion matrix
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(),
    class_names=class_names,
    figsize=(10, 7)
);

In [None]:
"""
Ex 2. Get the "most wrong" of the predictions on the test dataset and plot the 5 "most wrong" images.
"""
# Create a function to return a list of dictionaries with sample, label, prediction, pred prob
def predict_and_store(model, test_paths, tranform, class_names, device):
  """Predict on every image path and collect per-sample results.

  Args:
    model: Trained classifier. It is switched to eval mode; it is NOT moved to
      `device` here, so it must already live there.
    test_paths: Iterable of pathlib.Path objects. Each image's parent folder
      name is taken as its true class name.
    tranform: Callable applied to each PIL image before the forward pass
      (parameter name kept as-is because callers pass it by keyword).
    class_names: Class labels, indexed by the position of the model's outputs.
    device: Device each image tensor is sent to.

  Returns:
    A tuple (pred_list, test_preds_tensor):
      pred_list: one dict per image with the keys "image_path", "class_name",
        "pred_prob", "pred_class" and "correct".
      test_preds_tensor: 1-D tensor of predicted label indices on the CPU, in
        the same order as `test_paths` (empty when `test_paths` is empty).
  """
  test_paths = list(test_paths)
  if not test_paths:
    # torch.cat() rejects an empty list, so return an explicit empty result
    return [], torch.empty(0, dtype=torch.long)

  # Imported here (once, not per image) so the function does not rely on an earlier cell's import
  from PIL import Image

  pred_list = []
  test_preds = []

  # Eval mode only needs to be set once, not on every iteration
  model.eval()

  for path in test_paths:
    # The parent folder name is the ground-truth class
    class_name = path.parent.stem

    # Force 3-channel RGB so grayscale / RGBA files still fit the transform and the model;
    # the context manager closes the file once the pixels are copied.
    with Image.open(path) as opened_image:
      img = opened_image.convert("RGB")

    transformed_image = tranform(img).unsqueeze(dim=0).to(device)  # transform image and add batch dimension

    with torch.inference_mode():
      pred_logit = model(transformed_image)
      pred_prob = torch.softmax(pred_logit, dim=1)
      pred_label = torch.argmax(pred_prob, dim=1)

    pred_class = class_names[pred_label.item()]
    test_preds.append(pred_label.cpu())

    # Add sample dict to list of preds
    pred_list.append({
        "image_path": path,
        "class_name": class_name,
        "pred_prob": pred_prob.max().item(),  # .item() gives a plain Python number
        "pred_class": pred_class,
        "correct": class_name == pred_class,  # does the prediction match the true label?
    })

  # Concatenate once, after the loop (doing it per iteration repeats the work for every image)
  test_preds_tensor = torch.cat(test_preds)

  return pred_list, test_preds_tensor

In [None]:
# Get list of all image paths from test data
from pathlib import Path
# glob() yields files in arbitrary (filesystem) order. Sort them so the predictions line up
# index-for-index with `target_tensor`, which they are compared against when the confusion matrix is rebuilt.
# NOTE(review): assumes the test dataset is an ImageFolder-style dataset that enumerates
# samples in sorted class/filename order - confirm in data_setup.create_dataloaders.
test_image_path_list = sorted(Path(test_dir).glob("*/*.jpg"))
# test_labels = [path.parent.stem for path in test_image_path_list]

# Resize + ToTensor only (no normalization); kept for comparison experiments
simple_tranform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

pred_list, test_preds_tensor = predict_and_store(
    model=model,
    test_paths=test_image_path_list,
    tranform=manual_transforms, # manual_transforms only 9 False, auto_transforms 12 False, #simple_tranform isn't right (many False)
    class_names=class_names,
    device=device
)
pred_list[:5], test_preds_tensor

In [None]:
# Rebuild the confusion matrix from the per-image predictions (this overwrites the earlier confmat_tensor).
# NOTE(review): only meaningful if test_preds_tensor is in the same sample order as target_tensor - confirm.
confmat_tensor = confmat(preds=test_preds_tensor,
                         target=target_tensor)  # Use the converted tensor

# Compare the per-image predictions with the batched DataLoader predictions made earlier
s = "is" if torch.equal(test_preds_tensor, y_pred_tensor) else "is NOT"
print(f"test_preds_tensor {s} equal to y_pred_tensor")
print(test_preds_tensor)
print(y_pred_tensor)

# 3. Plot the confusion matrix
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(),
    class_names=class_names,
    figsize=(10, 7)
);

In [None]:
# Turn pred_list (test pred dicts) into a DataFrame
import pandas as pd

# Wrong predictions first (correct=False sorts before True), most confident first within each group
test_pred_df = (
    pd.DataFrame(pred_list)
    .sort_values(by=["correct", "pred_prob"], ascending=[True, False])
)
test_pred_df[:20]

In [None]:
# Plot the top 5 most wrong images
import torchvision
import matplotlib.pyplot as plt

# test_pred_df is sorted with wrong, high-confidence predictions first
top_5_most_wrong = test_pred_df[:5]

# Unpack (index, row) directly; the path variable is deliberately not called
# `image_path`, because that name holds the dataset root folder.
for _row_index, wrong_row in top_5_most_wrong.iterrows():
  wrong_image_path = wrong_row["image_path"]
  true_label = wrong_row["class_name"]
  pred_class = wrong_row["pred_class"]
  pred_prob = wrong_row["pred_prob"]

  img = torchvision.io.read_image(str(wrong_image_path)).permute(1, 2, 0)  # get image as tensor and permute to [height, width, color_channels]
  plt.imshow(img)
  plt.title(f"True: {true_label} | Pred: {pred_class} | Prob: {pred_prob:.3f}")
  plt.axis(False)
  plt.show()

In [None]:
# Same ranking without pandas: wrong predictions first, highest confidence first
sorted_pred_list = sorted(
    pred_list,
    key=lambda pred: (not pred['correct'], pred['pred_prob']),
    reverse=True,
)
sorted_pred_list[:20]

In [None]:
"""
Ex 6. Try a different model from torchvision.models on the Pizza, Steak, Sushi data
"""
# Weights descriptor for EfficientNet-B2; the B0 objects defined earlier are left untouched (all B2 names carry a _B2 suffix)
weights_B2 = torchvision.models.EfficientNet_B2_Weights.DEFAULT  # .DEFAULT = best available weights from pretraining on ImageNet
weights_B2

In [None]:
# Get the preprocessing pipeline bundled with the B2 weights and display it
auto_transforms_B2 = weights_B2.transforms()
auto_transforms_B2

In [None]:
# Build a separate set of DataLoaders that apply the B2 preprocessing pipeline
loaders_and_classes_B2 = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=auto_transforms_B2,  # perform the same data transforms on our training data as the pretrained model
    batch_size=32,
)
train_dataloader_B2, test_dataloader_B2, class_names_B2 = loaders_and_classes_B2
train_dataloader_B2, test_dataloader_B2, class_names_B2

In [None]:
# NEW: Set up the model with pretrained weights and send it to the target device (torchvision v0.13+)
model_B2 = torchvision.models.efficientnet_b2(weights=weights_B2).to(device)

# View it to see dropout rate and number of in_features
#model_B2
# The string below records the classifier part of that printout for reference.
# NOTE(review): as the last expression of the cell, this string is also echoed as the cell's output.
"""
  (classifier): Sequential(
    (0): Dropout(p=0.3, inplace=True)
    (1): Linear(in_features=1408, out_features=1000, bias=True)
  )
"""

In [None]:
# Print a summary using torchinfo
# (taken before freezing the features and replacing the classifier)
summary(model=model_B2,
        input_size=(32, 3, 224, 224),  # [batch_size, color_channels, height, width]
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
# Freeze the feature extractor: requires_grad_(False) switches requires_grad off
# for every parameter under model_B2.features (trailing ';' hides the returned module's repr)
model_B2.features.requires_grad_(False);

In [None]:
""" Adjust the output layer or the classifier portion of our pretrained model to our needs (out_features=3). """
# Set manual seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Get the length of class_names (one output unit for each class)
output_shape = len(class_names_B2)

# Recreate the classifier layer and send it to the target device.
# The dropout rate and input width are read from the existing head instead of being
# hard-coded; for EfficientNet-B2 they are p=0.3 and in_features=1408 (see the
# classifier printout recorded when the model was created). Re-running this cell is
# safe: the replacement head keeps the same two values.
# Note: the often-quoted starting dropout of 0.2 refers to EfficientNetV2 models, a
# different family from the EfficientNet-B2 used here. The ideal value can also depend
# on the model size, the training data and the regularization strategy used.
head_dropout_p = model_B2.classifier[0].p
head_in_features = model_B2.classifier[1].in_features

model_B2.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=head_dropout_p, inplace=True),
    torch.nn.Linear(in_features=head_in_features,
                    out_features=output_shape,  # same number of output units as our number of classes
                    bias=True)).to(device)

# View the summary after freezing the features and changing the output classifier layer
summary(model=model_B2,
        input_size=(32, 3, 224, 224),
        verbose=0,
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
""" Train the model """
# Set the random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# A fresh optimizer bound to model_B2's parameters (the earlier `optimizer` is bound to the B0 model)
optimizer_b2 = torch.optim.Adam(model_B2.parameters(), lr=0.001)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

""" Note: We're only going to be training the parameters classifier here as all of the other parameters in our model have been frozen. """
# Set up training and save the results
# (loss_fn is the CrossEntropyLoss instance created earlier for the B0 run)
results_B2 = engine.train(model=model_B2,
                          train_dataloader=train_dataloader_B2,
                          test_dataloader=test_dataloader_B2,
                          optimizer=optimizer_b2,
                          loss_fn=loss_fn,
                          epochs=10,
                          device=device)

# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total running time: {end_time - start_time:.3f} seconds")

In [None]:
# Display the class names returned together with the B2 DataLoaders
class_names_B2

In [None]:
# Make predictions and store in a list of dictionaries
# The paths are sorted first: glob() order is arbitrary, and these predictions are compared
# index-for-index with labels collected from test_dataloader_B2 when the confusion matrix is built.
# NOTE(review): assumes that DataLoader yields samples in sorted class/filename order
# (ImageFolder-style dataset, no shuffling) - confirm in data_setup.create_dataloaders.
pred_list_B2, test_preds_tensor_B2 = predict_and_store(
    model=model_B2,
    test_paths=sorted(test_image_path_list),
    tranform=auto_transforms_B2,
    class_names=class_names_B2,
    device=device
)
pred_list_B2[:5], test_preds_tensor_B2

In [None]:
# Setup confusion matrix instance and compare predictions to targets
# (this rebinds `confmat`, replacing the instance created for the B0 model)
confmat = ConfusionMatrix(num_classes=len(class_names_B2), task='multiclass')

# Get truth labels for test dataset
# (iterates the whole DataLoader, so every test image is loaded just to read its label)
test_truth_B2 = torch.cat([y for X, y in test_dataloader_B2])

# NOTE(review): only meaningful if test_preds_tensor_B2 is in the same sample order as test_truth_B2 - confirm.
confmat_tensor_B2 = confmat(preds=test_preds_tensor_B2,
                            target=test_truth_B2)

# Plot the confusion matrix
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor_B2.numpy(),
    class_names=class_names_B2,
    figsize=(10, 7)
);

In [None]:
# Rank the B2 predictions: wrong ones first, highest confidence first
sorted_pred_list_B2 = sorted(
    pred_list_B2,
    key=lambda pred: (not pred['correct'], pred['pred_prob']),
    reverse=True,
)

# Top 5 feed the plots; top 10 are shown as a table
test_pred_df_B2 = pd.DataFrame(sorted_pred_list_B2[:5])
test_pred_df_B2_ = pd.DataFrame(sorted_pred_list_B2[:10])
test_pred_df_B2_

In [None]:
# Plot the 5 most wrong images
# Unpack (index, row) directly; the path variable is deliberately not called
# `image_path`, because that name holds the dataset root folder.
for _row_index, wrong_row in test_pred_df_B2.iterrows():
  wrong_image_path = wrong_row["image_path"]
  true_label = wrong_row["class_name"]
  pred_class = wrong_row["pred_class"]
  pred_prob = wrong_row["pred_prob"]

  img = torchvision.io.read_image(str(wrong_image_path)).permute(1, 2, 0)  # get image as tensor and permute to [height, width, color_channels]
  plt.imshow(img)
  plt.title(f"True: {true_label} | Pred: {pred_class} | Prob: {pred_prob:.4f}")
  plt.axis(False)
  plt.show()

In [None]:
# Check results with 20% of data for 10 epochs using eff_b0
# (best test accuracy and lowest test loss over the run; the two may come from different epochs)
max(results["test_acc"]), min(results["test_loss"])

In [None]:
# Check results_B2 with 20% of data for 10 epochs using eff_b2 (double the model parameters)
# (best test accuracy and lowest test loss over the run; the two may come from different epochs)
max(results_B2["test_acc"]), min(results_B2["test_loss"])

In [None]:
%%writefile going_modular/get_any_data.py
"""
Contains functionality for creating data folders and downloading requested data.
"""
import os
import requests
import zipfile
from pathlib import Path

def from_path(from_path: str,         # e.g. "pizza_steak_sushi_20_percent.zip"
              image_dir: str):        # e.g. "pizza_steak_sushi"
  """Download a zipped image dataset and unpack it into data/<image_dir>.

  Nothing is downloaded when data/<image_dir> already exists.

  Args:
    from_path: File name of the zip archive. It is appended to the GitHub
      "raw" data URL and also used as the temporary local file name in data/.
    image_dir: Name of the folder under data/ that receives the extracted files.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
  """
  # Set up path to data folder
  data_path = Path("data/")
  image_path = data_path / image_dir  # "pizza_steak_sushi"
  zip_path = data_path / from_path    # "pizza_steak_sushi_20_percent.zip"

  # If the image folder already exists there is nothing to do
  if image_path.is_dir():
    print(f"{image_path} directory exists.")
    return

  print(f"Did not find {image_path} directory, creating one...")

  # Download images. The response is validated before anything is written, and the
  # image folder is only created once a valid archive is in hand; otherwise a failed
  # download would leave an empty folder behind and every later call would skip.
  url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/" + from_path
  print(f"Downloading {image_dir} data...")    # pizza, steak, sushi
  request = requests.get(url, timeout=60)
  request.raise_for_status()

  data_path.mkdir(parents=True, exist_ok=True)
  try:
    with open(zip_path, "wb") as f:
      f.write(request.content)

    # Unzip image data
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
      print(f"Unzipping {image_dir} data...")      # pizza, steak, sushi
      image_path.mkdir(parents=True, exist_ok=True)
      zip_ref.extractall(image_path)
  finally:
    # Remove zip file, whether or not extraction succeeded
    if zip_path.exists():
      os.remove(zip_path)

In [None]:
# Delete the whole data/ folder, then list the working directory to verify.
# NOTE(review): presumably done so the get_any_data.from_path() call further down takes its download branch - confirm.
!rm -rf data/
!ls

In [None]:
# Check that get_any_data.py (written by the %%writefile cell) is now in going_modular/
!ls going_modular/

In [None]:
# Import the module written by the %%writefile cell and fetch the 20% pizza/steak/sushi archive into data/pizza_steak_sushi
# NOTE(review): if get_any_data was already imported in this kernel, a rewritten file is not picked up without a reload/restart.
from going_modular import get_any_data
get_any_data.from_path(from_path="pizza_steak_sushi_20_percent.zip", image_dir="pizza_steak_sushi")

In [None]:
# Confirm the archive was extracted into data/pizza_steak_sushi
!ls data/pizza_steak_sushi