<a href="https://colab.research.google.com/github/menouahmad/bonus-III/blob/main/bonus_hot_dog_completed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Pretrained Models in an app

This problem is motivated by the Silicon Valley sketch.  Your goal is to build a similar app with a fine-tuned model from torchvision, deployed through a Streamlit app.

In [None]:
from IPython.display import YouTubeVideo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
# embed the clip that motivates the exercise; as the cell's last expression it is rendered inline
YouTubeVideo(id="tWwCK95X6go")

Today we will use a dataset to determine whether or not an image is a hotdog.  Dataset link: [link](https://drive.google.com/drive/folders/1d2SelwjIGAtYnui_yczzaTL61eF_oLH_?usp=sharing)

**BEGINNING**: To get started, create a basic streamlit app that accepts an image from the user and displays it on the screen.

In [None]:
ls

In [None]:
# a single sample image taken from the test split's "hotdog" folder on the mounted Drive
test_path = '/content/drive/MyDrive/hotdogs/data/hotdog-nothotdog/test/hotdog/1501.jpg'

In [None]:
import matplotlib.pyplot as plt

In [None]:
# sanity check: load the sample image from disk; the returned value is shown as the cell output
plt.imread(test_path)

#### Pretrained Models

An example of a pretrained model is that of the ResNet through `torchvision.models`.  Below is an outline of a streamlit app to deploy the model. You should fine-tune this model using the hot dog data and deploy a model that allows a user to take a picture with their camera or upload an image and returns a voice stating whether the image is a hot dog or not a hot dog.  How would you improve Jinyang's app to make the rest of the team happy?  (**HINT**: [this](https://huggingface.co/datasets/Codatta/MM-Food-100K))

**Example**: ResNet

- [link](https://docs.pytorch.org/vision/main/models/generated/torchvision.models.resnet50.html)

```
import streamlit as st
import numpy as np
from torchvision.models import
import torch



st.header('Hot Dogs!')

uploaded_file = st.file_uploader("Pick a picture")
model = ''

if uploaded_file is not None:
    img = #load the image
    #prepare the image
    #pass through the model
    #make prediction
    #speak!
    
```

In [None]:
%%writefile app_basic.py
import streamlit as st
from PIL import Image

# page title
st.header("hot dogs!")

# the picker only offers the common raster formats listed here
ACCEPTED_TYPES = ["png", "jpg", "jpeg"]
uploaded_file = st.file_uploader("pick a picture", type=ACCEPTED_TYPES)

# nothing is drawn until the user has actually supplied a file
if uploaded_file is not None:
    picture = Image.open(uploaded_file)
    # force three channels so greyscale / RGBA uploads are displayed the same way
    img = picture.convert("RGB")
    st.image(img, caption="your image", use_container_width=True)


In [None]:
%%writefile app_pretrained.py
import streamlit as st
from PIL import Image
import torch
from torchvision.models import resnet50, ResNet50_Weights

st.header("hot dogs!")

uploaded_file = st.file_uploader("pick a picture", type=["png", "jpg", "jpeg"])


@st.cache_resource
def load_pretrained():
    """Build the ImageNet ResNet-50 once and share it across script reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching the network would be re-created (and its weights
    re-read) for each uploaded picture.

    Returns:
        A ``(model, preprocess, categories)`` tuple: the network in eval
        mode, the preprocessing pipeline bundled with the weights, and the
        list of class names indexed by output position.
    """
    weights = ResNet50_Weights.DEFAULT
    net = resnet50(weights=weights)
    net.eval()
    return net, weights.transforms(), weights.meta["categories"]


model, preprocess, categories = load_pretrained()

if uploaded_file is not None:
    img = Image.open(uploaded_file).convert("RGB")
    st.image(img, caption="your image", use_container_width=True)

    # the model expects a batch dimension in front of (channels, height, width)
    x = preprocess(img).unsqueeze(0)

    with torch.no_grad():
        logits = model(x)
        probs = logits.softmax(dim=1)[0]

    top_prob, top_idx = torch.max(probs, dim=0)
    label = categories[int(top_idx)]

    # drop spaces so the check works whether the class list spells it
    # "hotdog" or "hot dog"
    if "hotdog" in label.lower().replace(" ", ""):
        st.success(f"hot dog ({top_prob.item():.1%} sure)")
    else:
        st.error(f"not hot dog (top guess: {label}, {top_prob.item():.1%})")


In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights

# if you are in colab and your data is in google drive, uncomment these two lines:
# from google.colab import drive
# drive.mount("/content/drive")

data_dir = "/content/drive/MyDrive/hotdogs/data/hotdog-nothotdog"

train_dir = os.path.join(data_dir, "train")

# The held-out split is not named the same way in every copy of the data
# ("valid", "val", or only "test").  Use the first folder that exists and
# keep "valid" as the default so a missing folder still fails with the
# familiar path in the error message.
val_dir = next(
    (
        candidate
        for candidate in (os.path.join(data_dir, name) for name in ("valid", "val", "test"))
        if os.path.isdir(candidate)
    ),
    os.path.join(data_dir, "valid"),
)

weights = ResNet50_Weights.DEFAULT

# The normalisation constants are attributes of the preset transform that
# ships with the weights; weights.meta has no "mean" / "std" entries, so
# indexing it for them raises KeyError.
_preset = weights.transforms()
mean = list(_preset.mean)
std = list(_preset.std)

# training pipeline: same geometry as validation plus a random mirror for augmentation
train_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# validation pipeline: deterministic, no augmentation
val_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# ImageFolder derives the class names from the sub-folder names
train_ds = datasets.ImageFolder(train_dir, transform=train_tfms)
val_ds = datasets.ImageFolder(val_dir, transform=val_tfms)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)

# show the label mapping so the class order can be checked by eye
train_ds.class_to_idx, train_ds.classes


In [None]:
# run on the GPU when one is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = resnet50(weights=weights)

# freeze backbone: switch off gradients for every pretrained parameter
model.requires_grad_(False)

# new classifier head (2 classes: hotdog vs nothotdog); a freshly built
# layer has gradients enabled, so it is the only trainable part
head_inputs = model.fc.in_features
model.fc = nn.Linear(head_inputs, 2)
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
# only the head's parameters are handed to the optimizer
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-3)

def accuracy_from_logits(logits, y):
    """Return the fraction of rows whose highest-scoring class equals the target.

    Args:
        logits: 2-D tensor of class scores, one row per sample.
        y: 1-D tensor of integer class indices, one per row of ``logits``.

    Returns:
        The accuracy over the batch as a Python float.
    """
    predicted = torch.argmax(logits, dim=1)
    hits = predicted.eq(y)
    return hits.float().mean().item()

def run_epoch(loader, training):
    """Run one full pass over ``loader`` and report mean loss and accuracy.

    Uses the module-level ``model``, ``optimizer``, ``loss_fn`` and ``device``.

    Args:
        loader: iterable yielding ``(inputs, targets)`` batches.
        training: when True the model is put in train mode and the optimizer
            is stepped after every batch; when False the model is put in
            eval mode and no gradients are computed.

    Returns:
        ``(mean_loss, accuracy)`` averaged over samples, so a short final
        batch does not count as much as a full one.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    # model.train(False) is the same as model.eval()
    model.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)

        if training:
            optimizer.zero_grad()

        with torch.set_grad_enabled(training):
            logits = model(x)
            loss = loss_fn(logits, y)

            if training:
                loss.backward()
                optimizer.step()

        batch_size = y.size(0)
        # loss_fn is built as nn.CrossEntropyLoss() with default arguments,
        # i.e. it returns the batch mean; scale back to a per-sample sum
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        n_seen += batch_size

    if n_seen == 0:
        raise ValueError("run_epoch received a loader with no samples")

    return loss_sum / n_seen, n_correct / n_seen

epochs = 5
for epoch in range(1, epochs + 1):
    # one optimisation pass over the training data, then a gradient-free
    # pass over the held-out data
    train_loss, train_acc = run_epoch(train_loader, training=True)
    val_loss, val_acc = run_epoch(val_loader, training=False)
    progress = f"epoch {epoch}/{epochs} | train acc: {train_acc:.3f} | val acc: {val_acc:.3f}"
    print(progress)

# bundle everything needed to rebuild the classifier later: all weights
# (frozen backbone + trained head), the label mapping, and the
# normalisation constants used for preprocessing
checkpoint = {
    "state_dict": model.state_dict(),
    "class_to_idx": train_ds.class_to_idx,
    "mean": mean,
    "std": std,
}
torch.save(checkpoint, "hotdog_resnet50_finetuned.pt")

# echoed as the cell output
"saved hotdog_resnet50_finetuned.pt"


In [None]:
%%writefile app_finetuned.py
import streamlit as st
from PIL import Image
import torch
from torch import nn
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

st.header("hot dogs!")

uploaded_file = st.file_uploader("pick a picture", type=["png", "jpg", "jpeg"])

# checkpoint written by the fine tuning step in the notebook; optional
ckpt_path = "hotdog_resnet50_finetuned.pt"


def is_hot_dog(label):
    """Return True only when ``label`` names the hot dog class itself.

    A substring test is not enough here: "not hotdog" and "nothotdog" both
    contain "hotdog", so they would be reported as hot dogs too.
    """
    squashed = "".join(ch for ch in label.lower() if ch.isalnum())
    return squashed == "hotdog"


def _imagenet_classifier(weights):
    """Build the stock ImageNet ResNet-50 with its own preprocessing and labels."""
    net = resnet50(weights=weights)
    return net, weights.transforms(), list(weights.meta["categories"])


def _finetuned_classifier(path, weights):
    """Rebuild the fine tuned network from the checkpoint stored at ``path``.

    Returns:
        ``(model, preprocess, labels)`` where ``labels[i]`` is the class name
        for output index ``i``.

    Raises:
        FileNotFoundError: when there is no file at ``path``.
    """
    # inference below runs on CPU tensors, so load the weights onto the CPU
    ckpt = torch.load(path, map_location="cpu")

    class_to_idx = ckpt.get("class_to_idx")
    if class_to_idx:
        # order the names by their saved index instead of guessing the order
        labels = sorted(class_to_idx, key=class_to_idx.get)
    else:
        labels = ["hotdog", "not hotdog"]

    # every weight comes from the checkpoint, so no pretrained download is needed
    net = resnet50(weights=None)
    net.fc = nn.Linear(net.fc.in_features, len(labels))
    net.load_state_dict(ckpt["state_dict"])

    # the normalisation defaults live on the preset transform; weights.meta
    # has no "mean" / "std" entries
    preset = weights.transforms()
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=ckpt.get("mean", preset.mean),
            std=ckpt.get("std", preset.std),
        ),
    ])
    return net, preprocess, labels


@st.cache_resource
def load_classifier(path):
    """Load the classifier once and reuse it across Streamlit reruns.

    Prefers the fine tuned checkpoint at ``path``.  If the file is missing,
    or cannot be loaded, a clean ImageNet model is returned instead so the
    network, labels and preprocessing always match each other.

    Returns:
        ``(model, preprocess, labels, note, problem)``; ``note`` says which
        model is in use and ``problem`` is None or a message explaining why
        the checkpoint was not used.
    """
    weights = ResNet50_Weights.DEFAULT
    problem = None
    try:
        net, preprocess, labels = _finetuned_classifier(path, weights)
        note = "using fine tuned model"
    except FileNotFoundError:
        # no checkpoint has been trained yet: the expected fallback case
        net, preprocess, labels = _imagenet_classifier(weights)
        note = "using imagenet pretrained model"
    except Exception as err:
        # the checkpoint exists but is unusable; fall back, but say so
        net, preprocess, labels = _imagenet_classifier(weights)
        note = "using imagenet pretrained model"
        problem = f"could not load {path}: {err}"

    net.eval()
    return net, preprocess, labels, note, problem


if uploaded_file is not None:
    img = Image.open(uploaded_file).convert("RGB")
    st.image(img, caption="your image", use_container_width=True)

model, preprocess, labels, note, problem = load_classifier(ckpt_path)
if problem is not None:
    st.warning(problem)
st.caption(note)

if uploaded_file is not None:
    # the model expects a batch dimension in front of (channels, height, width)
    x = preprocess(img).unsqueeze(0)

    with torch.no_grad():
        logits = model(x)
        probs = logits.softmax(dim=1)[0]

    top_prob, top_idx = torch.max(probs, dim=0)
    label = labels[int(top_idx)]

    if is_hot_dog(label):
        st.success(f"hot dog ({top_prob.item():.1%} sure)")
    else:
        st.error(f"not hot dog ({top_prob.item():.1%} sure)")
