In [6]:
# scrape_images.py
from icrawler.builtin import GoogleImageCrawler
import os

# --- Configuration ---
# Query sent to Google Images, and the directory that receives the downloads.
search_term, output_folder = "bear", "bear_images"

# Create the destination directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# --- Download ---
# The crawler stores files under `output_folder`; max_num=10 is the cap
# handed to the crawl call.
crawler = GoogleImageCrawler(storage=dict(root_dir=output_folder))
crawler.crawl(keyword=search_term, max_num=10)


2025-05-17 18:36:27,749 - INFO - icrawler.crawler - start crawling...
2025-05-17 18:36:27,750 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-05-17 18:36:27,751 - INFO - feeder - thread feeder-001 exit
2025-05-17 18:36:27,753 - INFO - icrawler.crawler - starting 1 parser threads...
2025-05-17 18:36:27,757 - INFO - icrawler.crawler - starting 1 downloader threads...
2025-05-17 18:36:28,411 - INFO - parser - parsing result page https://www.google.com/search?q=bear&ijn=0&start=0&tbs=&tbm=isch
2025-05-17 18:36:28,874 - ERROR - downloader - Response status code 404, file https://upload.wikimedia.org/wikipedia/commons/thumb/9/9e/Ours_brun_parcanimalierpyrenees_1.jpg
2025-05-17 18:36:29,096 - ERROR - downloader - Response status code 404, file https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/2010-kodiak-bear-1.jpg
2025-05-17 18:36:29,207 - ERROR - downloader - Response status code 401, file https://transforms.stlzoo.org/production/animals/grizzly-bear-01-01.jpg
2025-05-

In [7]:
# scrape_images.py
from icrawler.builtin import GoogleImageCrawler
import os

# --- Configuration ---
# Query sent to Google Images, and the directory that receives the downloads.
search_term, output_folder = "elephant", "elephant_images"

# Create the destination directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# --- Download ---
# The crawler stores files under `output_folder`; max_num=10 is the cap
# handed to the crawl call.
crawler = GoogleImageCrawler(storage=dict(root_dir=output_folder))
crawler.crawl(keyword=search_term, max_num=10)


2025-05-17 18:36:34,776 - INFO - icrawler.crawler - start crawling...
2025-05-17 18:36:34,778 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-05-17 18:36:34,780 - INFO - feeder - thread feeder-001 exit
2025-05-17 18:36:34,785 - INFO - icrawler.crawler - starting 1 parser threads...
2025-05-17 18:36:34,787 - INFO - icrawler.crawler - starting 1 downloader threads...
2025-05-17 18:36:35,389 - INFO - parser - parsing result page https://www.google.com/search?q=elephant&ijn=0&start=0&tbs=&tbm=isch
2025-05-17 18:36:35,786 - INFO - downloader - image #1	https://upload.wikimedia.org/wikipedia/commons/3/37/African_Bush_Elephant.jpg
2025-05-17 18:36:36,738 - INFO - downloader - image #2	https://www.colchesterzoologicalsociety.com/wp-content/uploads/2024/09/African-Elephant-1.jpg
2025-05-17 18:36:36,776 - INFO - downloader - image #3	https://cdn.britannica.com/02/152302-050-1A984FCB/African-savanna-elephant.jpg
2025-05-17 18:36:36,849 - ERROR - downloader - Response status code 401

In [8]:
# process_images.py
import os
from PIL import Image

# Parameters
input_folder = "bear_images"
output_folder = os.path.join(input_folder, "mobilenet_ready")
mobilenet_size = (224, 224)  # target size handed to Image.resize
# File extensions treated as images. '.jfif' is accepted here as well so that
# this cell handles the same set of files as the elephant preprocessing cell.
image_extensions = ('.jpg', '.jpeg', '.png', '.webp', '.bmp', '.jfif')

os.makedirs(output_folder, exist_ok=True)

for fname in os.listdir(input_folder):
    fpath = os.path.join(input_folder, fname)
    if not os.path.isfile(fpath):
        continue  # Skip folders (this also skips the output folder itself)

    if not fname.lower().endswith(image_extensions):
        continue  # Not a recognised image extension

    try:
        with Image.open(fpath) as img:
            # Normalise to 3-channel RGB before resizing and saving as JPEG.
            img = img.convert("RGB")
            img = img.resize(mobilenet_size, Image.LANCZOS)
            # Same base name as the source file, always with a .jpg extension.
            out_path = os.path.join(output_folder, os.path.splitext(fname)[0] + ".jpg")
            img.save(out_path, format="JPEG", quality=85)
    except Exception as e:
        # Best-effort: report the unreadable file and keep going with the rest.
        print(f"Failed to process {fname}: {e}")


In [10]:
# process_images.py
import os
from PIL import Image

# Configuration: source directory, destination sub-directory, target size.
input_folder = "elephant_images"
output_folder = os.path.join(input_folder, "mobilenet_ready")
mobilenet_size = (224, 224)

os.makedirs(output_folder, exist_ok=True)

for fname in os.listdir(input_folder):
    fpath = os.path.join(input_folder, fname)

    # Only regular files with a known image extension are converted;
    # everything else (sub-directories included) is passed over.
    has_image_ext = fname.lower().endswith(
        ('.jpg', '.jpeg', '.png', '.webp', '.bmp', '.jfif')
    )
    if not (os.path.isfile(fpath) and has_image_ext):
        continue

    try:
        with Image.open(fpath) as source_image:
            # RGB conversion, then LANCZOS resize to the target size.
            resized = source_image.convert("RGB").resize(mobilenet_size, Image.LANCZOS)
            # Keep the base name, force a .jpg extension in the output folder.
            stem = os.path.splitext(fname)[0]
            out_path = os.path.join(output_folder, stem + ".jpg")
            resized.save(out_path, format="JPEG", quality=85)
    except Exception as e:
        # A file that cannot be processed is reported and skipped.
        print(f"Failed to process {fname}: {e}")


In [11]:
import os
import torch
from PIL import Image
from torchvision import models, transforms
from torchvision.datasets.folder import default_loader
from IPython.display import display, Markdown
import requests
import pandas as pd
from torchvision.models import MobileNet_V2_Weights


# Load ImageNet labels: one class name per line; the position in the list is
# the class index used to translate the model's predicted index into a name.
response = requests.get(
    "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
    timeout=30,  # never hang the notebook on a stalled connection
)
# Fail loudly on an HTTP error instead of treating an error page as labels.
response.raise_for_status()
imagenet_labels = response.text.strip().split("\n")

# Set up model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
# Move the weights onto the same device the input tensors are sent to.
# Without this the model stays on the CPU while inputs go to `device`, and
# every forward pass fails on a CUDA machine.
model = model.to(device)
# Switch to evaluation mode for inference.
model.eval()




# Preprocessing applied to every image before it is fed to MobileNetV2:
# convert to a tensor, then normalise each channel with the mean/std below.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_preprocess_steps)

# Define label grouping: fine-grained predicted class name -> coarse category.
# Any predicted label that is not a key here is treated as "other" by the
# lookup in the prediction loop.
fine_to_coarse = {
    "brown bear": "bear",
    "ice bear": "bear",
    "grizzly": "bear",
    "American black bear": "bear",
    # "sloth bear" is also an ImageNet-1k class; without this entry a
    # sloth-bear prediction would be scored as "other" (i.e. wrong).
    "sloth bear": "bear",

    "African elephant": "elephant",
    "Indian elephant": "elephant",
    "tusker": "elephant"
}

# Render a short markdown explanation of the coarse-grained check in the
# cell output. The text is kept in a variable so the display call stays short.
coarse_summary_md = """
### 🔍 Coarse-Grained Classification Summary

We now check whether MobileNetV2 with pretrained ImageNet weights correctly classifies each image from the folders below as **some kind of bear** or **some kind of elephant**.

- 🐻 `"brown bear"`, `"ice bear"`, etc. → **bear**
- 🐘 `"African elephant"`, `"tusker"`, etc. → **elephant**

This checks how well the model generalizes without needing exact species matches.
"""
display(Markdown(coarse_summary_md))

# Prediction loop
results = []  # one dict per successfully classified image, appended by predict_folder


def predict_folder(folder_path: str, label_name: str) -> None:
    """Classify every image in ``folder_path`` and score it against ``label_name``.

    For each ``.jpg``/``.jpeg``/``.png`` file in the folder, the image is
    loaded, passed through ``transform`` and ``model``, and the top-1 class
    index is translated to a name via ``imagenet_labels``. That name is mapped
    to a coarse category with ``fine_to_coarse`` ("other" when unmapped), and a
    record is appended to the module-level ``results`` list and printed.

    Args:
        folder_path: Directory containing the images to classify.
        label_name: Expected coarse category for every image in the folder.

    Returns:
        None. Results are accumulated in the global ``results`` list.

    Files that fail to load or classify are reported with a message and
    skipped; they do not appear in ``results``.
    """
    print(f"\nFolder: {folder_path}")

    # Send inputs to wherever the model's weights actually live, so the
    # forward pass cannot fail on a model/input device mismatch.
    model_device = next(model.parameters()).device

    # sorted(): os.listdir order is arbitrary, and a stable order keeps the
    # printed log and the results table reproducible between runs.
    for fname in sorted(os.listdir(folder_path)):
        if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        img_path = os.path.join(folder_path, fname)
        try:
            img = default_loader(img_path)
            # unsqueeze(0) adds the leading batch dimension for a single image.
            img_tensor = transform(img).unsqueeze(0).to(model_device)
            # Only the forward pass needs gradient tracking disabled.
            with torch.no_grad():
                output = model(img_tensor)
            pred_idx = torch.argmax(output, dim=1).item()
            pred_label = imagenet_labels[pred_idx]
            coarse_label = fine_to_coarse.get(pred_label, "other")
            correct = (coarse_label == label_name)
            results.append({
                "filename": fname,
                "true_label": label_name,
                "predicted_label": pred_label,
                "coarse_label": coarse_label,
                "correct": correct
            })
            print(f"{fname:<20} | True: {label_name:<9} | Predicted: {pred_label:<20} | Mapped: {coarse_label} | Correct: {correct}")
        except Exception as e:
            # Best-effort: one bad file should not abort the whole folder.
            print(f"Failed to process {fname}: {e}")

# Classify both preprocessed folders; each pair is (folder, expected coarse label).
for ready_dir, expected_label in (
    ("bear_images/mobilenet_ready", "bear"),
    ("elephant_images/mobilenet_ready", "elephant"),
):
    predict_folder(ready_dir, expected_label)

# Evaluate coarse-label accuracy
correct = sum(1 for r in results if r['correct'])
total = len(results)
if total:
    print(f"\n✅ Coarse-Grained Accuracy: {correct}/{total} = {100 * correct / total:.2f}%")
else:
    # Nothing was classified (empty folders, or every file failed to process):
    # report that instead of dividing by zero.
    print("\n⚠️ Coarse-Grained Accuracy: n/a (no images were classified)")

# Optional: display as DataFrame in notebook
import pandas as pd
results_df = pd.DataFrame(results)
display(results_df)


### 🔍 Coarse-Grained Classification Summary

We now check whether MobileNetV2 with pretrained ImageNet weights correctly classifies each image from the folders below as **some kind of bear** or **some kind of elephant**.

- 🐻 `"brown bear"`, `"ice bear"`, etc. → **bear**
- 🐘 `"African elephant"`, `"tusker"`, etc. → **elephant**

This checks how well the model generalizes without needing exact species matches.



Folder: bear_images/mobilenet_ready
000001.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000002.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000003.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000004.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000005.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000006.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000007.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000008.jpg           | True: bear      | Predicted: ice bear             | Mapped: bear | Correct: True
000009.jpg           | True: bear      | Predicted: brown bear           | Mapped: bear | Correct: True
000010.jpg           | True

Unnamed: 0,filename,true_label,predicted_label,coarse_label,correct
0,000001.jpg,bear,brown bear,bear,True
1,000002.jpg,bear,brown bear,bear,True
2,000003.jpg,bear,brown bear,bear,True
3,000004.jpg,bear,brown bear,bear,True
4,000005.jpg,bear,brown bear,bear,True
5,000006.jpg,bear,brown bear,bear,True
6,000007.jpg,bear,brown bear,bear,True
7,000008.jpg,bear,ice bear,bear,True
8,000009.jpg,bear,brown bear,bear,True
9,000010.jpg,bear,brown bear,bear,True


In [3]:
import requests

# Get labels from PyTorch GitHub (used for ImageNet models)
url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
response = requests.get(url, timeout=30)  # never hang on a stalled connection
# Fail loudly on an HTTP error so an error page is never saved as the label list.
response.raise_for_status()
labels = response.text.strip().split("\n")

# Save to a text file, one label per line. The encoding is explicit so the
# written file does not depend on the platform's default encoding.
with open("mobilenet_imagenet_labels.txt", "w", encoding="utf-8") as f:
    for label in labels:
        f.write(label + "\n")

print("✅ Saved labels to mobilenet_imagenet_labels.txt")


✅ Saved labels to mobilenet_imagenet_labels.txt
