In [None]:
# Block 1: Install fastai and duckduckgo_search (for image downloading)
!pip install -Uqq fastai ddgs

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import sys
# Upgrade ddgs and fastai by invoking pip through sys.executable, i.e. the same
# Python interpreter that is running this kernel.
# NOTE(review): the previous install cell already installs these two packages —
# confirm whether both cells are still needed.
!{sys.executable} -m pip install -U ddgs fastai



In [None]:
# Block 2: Import libraries and setup search function
from fastai.vision.all import *
from ddgs import DDGS
import time

def search_images(term, max_images=20):
    """Search DuckDuckGo images for `term` and return the image URLs.

    Args:
        term: Query string sent to the image search.
        max_images: Maximum number of results requested from the search.

    Returns:
        A fastcore `L` of image URL strings. It is empty when the search
        yields nothing usable.
    """
    print(f"Searching for '{term}'...")

    with DDGS() as ddgs:
        results = ddgs.images(term, max_results=max_images)

        # No per-result sleep here: by the time we iterate, the search request
        # has already completed, so sleeping between results throttles nothing.
        # Callers that issue several searches should pause between calls instead.
        # Entries lacking an "image" URL are skipped rather than raising KeyError.
        urls = [r["image"] for r in (results or []) if r.get("image")]

    return L(urls)


In [None]:
# Block 3: Download images (Forest vs Bird)
# Each search term gets its own sub-folder under `path`; the folder name is later
# used as the class label.
searches = 'forest', 'bird'
path = Path('bird_or_not')

for o in searches:
    dest = path/o
    dest.mkdir(exist_ok=True, parents=True)

    urls = search_images(f'{o} photo', max_images=20)
    download_images(dest, urls=urls)
    # resize_images writes its output to `dest=Path('.')` by default, which would
    # drop the resized copies into the working directory and leave the files in
    # the category folder at full size. Pass dest explicitly to resize in place.
    resize_images(dest, max_size=400, dest=dest)

    time.sleep(5)  # pause between categories so consecutive searches are not rate limited




Searching for 'forest photo'...
Searching for 'bird photo'...


In [None]:
# Find every downloaded file that cannot be opened as an image, then delete it
# so that it cannot break data loading later on.
failed = verify_images(get_image_files(path))
for broken_file in failed:
    broken_file.unlink()
print(f"Removed {len(failed)} corrupted images")


Removed 1 corrupted images


In [None]:
# Block 4: Train the Model
# "DataBlock" tells fastai how to read the data (Inputs=Images, Output=Categories)

from fastai.callback.progress import ProgressCallback

VALID_PCT = 0.2
MAX_BATCH_SIZE = 32

# The training DataLoader shuffles and drops the last incomplete batch. If the
# batch size exceeds the number of training images, there are ZERO training
# batches: train_loss is reported as nan and the weights never change (the
# validation metrics stay identical on every epoch). Derive the batch size from
# the dataset so that each epoch has several full batches.
n_items = len(get_image_files(path))
n_train = n_items - int(VALID_PCT * n_items)  # mirrors RandomSplitter's split
batch_size = max(2, min(MAX_BATCH_SIZE, n_train // 4))

# Create DataLoaders
dls = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=VALID_PCT, seed=42),
    get_y=parent_label,  # label = name of the folder containing the image
    item_tfms=Resize(192, method='squish')
).dataloaders(path, bs=batch_size)

# Fail fast rather than silently "training" on nothing.
if len(dls.train) == 0:
    raise ValueError(
        f"Only {n_train} training images for batch size {batch_size}: "
        "the training DataLoader is empty. Download more images."
    )

# Create learner
learn = vision_learner(dls, resnet18, metrics=error_rate)

learn.remove_cbs(ProgressCallback)

# Train without progress bar
learn.fine_tune(5)


[0, nan, 2.116076707839966, 0.7142857313156128, '00:08']
[0, nan, 2.116076707839966, 0.7142857313156128, '00:07']
[1, nan, 2.116076707839966, 0.7142857313156128, '00:07']
[2, nan, 2.116076707839966, 0.7142857313156128, '00:09']
[3, nan, 2.116076707839966, 0.7142857313156128, '00:07']
[4, nan, 2.116076707839966, 0.7142857313156128, '00:08']


In [None]:
# Block 5: Test with a new image
# We'll search for one bird image to test.
# NOTE(review): this reuses the training query ('bird photo'), so the top hit was
# probably also downloaded into the training folder — confirm, and consider a
# different query if a genuinely unseen image is wanted.

from fastdownload import download_url

urls = search_images('bird photo', max_images=1)
if len(urls) == 0:
    raise RuntimeError("Image search returned no results; cannot download a test image.")
download_url(urls[0], 'test_bird.jpg', show_progress=False)

# `is_bird` holds the predicted class label (a string-like category), not a boolean.
is_bird,_,probs = learn.predict(PILImage.create('test_bird.jpg'))

# Look up the position of the 'bird' class instead of assuming it is index 0,
# so the printed probability stays correct if the set of categories changes.
bird_idx = learn.dls.vocab.o2i['bird']
print(f"This is a: {is_bird}.")
print(f"Probability it's a bird: {probs[bird_idx]:.4f}")

Searching for 'bird photo'...
This is a: forest.
Probability it's a bird: 0.1132
