In [26]:
# run only once
!pip install duckduckgo_search

In [2]:
from duckduckgo_search import DDGS

def search_images(keyword, max_results=10):
    """Run a DuckDuckGo image search and collect the image URLs.

    Args:
        keyword: Query string handed to the image search.
        max_results: Forwarded to the search call as ``max_results``.

    Returns:
        A list with the ``'image'`` entry of every result the search yields,
        in the order the results were returned.
    """
    with DDGS() as client:
        hits = client.images(keyword, max_results=max_results)
        urls = []
        for hit in hits:
            urls.append(hit['image'])
        return urls

In [7]:
# Query for the first class; up to 1000 results are requested from the search.
keyword = "teddybear"
image_urls = search_images(keyword, 1000)
# Display how many URLs actually came back (may be fewer than requested).
len(image_urls)

418

In [8]:
# Spot-check one result by eye (presumably an arbitrary index); raises
# IndexError if the search returned fewer than 124 URLs.
image_urls[123]

'https://i.etsystatic.com/23408950/r/il/4ed2a5/3977829066/il_fullxfull.3977829066_f608.jpg'

In [18]:
import os
import requests
from urllib.parse import urlparse
import warnings

def _unique_filepath(url, folder, custom_name=None):
    """Build a path inside ``folder`` that does not collide with an existing file."""
    # A caller-supplied name wins; otherwise use the last segment of the URL
    # path, falling back to a fixed name when the URL has no final segment.
    filename = custom_name or os.path.basename(urlparse(url).path) or 'image.jpg'

    # Ensure the filename has an extension
    if not os.path.splitext(filename)[1]:
        filename += '.jpg'

    filepath = os.path.join(folder, filename)

    # If the file already exists, append a number to make it unique
    base, extension = os.path.splitext(filepath)
    counter = 1
    while os.path.exists(filepath):
        filepath = f"{base}_{counter}{extension}"
        counter += 1
    return filepath


def download_image(url, folder, custom_name=None, verbose=True):
    """Download a single image from ``url`` into ``folder``.

    Args:
        url: Address of the image to fetch.
        folder: Destination directory; created if it does not exist.
        custom_name: Optional filename to use instead of the one found in the
            URL. ``.jpg`` is appended when the name has no extension, and a
            ``_<n>`` suffix is added when the name is already taken.
        verbose: When true, print a message on success and emit a warning on
            failure; when false, stay silent.

    Returns:
        True if the image was written to disk, False otherwise. Request and
        file errors are reported through ``warnings.warn`` (when ``verbose``)
        rather than raised.
    """
    # Create the folder if it doesn't exist
    os.makedirs(folder, exist_ok=True)
    filepath = _unique_filepath(url, folder, custom_name)

    try:
        # Send a GET request to the URL with a timeout of 10 seconds
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises an HTTPError for bad responses

        # Media types are case-insensitive, so normalise before comparing.
        content_type = response.headers.get('content-type', '')
        if not content_type.strip().lower().startswith('image'):
            if verbose:
                warnings.warn(f"The URL does not point to an image. Content-Type: {content_type}")
            return False

        # An empty body would be saved as a zero-byte file, which is not a
        # usable image, so treat it as a failed download.
        if not response.content:
            if verbose:
                warnings.warn(f"The URL returned an empty response body: {url}")
            return False

        # Write the image content to the file
        try:
            with open(filepath, 'wb') as f:
                f.write(response.content)
        except IOError:
            # Do not leave a truncated file in the dataset folder; the error
            # itself is still reported by the outer handler below.
            try:
                os.remove(filepath)
            except OSError:
                pass  # nothing was created, or it cannot be removed either
            raise

        if verbose:
            print(f"Image successfully downloaded: {filepath}")
        return True

    except requests.exceptions.Timeout:
        if verbose:
            warnings.warn(f"Download timed out for URL: {url}")
    except requests.exceptions.HTTPError as e:
        if verbose:
            warnings.warn(f"HTTP error occurred: {e}")
    except requests.exceptions.RequestException as e:
        if verbose:
            warnings.warn(f"An error occurred while downloading the image: {e}")
    except IOError as e:
        if verbose:
            warnings.warn(f"An error occurred while writing the file: {e}")

    return False

In [20]:
from tqdm.notebook import tqdm

# Download every teddy-bear URL into its class folder as image<i>.jpg.
# verbose=False silences per-image messages and the True/False result of
# download_image is ignored, so failed downloads are skipped silently.
# NOTE(review): download_image never overwrites; it appends _<n> to a name that
# already exists, so re-running this cell adds duplicate files to the folder.
for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/teddybear/", f'image{i}.jpg', verbose=False)

  0%|          | 0/418 [00:00<?, ?it/s]

In [23]:
# Second class: the query string includes the double quotes (presumably to ask
# for the exact phrase); up to 200 results are requested.
# This rebinds image_urls, replacing the teddy-bear URL list.
image_urls = search_images('"black bear"', 200)
# Display how many URLs actually came back.
len(image_urls)

175

In [24]:
# Download every black-bear URL into its class folder as image<i>.jpg.
# As with the teddy-bear loop, failures are silent (verbose=False, return value
# ignored).
# NOTE(review): re-running this cell adds duplicates, because download_image
# appends _<n> to existing names instead of overwriting.
for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/blackbear/", f'image{i}.jpg', verbose=False)

  0%|          | 0/175 [00:00<?, ?it/s]

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F

# Data transformations: resize every image to 28x28 and convert it to a tensor
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

# Load the dataset; ImageFolder derives the class labels from the
# sub-directories of `dataset`
dataset = datasets.ImageFolder(root='dataset', transform=transform)

# Split into training and validation sets (80% train, 20% validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same images land in the validation set on every run;
# with an unseeded split the validation metrics are not comparable between
# kernel restarts.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Data loaders: shuffle only the training data
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define the simple CNN
class SimpleCNN(nn.Module):
    """Minimal CNN classifier: one convolution, one max-pool, one linear layer.

    Expects input batches shaped (batch_size, 3, 28, 28); the linear layer is
    sized for exactly that resolution.

    Args:
        num_classes: Number of output scores per sample. Defaults to 2.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Input channels = 3 (RGB), output channels = 16; padding=1 with a 3x3
        # kernel keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        # 2x2 max pooling with stride 2 halves height and width (28 -> 14)
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected layer (16 * 14 * 14 input features)
        self.fc1 = nn.Linear(16 * 14 * 14, num_classes)

    def forward(self, x):
        """Return raw class scores of shape (batch_size, num_classes)."""
        # Convolutional layer with ReLU activation and pooling
        x = self.pool(F.relu(self.conv1(x)))  # Output: batch_size x 16 x 14 x 14
        # Flatten each sample while keeping the batch dimension fixed. Using
        # view(-1, 16 * 14 * 14) would silently fold extra pixels of a
        # wrongly-sized input into the batch dimension; this way the linear
        # layer raises a shape error instead.
        x = x.flatten(1)
        # Fully connected layer
        return self.fc1(x)

# Instantiate the model, define the loss function and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one pass over the training data, then one validation pass, per epoch
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by batch size so the epoch
        # average stays correct when the last batch is smaller
        running_loss += loss.item() * inputs.size(0)

    # Calculate average training loss
    epoch_loss = running_loss / len(train_dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}')

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)

            # Count correct predictions as a plain int (no legacy `.data`
            # access, no 0-d tensor accumulator)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

    # Calculate average validation loss and accuracy
    val_loss /= len(val_dataset)
    val_accuracy = correct / len(val_dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

print("Training complete.")



Epoch 1/5, Training Loss: 0.4643
Epoch 1/5, Validation Loss: 0.4380, Validation Accuracy: 0.7419
Epoch 2/5, Training Loss: 0.3444
Epoch 2/5, Validation Loss: 0.3582, Validation Accuracy: 0.7903
Epoch 3/5, Training Loss: 0.2895
Epoch 3/5, Validation Loss: 0.3018, Validation Accuracy: 0.8387
Epoch 4/5, Training Loss: 0.2166
Epoch 4/5, Validation Loss: 0.2398, Validation Accuracy: 0.9274
Epoch 5/5, Training Loss: 0.1727
Epoch 5/5, Validation Loss: 0.2084, Validation Accuracy: 0.9355
Training complete.
