<a href="https://colab.research.google.com/github/ssubedir/torch-deep-down/blob/main/CNN_Hotdog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hotdog CNN

In [None]:
!git lfs install
!git clone https://huggingface.co/datasets/truepositive/hotdog_nothotdog

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # Input channels=3, Output channels=32
        self.pool1 = nn.MaxPool2d(2, 2)  # Pooling layer for conv1

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # Input channels=32, Output channels=64
        self.pool2 = nn.MaxPool2d(2, 2)  # Pooling layer for conv2

        # Flatten layer will be applied in forward pass

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 37 * 37, 128)  # Adjusted based on input size after pooling (150x150 -> 37x37)
        self.fc2 = nn.Linear(128, 1)  # binary output

        # Dropout layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Convolutional and pooling layers with ReLU activation
        x = self.pool1(F.relu(self.conv1(x)))  # After conv1 + pool1
        x = self.pool2(F.relu(self.conv2(x)))  # After conv2 + pool2

        # Flatten
        x = x.view(-1, 64 * 37 * 37)  # Adjust based on input dimensions after pooling

        # Fully connected layers with ReLU and dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        # Output layer with sigmoid activation for binary classification
        x = torch.sigmoid(self.fc2(x))

        return x


In [4]:
# Define transformations for the images
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # resize images to 64x64
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalize
])

# Set up paths to your dataset
train_path = '/content/hotdog_nothotdog/train'
val_path = '/content/hotdog_nothotdog/val'

# Load datasets
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [5]:
print(train_dataset.class_to_idx)


{'hotdog': 0, 'not_hotdog': 1}


In [6]:
# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [8]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

# Define the model, criterion, and optimizer
model = SimpleCNN()
criterion = nn.BCELoss()  # For binary classification with sigmoid output
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training parameters
num_epochs = 10
batch_size = 32

# Assuming you have a DataLoader for your dataset
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Training Loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move to device (GPU/CPU)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        outputs = outputs.squeeze()  # Remove dimensions of size 1 for compatibility

        # Compute loss
        loss = criterion(outputs, labels.float())  # Convert labels to float for BCELoss
        running_loss += loss.item()

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accuracy calculation
        predictions = (outputs > 0.5).float()  # Threshold predictions at 0.5
        correct += (predictions == labels).sum().item()
        total += labels.size(0)

    # Print statistics for the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct / total * 100
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    # Validation (optional)
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # No gradient computation for validation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels.float())
            val_loss += loss.item()
            predictions = (outputs > 0.5).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    val_loss /= len(val_loader)
    val_accuracy = correct / total * 100
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

print("Training completed.")


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [19]:
!! wget https://github.com/pytorch/hub/raw/master/images/dog.jpg
!! wget https://media.npr.org/assets/img/2023/06/29/img_1753-1500x1125_wide-c265254b8cd9a1647e3306e717361f529b2d8a09.jpg?s=800&c=85&f=webp

['--2024-11-12 19:11:57--  https://media.npr.org/assets/img/2023/06/29/img_1753-1500x1125_wide-c265254b8cd9a1647e3306e717361f529b2d8a09.jpg?s=800',
 'Resolving media.npr.org (media.npr.org)... 173.223.234.25, 173.223.234.45, 2600:1406:bc00:31::b81c:922d',
 'Connecting to media.npr.org (media.npr.org)|173.223.234.25|:443... connected.',
 'HTTP request sent, awaiting response... 200 OK',
 'Length: unspecified [image/jpeg]',
 'Saving to: ‘img_1753-1500x1125_wide-c265254b8cd9a1647e3306e717361f529b2d8a09.jpg?s=800’',
 '',
 '',
 '          img_1753-     [<=>                 ]       0  --.-KB/s               ',
 'img_1753-1500x1125_     [ <=>                ] 109.42K  --.-KB/s    in 0.07s   ',
 '',
 '2024-11-12 19:11:57 (1.47 MB/s) - ‘img_1753-1500x1125_wide-c265254b8cd9a1647e3306e717361f529b2d8a09.jpg?s=800’ saved [112051]',
 '']

In [22]:
from PIL import Image
import torchvision.transforms as transforms

# Define preprocessing steps to match training
preprocess = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize to match input size
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize
])

def preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')  # Open and ensure RGB
    image = preprocess(image)  # Apply preprocessing transformations
    image = image.unsqueeze(0)  # Add batch dimension
    return image

def predict_image(model, image_path):
    model.eval()  # Set the model to evaluation mode
    image = preprocess_image(image_path)  # Preprocess the image
    image = image.to(device)  # Send to device (CPU or GPU)

    # Make the prediction
    with torch.no_grad():  # Disable gradient calculation for inference
        output = model(image).squeeze()  # Get the output and remove extra dimensions
        probability = torch.sigmoid(output).item()  # Get probability for class "nothotdog"

    # Interpret the output
    if probability > 0.5:
        print(f"The image is likely 'nothotdog' with a probability of {probability:.4f}")
    else:
        print(f"The image is likely 'notdog' with a probability of {1 - probability:.4f}")

# Example usage with the in-memory trained model
predict_image(model, '/content/dog.jpg')
predict_image(model, '/content/img_1753-1500x1125_wide-c265254b8cd9a1647e3306e717361f529b2d8a09.jpg')

The image is likely 'nothotdog' with a probability of 0.7311
The image is likely 'nothotdog' with a probability of 0.7310
