# Yellow Flannel Classifier

Creates a PyTorch model using theneedledrop YouTube thumbnails to train a convolutional neural network (CNN) for binary image classification.

In [1]:
# Root directory of the thumbnail dataset. It is passed to torchvision's
# ImageFolder, which expects one sub-directory per class under this path.
dataset_root = "./data"

Import the necessary libraries

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.optimizer import Optimizer
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

from typing import Tuple

Define transformations and load the dataset

In [3]:
# Pre-processing applied to every thumbnail as it is loaded:
#   1. resize to 32x32 (the spatial size the CNN's first linear layer is sized for),
#   2. convert the PIL image to a float tensor with values in [0, 1],
#   3. rescale each channel with (x - 0.5) / 0.5, i.e. to roughly [-1, 1].
# Mean/std are spelled out once per RGB channel rather than relying on a
# one-element tuple being broadcast across all three channels.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# ImageFolder assigns integer labels from the sub-directory names under dataset_root.
dataset = datasets.ImageFolder(root=dataset_root, transform=transform)

Split the dataset into training and validation sets

In [4]:
# 80/20 train/validation split. The permutation is drawn from a dedicated,
# explicitly seeded generator so that Restart & Run All reproduces the same
# partition (and therefore comparable validation numbers) on every run.
split_seed = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

In [5]:
# Wrap both splits in DataLoaders that share one batch size: training batches
# are reshuffled each epoch, validation batches keep a fixed order.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

Create a CNN model for binary classification

In [6]:
# Create a CNN model for binary classification
class BinaryCNN(nn.Module):
    """Small two-stage convolutional network for binary image classification.

    Layout: conv(3->16) -> ReLU -> 2x2 max-pool -> conv(16->32) -> ReLU ->
    2x2 max-pool -> linear(32*8*8 -> 64) -> ReLU -> linear(64 -> 1) -> sigmoid.

    The 3x3 convolutions use padding 1 and stride 1, so only the two pooling
    layers change the spatial size (each halves it). The first linear layer is
    sized for an 8x8 feature map, which corresponds to 32x32 input images.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        # One pooling module is reused after both convolutions (it has no parameters).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        """Return one probability in [0, 1] per image.

        Args:
            x: Batch of images with 3 channels and 32x32 spatial size,
                i.e. shape ``(N, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(N, 1)`` holding sigmoid outputs.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                32*8*8 features expected by ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 32 * 8 * 8),
        # this never redistributes surplus features into extra "samples": a
        # wrongly sized input fails loudly at fc1 instead of silently changing N.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x)

model = BinaryCNN()


Define the loss function and optimizer

In [7]:
# Binary cross-entropy on the model's sigmoid outputs, minimised with Adam.
learning_rate = 1e-3
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

Train the model

In [8]:
# Train the model
def train(model: nn.Module, train_loader: DataLoader, optimizer: Optimizer, criterion: nn.BCELoss, device: torch.device) -> float:
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            around each backward pass.
        criterion: Loss applied to ``model(images)`` and the labels reshaped
            to a float column vector. The averaging below assumes the loss is
            mean-reduced per batch (the ``nn.BCELoss`` default).
        device: Device every batch is moved to before the forward pass.

    Returns:
        The loss averaged over all samples seen in this pass. Each batch loss
        is weighted by its batch size so that a smaller final batch does not
        count as much as a full one.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images = images.to(device)
        # Labels become a float column vector so they line up with the (N, 1) outputs.
        labels = labels.to(device).float().view(-1, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        samples_seen += batch_size

    if samples_seen == 0:
        raise ValueError("train_loader yielded no samples; cannot compute an average loss")
    return loss_sum / samples_seen

Evaluate the model

In [9]:
# Evaluate the model
def evaluate(model: nn.Module, val_loader: DataLoader, criterion: nn.BCELoss, device: torch.device) -> Tuple[float, float]:
    """Score ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss applied to ``model(images)`` and the labels reshaped
            to a float column vector. The averaging below assumes the loss is
            mean-reduced per batch (the ``nn.BCELoss`` default).
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy_percent)``. The loss is averaged over all
        samples (each batch loss weighted by its batch size); the accuracy is
        the share of samples whose rounded output equals the label, in percent.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():  # no gradients are needed for scoring
        for images, labels in val_loader:
            images = images.to(device)
            # Labels become a float column vector so they line up with the (N, 1) outputs.
            labels = labels.to(device).float().view(-1, 1)
            outputs = model(images)

            batch_size = labels.size(0)
            loss_sum += criterion(outputs, labels).item() * batch_size

            # Rounding the output turns it into a hard 0/1 prediction.
            pred_labels = torch.round(outputs)
            correct_preds += (pred_labels == labels).sum().item()
            total_preds += batch_size

    if total_preds == 0:
        raise ValueError("val_loader yielded no samples; cannot compute loss or accuracy")
    return loss_sum / total_preds, correct_preds / total_preds * 100


Run the training and evaluation loop

In [10]:
# Training configuration: epoch budget and compute device (GPU when one is available).
epochs = 100
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# Each epoch is one training pass followed by one validation pass;
# the metrics are only reported on every tenth epoch.
for epoch in range(1, epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)
    if epoch % 10 != 0:
        continue
    print(f"Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")


Epoch 10/100, Train Loss: 0.0631, Val Loss: 0.0406, Val Acc: 99.60%
Epoch 20/100, Train Loss: 0.0499, Val Loss: 0.0317, Val Acc: 99.80%
Epoch 30/100, Train Loss: 0.0407, Val Loss: 0.0256, Val Acc: 99.80%
Epoch 40/100, Train Loss: 0.0329, Val Loss: 0.0220, Val Acc: 99.80%
Epoch 50/100, Train Loss: 0.0266, Val Loss: 0.0195, Val Acc: 99.60%
Epoch 60/100, Train Loss: 0.0221, Val Loss: 0.0165, Val Acc: 99.80%
Epoch 70/100, Train Loss: 0.0184, Val Loss: 0.0144, Val Acc: 99.80%
Epoch 80/100, Train Loss: 0.0158, Val Loss: 0.0121, Val Acc: 99.80%
Epoch 90/100, Train Loss: 0.0133, Val Loss: 0.0114, Val Acc: 99.80%
Epoch 100/100, Train Loss: 0.0112, Val Loss: 0.0096, Val Acc: 99.80%


## Analysis

Human performance is 100% accurate.

The model might still have some bias. We want the training error to be close to human-level performance, i.e. the train loss should approach zero.

I should add some print statements to see how many yellow flannels are in the training set and validation set. 

Assuming the data split is ok, I should increase the size of the CNN.

## Summary
For each of the specified number of epochs, this code trains the model on the training set and then evaluates it on the validation set. Every 10th epoch it prints the training loss, validation loss, and validation accuracy.

Adjust the number of epochs, learning rate, and other hyperparameters as needed to improve the model's accuracy.

