# Bottle Classification Project Notebook
This notebook covers data exploration, preprocessing, visualization, model training, and results summary for the Bottle Classification project.

## 1. Load and Explore Data

In [2]:
import os
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# --- Reproducibility ---------------------------------------------------------
# Seed the global torch / numpy RNGs before anything random is created, so the
# DataLoader shuffle order and the random augmentations below are repeatable
# after Restart & Run All. (GPU kernels may still introduce small run-to-run
# differences.)
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# --- Configuration: everything a reader might want to tune -------------------
DATA_DIR = 'dataset'          # expects train/, val/ and test/ sub-folders
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
# NOTE(review): torchvision's pretrained weights are documented with ImageNet
# mean/std preprocessing; the 0.5 values are kept unchanged here because other
# code consuming the saved checkpoint may assume them — confirm before changing.
NORM_MEAN = [0.5, 0.5, 0.5]
NORM_STD = [0.5, 0.5, 0.5]

plt.style.use('default')
sns.set_palette('husl')

# Define transforms
# Training: resize + light augmentation; validation/test: deterministic resize only.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD)
])
val_test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD)
])

# Load datasets (one sub-folder per class inside each split directory)
train_dataset = ImageFolder(os.path.join(DATA_DIR, 'train'), transform=train_transform)
val_dataset = ImageFolder(os.path.join(DATA_DIR, 'val'), transform=val_test_transform)
test_dataset = ImageFolder(os.path.join(DATA_DIR, 'test'), transform=val_test_transform)

# Only the training loader is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

classes = train_dataset.classes

# Each ImageFolder derives its label indices from its own folder names, so a
# split with a different set of class folders would silently shift the labels.
for split_name, split_dataset in (('val', val_dataset), ('test', test_dataset)):
    if split_dataset.classes != classes:
        raise ValueError(
            f"Class folders in '{split_name}' split {split_dataset.classes} "
            f"do not match the train split {classes}"
        )

print('Classes:', classes)
print(f'Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}')

ModuleNotFoundError: No module named 'seaborn'

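## 2. Data Preprocessing

The preprocessing transforms (resize, augmentation, normalization) are defined in Section 1. This section counts the training images per class to check class balance.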

In [None]:
# Tally how many training images belong to each class.
# `train_dataset.samples` holds (path, class_index) pairs; only the index is needed.
class_counts = dict.fromkeys(classes, 0)
for _path, class_idx in train_dataset.samples:
    class_name = classes[class_idx]
    class_counts[class_name] = class_counts[class_name] + 1

print('Number of images per class (Train):')
for class_name in class_counts:
    print(f'{class_name}: {class_counts[class_name]}')

## 3. Data Visualization

In [None]:
# Visualize sample images from each class (up to `n_per_class` per row)
n_per_class = 5

# squeeze=False keeps `axes` two-dimensional even when there is a single class,
# so every cell can be addressed as axes[row, col] without a special case.
fig, axes = plt.subplots(
    len(classes), n_per_class,
    figsize=(15, 3*len(classes)),
    squeeze=False,
)

# Switch every axis off up front: a class with fewer than `n_per_class` images
# then leaves clean blank cells instead of empty plots with ticks.
for ax in axes.ravel():
    ax.axis('off')

# Collect the first few image paths of each class in a single pass over the
# sample list instead of re-scanning it once per class.
paths_by_class = {cls_idx: [] for cls_idx in range(len(classes))}
for img_path, cls_idx in train_dataset.samples:
    if len(paths_by_class[cls_idx]) < n_per_class:
        paths_by_class[cls_idx].append(img_path)

for cls_idx, cls_name in enumerate(classes):
    for col, img_path in enumerate(paths_by_class[cls_idx]):
        ax = axes[cls_idx, col]
        # Show the raw file as read from disk (no training transforms applied).
        ax.imshow(plt.imread(img_path))
        ax.set_title(cls_name)
plt.tight_layout()
plt.show()

# Bar plot for class distribution
# Uses `class_counts` computed in the per-class counting cell above.
fig, ax = plt.subplots(figsize=(6,4))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), ax=ax)
ax.set_title('Class Distribution (Train Set)')
ax.set_xlabel('Classes')
ax.set_ylabel('Number of images')
plt.show()

## 4. Model Training

In [None]:
# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model
# Pretrained ResNet-18 with its final layer replaced by a fresh linear head
# that has one output per bottle class.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Learning rate is multiplied by 0.1 every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

num_epochs = 8
checkpoint_path = 'best_bottle_model.pth'
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint, even if validation accuracy is 0%.
best_val_acc = -1.0
for epoch in range(num_epochs):
    print(f'\n📌 Epoch {epoch+1}/{num_epochs}')

    # ---- Training pass ----
    model.train()
    running_loss, seen = 0.0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `loss` is the batch mean; weight it by the batch size so a smaller
        # final batch does not distort the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size
    train_loss = running_loss / max(seen, 1)

    # ---- Validation pass ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    # Guard against an empty validation loader.
    val_acc = correct / total if total else 0.0
    print(f'Loss: {train_loss:.4f} | Val Accuracy: {val_acc*100:.2f}%')

    # Keep only the weights of the best-validating epoch on disk.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint_path)
        print('✅ Saved Best Model')
    scheduler.step()

## 5. Results Summary

In [None]:
# Test evaluation
# The training loop saves the best-validating weights to disk, while `model`
# in memory holds the last-epoch weights. Restore the best checkpoint so the
# reported test metrics describe the model that was actually selected.
# Note: this overwrites the weights of the in-memory `model`.
best_checkpoint = 'best_bottle_model.pth'
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))
else:
    print(f'⚠️ {best_checkpoint} not found - evaluating the in-memory (last epoch) weights')

model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        preds = model(images).argmax(dim=1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Pin the label set to every known class so the report rows and the confusion
# matrix always line up with `classes`, even if some class never appears in
# the test labels or predictions.
label_ids = list(range(len(classes)))

accuracy = (np.array(y_true) == np.array(y_pred)).mean()
report = classification_report(y_true, y_pred, labels=label_ids, target_names=classes)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

print(f'📊 Test Accuracy: {accuracy*100:.2f}%')
print('\nClassification Report:')
print(report)

# Confusion Matrix plot (rows: true class, columns: predicted class)
fig, ax = plt.subplots(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=classes, yticklabels=classes, cmap='Blues', ax=ax)
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
ax.set_title('Confusion Matrix')
plt.show()