In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import tkinter as tk
from settings import url,train, test, model as model_save_path

print(model_save_path)

# Data Transformation
transform = transforms.Compose([
    transforms.Resize((240, 240)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# CNN Model Definition
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

        # Dynamically calculate the flattened size
        dummy_input = torch.zeros(1, 3, 240, 240)
        x = self.pool(self.relu(self.conv1(dummy_input)))
        x = self.pool(self.relu(self.conv2(x)))
        self.flattened_size = x.numel()

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, self.flattened_size)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load Dataset
all_data = datasets.ImageFolder(train, transform=transform)
class_names = all_data.classes
classes = len(class_names)

# Split Dataset into Training and Validation
train_ratio = 0.8
train_size = int(train_ratio * len(all_data))
val_size = len(all_data) - train_size
train_data, val_data = random_split(all_data, [train_size, val_size])

# Data Loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

# Load Test Dataset
test_data = datasets.ImageFolder(test, transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Initialize Model
model = CNN(num_classes=classes).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and Validation Functions
def train_one_epoch(model, loader, loss_fn, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

def validate(model, loader, loss_fn, device):
    model.eval()
    total, correct = 0, 0
    running_loss = 0.0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    accuracy = 100 * correct / total
    return running_loss / len(loader), accuracy

def evaluate_test_accuracy(model, test_loader, device):
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

def predict_from_webcam(frame, model, class_names, transform, device):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_tensor = transform(Image.fromarray(image)).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        probabilities = F.softmax(output, dim=1)[0]
        predicted_index = probabilities.argmax().item()
        predicted_class = class_names[predicted_index]
        confidence = probabilities[predicted_index].item() * 100
    return predicted_class, confidence

# Mode Selection
def select_mode():
    def train_clicked():
        global mode
        mode = "train"
        root.destroy()

    def test_clicked():
        global mode
        mode = "test"
        root.destroy()

    def realtime_clicked():
        global mode
        mode = "realtime"
        root.destroy()

    root = tk.Tk()
    root.title("Mode Selection")
    root.geometry("300x200")

    label = tk.Label(root, text="Select Mode", font=("Arial", 14))
    label.pack(pady=20)

    train_button = tk.Button(root, text="Train", command=train_clicked, width=10, bg="lightblue")
    train_button.pack(pady=5)

    test_button = tk.Button(root, text="Test", command=test_clicked, width=10, bg="lightgreen")
    test_button.pack(pady=5)

    realtime_button = tk.Button(root, text="Real-Time", command=realtime_clicked, width=10, bg="lightyellow")
    realtime_button.pack(pady=5)

    root.mainloop()

# Main Script
mode = None
select_mode()

if mode == "train":
    # Training Loop
    epochs = 10
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        train_loss = train_one_epoch(model, train_loader, loss_fn, optimizer, device)
        val_loss, val_accuracy = validate(model, val_loader, loss_fn, device)
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved at {model_save_path}")

elif mode == "test":
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    print(f"Model loaded from {model_save_path}")
    test_accuracy = evaluate_test_accuracy(model, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.2f}%")

elif mode == "realtime":
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model.eval()
    print("Starting real-time predictions. Press 'q' to quit.")
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (640, 480))
        predicted_class, confidence = predict_from_webcam(frame, model, class_names, transform, device)
        cv2.putText(frame, f"{predicted_class}: {confidence:.2f}%", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Real-Time Predictions", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

else:
    print("Invalid mode selected.")


C:\Users\x\Documents\GitHub\x\AI\birds\birds-v1.pth


  model.load_state_dict(torch.load(model_save_path, map_location=device))


Model loaded from C:\Users\x\Documents\GitHub\x\AI\birds\birds-v1.pth
Test Accuracy: 83.33%
