In [2]:
import torch
import torchvision
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
import zipfile
import os
import cv2
import matplotlib.pyplot as plt


In [3]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [4]:
# Extract dataset using raw string
with zipfile.ZipFile(r'C:\Users\Admin\OneDrive\Desktop\archive (1).zip', 'r') as zip_ref:
    zip_ref.extractall(r'extracted_archive(1)')

In [5]:
# Check extracted folder
print(os.listdir('extracted_archive(1)'))

['Waste Images']


In [6]:
# Load dataset using raw string to avoid backslash errors
dataset_path = r'C:\Users\Admin\OneDrive\Desktop\extracted_archive (1)'
data_set = ImageFolder(dataset_path, transform=transform)

In [7]:
# Print dataset stats
print(f"Number of classes: {len(data_set.classes)}")
print(f"Number of images: {len(data_set)}")

Number of classes: 1
Number of images: 8235


In [8]:
# Split dataset
train_size = int(0.75 * len(data_set))
test_size = len(data_set) - train_size
train_dataset, test_dataset = random_split(data_set, [train_size, test_size])

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

In [9]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=2)


In [10]:
# Visualization function
def visualize(dataloader):
    data = next(iter(dataloader))
    images, labels = data
    grid = make_grid(images, nrow=8).permute(1, 2, 0).numpy()
    plt.figure(figsize=(16, 16))
    plt.imshow(grid)
    plt.axis('off')
    plt.show()

In [11]:
# LeNet model definition
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(in_features=29 * 29 * 16, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=len(data_set.classes))  # Change to the number of classes
        )

    def forward(self, x):
        return self.model(x)

In [12]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create model
model = LeNet().to(device)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()


In [13]:
# Training loop
epochs = 5
training_loss = []
validation_loss = []

for epoch in range(epochs):
    model.train()
    epoch_train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        train_loss = loss_fn(outputs, labels)
        train_loss.backward()
        optimizer.step()
        epoch_train_loss += train_loss.item() * images.size(0)

    epoch_train_loss /= len(train_loader.dataset)
    training_loss.append(epoch_train_loss)

In [14]:
# Validation phase
model.eval()
epoch_val_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss = loss_fn(outputs, labels)
            epoch_val_loss += val_loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

epoch_val_loss /= len(val_loader.dataset)
validation_loss.append(epoch_val_loss)
accuracy = 100 * correct / total

print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}, Accuracy: {accuracy:.2f}%')

print("Training complete.")

Epoch [5/5], Train Loss: 0.0000, Val Loss: 0.0000, Accuracy: 100.00%
Training complete.


In [15]:
# Evaluate on test data
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

overall_accuracy = correct / total * 100
print(f'Overall Test Accuracy: {overall_accuracy:.2f}%')

Overall Test Accuracy: 100.00%


In [16]:
# Save the model
torch.save(model.state_dict(), 'lenet_model.pth')


In [17]:
# Camera capture and categorization
cap = cv2.VideoCapture(0)  # Change 0 to 1 or 2 if you have multiple cameras

if not cap.isOpened():
    print("Error: Could not open video.")
else:
    print("Camera is open. Press 'q' to quit.")
    
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break


Camera is open. Press 'q' to quit.


KeyboardInterrupt: 

In [18]:
# Preprocess the frame for prediction
frame_resized = cv2.resize(frame, (128, 128))
frame_tensor = transform(frame_resized).unsqueeze(0).to(device)  # Add batch dimension and move to device

        # Get predictions
model.eval()
with torch.no_grad():
            outputs = model(frame_tensor)
            _, predicted = torch.max(outputs.data, 1)
            predicted_class = data_set.classes[predicted.item()]  # Get class name from index


TypeError: Unexpected type <class 'numpy.ndarray'>

In [None]:
# Display the result
cv2.putText(frame, f'Predicted: {predicted_class}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Camera', frame)

if cv2.waitKey(1) & 0xFF == ord('q'):
        break

In [None]:
# Release the camera and close windows
cap.release()
cv2.destroyAllWindows()