# Perceptron vs MLP: Universal Dataset Assignment

## Aim / Objective
To implement and compare the performance of the Perceptron algorithm and a Multilayer Perceptron (MLP) on a given dataset (image or CSV), and draw relevant inferences from performance metrics and visualizations.

## Dataset Description
This notebook supports two types of datasets:
- **CSV Dataset**: A numerical dataset (e.g., `Assess-1-QualityPrediction-numericalDataset.csv`) with features and labels.
- **Image Dataset**: Images organized in folders by class labels.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler, LabelEncoder
import seaborn as sns

# Run tensors and the model on the GPU when PyTorch can see one, else on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# Dataset selector read by the cells below; the alternative value is 'image'.
MODE = "csv"

# Step 1: mount Google Drive at /content/drive so the dataset path resolves.
from google.colab import drive

drive.mount("/content/drive")


In [None]:
if MODE == 'csv':
    # Load the tabular dataset from the mounted Drive (adjust the path as needed).
    df = pd.read_csv('/content/drive/MyDrive/Assess-1-QualityPrediction-numericalDataset.csv')
    print("Dataset Shape:", df.shape)
    # A notebook only auto-renders a cell's last *top-level* expression, so a
    # bare `df.head()` nested inside this `if` would show nothing. Print it.
    print(df.head())


## Necessary Pre-processing / Visualization and its Inference

In [None]:
if MODE == 'csv':
    # Correlation heatmap over the numeric columns only: since pandas 2.0,
    # DataFrame.corr() raises on non-numeric columns (e.g. a string label)
    # unless numeric_only=True is passed.
    sns.heatmap(df.corr(numeric_only=True), annot=True, fmt='.2f')
    plt.title("Correlation Matrix")
    plt.show()

    # Features are every column but the last; the last column is the label,
    # encoded to integer class ids 0..n_classes-1.
    X = df.iloc[:, :-1]
    y = LabelEncoder().fit_transform(df.iloc[:, -1])

    # Split BEFORE scaling so the held-out rows never influence the scaler's
    # mean/variance (fitting on the full data leaks test statistics).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)  # statistics learned from train only
    X_test = scaler.transform(X_test)        # same transform applied to test

    # Kept for backward compatibility with any cell that reads the fully
    # scaled matrix; it uses the train-fitted scaler.
    X_scaled = scaler.transform(X)


## Design of the Model and its Summary

In [None]:
if MODE == 'csv':
    print("### Perceptron Model ###")

    # Linear baseline: scikit-learn Perceptron with its default settings.
    clf = Perceptron()
    clf.fit(X_train, y_train)

    # Mean accuracy on the data it was fitted on, then on the held-out split.
    perceptron_train_acc = clf.score(X_train, y_train)
    print("Training Accuracy:", perceptron_train_acc)
    perceptron_test_acc = clf.score(X_test, y_test)
    print("Test Accuracy:", perceptron_test_acc)


In [None]:
if MODE == 'csv':
    class SimpleMLP(nn.Module):
        """MLP with one hidden layer: Linear(input_dim, 64) -> ReLU -> Linear(64, output_dim).

        The forward pass returns raw class scores (logits); no softmax is
        applied because nn.CrossEntropyLoss works on unnormalized logits.
        """

        def __init__(self, input_dim, output_dim):
            super().__init__()
            self.model = nn.Sequential(
                nn.Linear(input_dim, 64),
                nn.ReLU(),
                nn.Linear(64, output_dim),
            )

        def forward(self, x):
            """Return the logits produced by the sequential stack for batch `x`."""
            return self.model(x)

    # Seed PyTorch so weight initialisation (and therefore the reported
    # accuracy) is repeatable, matching the fixed random_state of the split.
    torch.manual_seed(42)

    input_dim = X_train.shape[1]   # number of feature columns
    output_dim = len(set(y))       # number of distinct encoded classes
    model = SimpleMLP(input_dim, output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Move the whole train/test split onto the selected device once.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

    # Per-epoch history consumed by the plotting and summary cells.
    train_losses, test_accuracies = [], []

    for epoch in range(50):
        # One full-batch gradient step on the entire training set.
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

        epoch_loss = loss.item()
        train_losses.append(epoch_loss)

        # Evaluate on the test split without tracking gradients. `preds` from
        # the final epoch is reused by the classification-report cell.
        model.eval()
        with torch.no_grad():
            preds = model(X_test_tensor).argmax(dim=1)
            acc = (preds == y_test_tensor).float().mean().item()
            test_accuracies.append(acc)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Test Accuracy: {acc:.4f}")


## Graphs

In [None]:
# The histories plotted here are only produced by the CSV training cell, so
# guard this cell like the others to avoid a NameError when MODE is not 'csv'.
if MODE == 'csv':
    # Number epochs from 1 so the x-axis matches the "Epoch N" training log.
    epochs = range(1, len(train_losses) + 1)

    plt.plot(epochs, train_losses)
    plt.title("Epochs vs Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.grid()
    plt.show()

    plt.plot(epochs, test_accuracies)
    plt.title("Epochs vs Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.grid()
    plt.show()


## Performance Analysis (Class-wise Results)

In [None]:
if MODE == 'csv':
    # Per-class precision/recall/F1 for the Perceptron on the held-out split.
    print("Classification Report for Perceptron")
    perceptron_preds = clf.predict(X_test)
    print(classification_report(y_test, perceptron_preds))

    # Same report for the MLP, using the predictions left over from the last
    # training epoch (moved to the CPU and converted to a NumPy array).
    print("Classification Report for MLP")
    mlp_preds = preds.cpu().numpy()
    print(classification_report(y_test, mlp_preds))


## Tabulated Results and Summary of Findings

In [None]:
if MODE == 'csv':
    # Side-by-side test accuracy: Perceptron score vs. the MLP's final-epoch accuracy.
    results = {
        'Model': ['Perceptron', 'MLP'],
        'Accuracy': [clf.score(X_test, y_test), test_accuracies[-1]]
    }
    # A bare DataFrame expression nested inside an `if` is never rendered by
    # the notebook, so the table was silently discarded. Print it explicitly.
    results_df = pd.DataFrame(results)
    print(results_df)


## Conclusion
The Multilayer Perceptron (MLP) outperforms the single-layer Perceptron on this dataset because its hidden layer lets it learn non-linear decision boundaries, whereas the Perceptron can only learn a linear one. While the Perceptron is simpler and faster to train, the MLP generalizes better here.