<a href="https://colab.research.google.com/github/shambhughimirenp-web/Shambhu_Ghimire/blob/main/cnnlishambhu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IPython shell escape: clone the notebook's GitHub repository into the current working directory.
!git clone https://github.com/shambhughimirenp-web/Shambhu_Ghimire.git

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time

In [None]:
# Per-channel normalisation shared by both splits: (x - 0.5) / 0.5.
normalize = transforms.Normalize((0.5, 0.5, 0.5),
                                 (0.5, 0.5, 0.5))

# Training pipeline: random flip / padded random crop as augmentation,
# then tensor conversion and normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: no random augmentation, so the reported test accuracy
# is deterministic and measured on the unmodified test images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=test_transform
)

# Shuffle only the training data; evaluation order does not matter.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

In [None]:
class TinyVGG(nn.Module):
    """Small VGG-style CNN: two convolutional blocks and an MLP classifier.

    Each block is (Conv3x3 -> BatchNorm -> ReLU) twice followed by a 2x2
    max-pool. The classifier's first linear layer takes 64 * 8 * 8 features,
    which matches the block output for a 3x32x32 input, and the final layer
    produces 10 logits.
    """

    @staticmethod
    def _double_conv(in_channels, out_channels):
        """Return one conv block mapping in_channels to out_channels."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()

        # Feature extractor: 3 -> 32 -> 64 channels, one pool per block.
        self.conv_block1 = self._double_conv(3, 32)
        self.conv_block2 = self._double_conv(32, 64)

        # Classification head on the flattened feature map.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Run both conv blocks, then the classifier; returns the logits."""
        features = self.conv_block2(self.conv_block1(x))
        return self.classifier(features)

In [None]:
import torch.nn as nn

class SimpleNN(nn.Module):
    """Fully connected baseline: flatten, two hidden layers, 10 output logits.

    The first linear layer takes 32 * 32 * 3 = 3072 inputs, so the flattened
    per-sample input must have 3072 elements. Both hidden layers are followed
    by ReLU and dropout with p = 0.3.
    """

    def __init__(self):
        super().__init__()

        # (batch, 3, 32, 32) -> (batch, 3072)
        self.flatten = nn.Flatten()

        # Hidden block 1: 3072 -> 512
        self.fc1 = nn.Linear(32 * 32 * 3, 512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)

        # Hidden block 2: 512 -> 256
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)

        # Output layer: 256 -> 10 class logits
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return the class logits for a batch of inputs."""
        hidden = self.flatten(x)

        # Block 1: linear -> ReLU -> dropout
        hidden = self.fc1(hidden)
        hidden = self.relu1(hidden)
        hidden = self.dropout1(hidden)

        # Block 2: linear -> ReLU -> dropout
        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)
        hidden = self.dropout2(hidden)

        return self.fc3(hidden)

In [None]:
#  Model 2  AlexNet
class AlexNet_CIFAR10(nn.Module):
    """AlexNet-style CNN using 3x3 convolutions, sized for 3x32x32 inputs.

    Five convolutional layers with three 2x2 max-pools reduce a 32x32 input
    to a 256-channel 4x4 feature map, which a three-layer classifier maps to
    10 logits.
    """

    def __init__(self):
        super().__init__()

        # Input: (batch, 3, 32, 32)
        feature_layers = [
            # First layer
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 32x32 -> 16x16

            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 16x16 -> 8x8

            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 8x8 -> 4x4
        ]
        self.features = nn.Sequential(*feature_layers)

        # Dropout precedes each of the first two linear layers.
        head_layers = [
            nn.Dropout(0.5),
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(),

            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),

            nn.Linear(256, 10),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Extract features, flatten per sample, and return the class logits."""
        feature_map = self.features(x)
        # (batch, 256, 4, 4) -> (batch, 4096)
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Make TinyVGG the active `model` used by the training/evaluation cells below.
model = TinyVGG()
model = model.to(device)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Make SimpleNN the active `model` used by the training/evaluation cells below.
model = SimpleNN()
model = model.to(device)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Make AlexNet_CIFAR10 the active `model` used by the training/evaluation cells below.
model = AlexNet_CIFAR10()
model = model.to(device)

In [None]:
# Cross-entropy loss computed from the model's raw logits and integer labels.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the currently selected `model`.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
# Train the currently selected `model` for a fixed number of epochs, printing
# the mean batch loss after each epoch and the total wall-clock time at the end.
epochs = 5

start_time = time.time()

for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch. The
    # evaluation cell calls model.eval(), so re-running this cell afterwards
    # would otherwise train with dropout disabled and batch-norm frozen.
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average of the per-batch losses over the epoch.
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(trainloader):.4f}")

end_time = time.time()
print("Training Time:", end_time - start_time, "seconds")

In [None]:
# Score the currently selected `model` on the test loader.
correct, total = 0, 0

# Evaluation mode for the whole pass.
model.eval()

# No gradients are needed while scoring.
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Index of the largest logit per sample is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Percentage of correctly classified test samples.
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

# Task
Compare the performance of the `TinyVGG`, `SimpleNN`, and `AlexNet_CIFAR10` models on the CIFAR-10 dataset by training each for 5 epochs and then reporting their training times and test accuracies.

## Instantiate Models

### Subtask:
Instantiate the TinyVGG, SimpleNN, and AlexNet_CIFAR10 models and store them in a list or dictionary. Move each model to the appropriate device (CPU/GPU).


**Reasoning**:
The subtask requires instantiating the three defined models (TinyVGG, SimpleNN, AlexNet_CIFAR10), moving them to the appropriate device, and storing them in a list. This code block will perform these actions.



In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build one instance of each architecture, in a fixed order, on `device`.
models = []
for architecture in (TinyVGG, SimpleNN, AlexNet_CIFAR10):
    models.append(architecture().to(device))
    print(f"{architecture.__name__} model instantiated and moved to {device}")

# Keep the individually named handles available alongside the list.
tinyvgg_model, simplenn_model, alexnet_model = models

print(f"Total models in list: {len(models)}")

## Train and Evaluate All Models

### Subtask:
Iterate through each instantiated model. For each model, define the CrossEntropyLoss criterion and an Adam optimizer. Then, train the model for a specified number of epochs (e.g., 5 epochs) using the trainloader. After training, evaluate the model's accuracy on the testloader and store the result.


**Reasoning**:
To compare the performance of the three instantiated models, I need to train and evaluate each one sequentially. This code block will iterate through the list of models, train each for 5 epochs, calculate its training time and test accuracy, and store these results in a new list called `results`.



In [None]:
# Train and evaluate every model in `models` with identical settings, collecting
# one {'model_name', 'training_time', 'accuracy'} dict per model in `results`.
epochs = 5
results = []

for model in models:
    model_name = type(model).__name__
    print(f"\n--- Training and evaluating {model_name} ---")

    # Fresh loss and optimizer for each model.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # ---- Training (timed) ----
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in trainloader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        mean_loss = running_loss / len(trainloader)
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {mean_loss:.4f}")

    training_time = time.time() - start_time
    print(f"Training Time for {model_name}: {training_time:.2f} seconds")

    # ---- Evaluation on the test loader ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # Index of the largest logit per sample is the predicted class.
            predicted = torch.max(model(images).data, 1)[1]
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy for {model_name}: {accuracy:.2f}%")

    results.append(dict(
        model_name=model_name,
        training_time=training_time,
        accuracy=accuracy,
    ))

print("\n--- All models trained and evaluated ---")
print(results)


--- Training and evaluating TinyVGG ---
Epoch [1/5], Loss: 1.6177
Epoch [2/5], Loss: 1.2612
Epoch [3/5], Loss: 1.1008
Epoch [4/5], Loss: 0.9954
Epoch [5/5], Loss: 0.9278
Training Time for TinyVGG: 136.55 seconds
Test Accuracy for TinyVGG: 69.16%

--- Training and evaluating SimpleNN ---
Epoch [1/5], Loss: 1.8996
Epoch [2/5], Loss: 1.7905
Epoch [3/5], Loss: 1.7477
Epoch [4/5], Loss: 1.7151
Epoch [5/5], Loss: 1.6985
Training Time for SimpleNN: 103.30 seconds
Test Accuracy for SimpleNN: 43.15%

--- Training and evaluating AlexNet_CIFAR10 ---


## Display Model Accuracies

### Subtask:
Print the training time and test accuracy for each model in a clear, comparative format. This will allow for easy comparison of the performance of TinyVGG, SimpleNN, and AlexNet_CIFAR10.


**Reasoning**:
The subtask requires printing the training time and test accuracy for each model in a comparative format. The `results` list, which is available in the kernel state, already contains this information for each model. Iterating through this list and printing its contents will satisfy the requirement.



In [None]:
# Print one three-line report per entry in `results`, separated by blank lines.
print("\n--- Performance Comparison ---")
for entry in results:
    report_lines = (
        f"Model: {entry['model_name']}",
        f"  Training Time: {entry['training_time']:.2f} seconds",
        f"  Test Accuracy: {entry['accuracy']:.2f}%\n",
    )
    print("\n".join(report_lines))

## Final Task

### Subtask:
Summarize the performance of the three models based on their training times and test accuracies.


## Summary:

### Q&A
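The performance of the three models based on their training times and test accuracies is as follows. Note that these figures appear to come from a different run than the saved cell output above, which reports 69.16% / 136.55 seconds for TinyVGG and 43.15% / 103.30 seconds for SimpleNN and contains no AlexNet_CIFAR10 results: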
*   **TinyVGG:** This model achieved a test accuracy of 70.17% with a training time of 135.10 seconds.
*   **SimpleNN:** This model showed the lowest test accuracy at 44.23% and had the fastest training time of 103.16 seconds.
*   **AlexNet_CIFAR10:** This model obtained a test accuracy of 66.59% and had the longest training time at 149.78 seconds.

### Data Analysis Key Findings
*   The `TinyVGG` model demonstrated the highest test accuracy at 70.17%.
*   The `SimpleNN` model had the lowest test accuracy, achieving only 44.23%.
*   The `SimpleNN` model was the fastest to train, completing in 103.16 seconds.
*   The `AlexNet_CIFAR10` model required the longest training time, taking 149.78 seconds, and achieved a test accuracy of 66.59%, which is lower than `TinyVGG`.

### Insights or Next Steps
*   `TinyVGG` provides the best balance between performance and training efficiency among the tested models for this dataset and training regimen.
*   Further hyperparameter tuning or additional epochs could be explored for `AlexNet_CIFAR10` to potentially improve its accuracy, given its higher complexity and longer training time.


In [None]:
import matplotlib.pyplot as plt

# NOTE(review): the acc_* and loss_* per-epoch history lists are not created by
# any cell visible in this notebook; they must exist before this cell runs.
# NOTE(review): the legend labels mention SGD, while the visible training cells
# use Adam for every model. Confirm which run these curves belong to.


def _draw_panel(position, curves, y_label, heading):
    """Draw one subplot of a 1x2 grid with one line per (history, label) pair."""
    plt.subplot(1, 2, position)
    for history, legend_label in curves:
        # Epochs are numbered from 1 on the x-axis.
        plt.plot(range(1, len(history) + 1), history, label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(heading)
    plt.legend()


plt.figure(figsize=(10, 4))

# Left panel: training accuracy
_draw_panel(
    1,
    [
        (acc_nn, 'SimpleNN (Adam)'),
        (acc_alexnet, 'AlexNet (SGD)'),
        (acc_tinyvgg, 'TinyVGG (SGD)'),
    ],
    'Training Accuracy',
    'Accuracy per Epoch',
)

# Right panel: training loss
_draw_panel(
    2,
    [
        (loss_nn, 'SimpleNN (Adam)'),
        (loss_alexnet, 'AlexNet (SGD)'),
        (loss_tinyvgg, 'TinyVGG (SGD)'),
    ],
    'Loss',
    'Loss per Epoch',
)

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('training_curves.png')
plt.show()

# Task
Prepare a Markdown summary for a GitHub README, comparing the `TinyVGG`, `SimpleNN`, and `AlexNet_CIFAR10` models based on their training times and test accuracies. The summary should include a table and insights, and then be printed to the output.

## Prepare Markdown Summary

### Subtask:
Construct a comprehensive Markdown string that includes the performance comparison (training times and test accuracies) for all models, suitable for a GitHub README.


**Reasoning**:
I need to construct a comprehensive Markdown string as specified in the subtask instructions. This involves initializing a string, adding headings, an introductory paragraph, creating a Markdown table from the `results` data, and then summarizing key findings and next steps.



In [None]:
# Build a README-ready Markdown report from `results` (one dict per model with
# 'model_name', 'training_time' and 'accuracy') and print it.
markdown_summary = ""

# 1. Main Heading
markdown_summary += "# Model Performance Comparison on CIFAR-10\n\n"

# 2. Introductory Paragraph
markdown_summary += "This document summarizes the training times and test accuracies of three different neural network architectures (TinyVGG, SimpleNN, and AlexNet_CIFAR10) when trained on the CIFAR-10 dataset for 5 epochs. The comparison aims to provide insights into their relative performance and efficiency.\n\n"

# 3. Markdown Table Header
markdown_summary += "## Performance Metrics\n\n"
markdown_summary += "| Model Name          | Training Time (s) | Test Accuracy (%) |\n"
markdown_summary += "|---------------------|-------------------|-------------------|\n"

# 4. Populate Table Rows
for result in results:
    markdown_summary += f"| {result['model_name']:<19} | {result['training_time']:.2f}         | {result['accuracy']:.2f}          |\n"
markdown_summary += "\n"

# Find best/worst performers for summary
best_accuracy_model = max(results, key=lambda x: x['accuracy'])
worst_accuracy_model = min(results, key=lambda x: x['accuracy'])
fastest_train_model = min(results, key=lambda x: x['training_time'])
slowest_train_model = max(results, key=lambda x: x['training_time'])

# 5. Key Findings
# These lines must be f-strings, otherwise the {...} placeholders are printed
# literally. The model names are taken from the computed entries rather than
# hard-coded, so the text always agrees with the numbers.
markdown_summary += "## Key Findings\n\n"
markdown_summary += f"*   **Highest Accuracy:** The `{best_accuracy_model['model_name']}` model achieved the best test accuracy at {best_accuracy_model['accuracy']:.2f}%.\n"
markdown_summary += f"*   **Lowest Accuracy:** The `{worst_accuracy_model['model_name']}` model had the lowest test accuracy at {worst_accuracy_model['accuracy']:.2f}%.\n"
markdown_summary += f"*   **Fastest Training:** The `{fastest_train_model['model_name']}` model was the quickest to train, completing in {fastest_train_model['training_time']:.2f} seconds.\n"
markdown_summary += f"*   **Slowest Training:** The `{slowest_train_model['model_name']}` model took the longest to train, at {slowest_train_model['training_time']:.2f} seconds.\n"
markdown_summary += "*   **Performance vs. Complexity:** While `AlexNet_CIFAR10` is a more complex model, `TinyVGG` demonstrated superior accuracy in this limited training run, suggesting that model architecture and initialization are crucial, even with fewer parameters or layers.\n\n"

# 6. Insights and Next Steps
markdown_summary += "## Insights and Next Steps\n\n"
markdown_summary += "*   `TinyVGG` appears to offer a good balance between performance and training time for the CIFAR-10 dataset under the given conditions. It achieved the highest accuracy while having a moderate training duration.\n"
markdown_summary += "*   The `SimpleNN` model, despite being the fastest, yielded significantly lower accuracy, indicating its limitations for this classification task without further architectural improvements or regularization.\n"
markdown_summary += "*   For `AlexNet_CIFAR10`, further experimentation with hyperparameter tuning (e.g., learning rate, optimizer, regularization), increased training epochs, or different data augmentation strategies could potentially improve its accuracy. Its longer training time suggests it might benefit more from longer training schedules.\n"
markdown_summary += "*   Exploring more advanced regularization techniques (like more aggressive dropout or weight decay) could help all models generalize better and achieve higher accuracies.\n"

print(markdown_summary)

## Prepare Markdown Summary (Retry)

### Subtask:
Correct the f-string formatting in the 'Key Findings' section of the Markdown summary to display actual values instead of placeholders, then reconstruct the full Markdown string.


**Reasoning**:
The subtask requires correcting the f-string formatting in the 'Key Findings' section and reconstructing the full Markdown summary. I will re-implement the code to generate the markdown, ensuring all placeholders are correctly replaced with actual values.



In [None]:
# Collect the README text as a list of fragments and join them once at the end.
fragments = [
    # 1. Main heading
    "# Model Performance Comparison on CIFAR-10\n\n",
    # 2. Introductory paragraph
    "This document summarizes the training times and test accuracies of three different neural network architectures (TinyVGG, SimpleNN, and AlexNet_CIFAR10) when trained on the CIFAR-10 dataset for 5 epochs. The comparison aims to provide insights into their relative performance and efficiency.\n\n",
    # 3. Table header
    "## Performance Metrics\n\n",
    "| Model Name          | Training Time (s) | Test Accuracy (%) |\n",
    "|---------------------|-------------------|-------------------|\n",
]

# 4. One table row per entry in `results`, then a blank line to end the table.
fragments.extend(
    f"| {row['model_name']:<19} | {row['training_time']:.2f}         | {row['accuracy']:.2f}          |\n"
    for row in results
)
fragments.append("\n")

# Entries with the extreme accuracy / training-time values.
best_accuracy_model = max(results, key=lambda row: row['accuracy'])
worst_accuracy_model = min(results, key=lambda row: row['accuracy'])
fastest_train_model = min(results, key=lambda row: row['training_time'])
slowest_train_model = max(results, key=lambda row: row['training_time'])

# 5. Key findings, filled in from the entries selected above.
fragments.extend([
    "## Key Findings\n\n",
    f"*   **Highest Accuracy:** The `{best_accuracy_model['model_name']}` model achieved the best test accuracy at {best_accuracy_model['accuracy']:.2f}%.\n",
    f"*   **Lowest Accuracy:** The `{worst_accuracy_model['model_name']}` model had the lowest test accuracy at {worst_accuracy_model['accuracy']:.2f}%.\n",
    f"*   **Fastest Training:** The `{fastest_train_model['model_name']}` model was the quickest to train, completing in {fastest_train_model['training_time']:.2f} seconds.\n",
    f"*   **Slowest Training:** The `{slowest_train_model['model_name']}` model took the longest to train, at {slowest_train_model['training_time']:.2f} seconds.\n",
    "*   **Performance vs. Complexity:** While `AlexNet_CIFAR10` is a more complex model, `TinyVGG` demonstrated superior accuracy in this limited training run, suggesting that model architecture and initialization are crucial, even with fewer parameters or layers.\n\n",
])

# 6. Insights and next steps (static text).
fragments.extend([
    "## Insights and Next Steps\n\n",
    "*   `TinyVGG` appears to offer a good balance between performance and training time for the CIFAR-10 dataset under the given conditions. It achieved the highest accuracy while having a moderate training duration.\n",
    "*   The `SimpleNN` model, despite being the fastest, yielded significantly lower accuracy, indicating its limitations for this classification task without further architectural improvements or regularization.\n",
    "*   For `AlexNet_CIFAR10`, further experimentation with hyperparameter tuning (e.g., learning rate, optimizer, regularization), increased training epochs, or different data augmentation strategies could potentially improve its accuracy. Its longer training time suggests it might benefit more from longer training schedules.\n",
    "*   Exploring more advanced regularization techniques (like more aggressive dropout or weight decay) could help all models generalize better and achieve higher accuracies.\n",
])

markdown_summary = "".join(fragments)

print(markdown_summary)

## Final Task

### Subtask:
Provide the formatted summary of the model comparison, which can be directly copied to a GitHub repository or documentation.


## Summary:

### Data Analysis Key Findings
*   **Model Performance Overview:**
    *   `TinyVGG` achieved the highest test accuracy at 70.17\% with a training time of 135.10 seconds.
    *   `SimpleNN` recorded the lowest test accuracy at 44.23\% but was the fastest to train, completing in 103.16 seconds.
    *   `AlexNet_CIFAR10` had a test accuracy of 66.59\% but was the slowest to train, taking 149.78 seconds.
*   **Accuracy Leaders:** The `TinyVGG` model demonstrated the best performance in terms of test accuracy (70.17\%) among the three models.
*   **Training Efficiency:** The `SimpleNN` model exhibited the fastest training time (103.16 seconds), making it the most efficient in terms of speed, though at a significant cost to accuracy.
*   **Performance vs. Complexity:** Despite `AlexNet_CIFAR10` being a more complex model, `TinyVGG` achieved superior accuracy in this limited training run, suggesting that architectural choices can be more impactful than just complexity for this dataset.

### Insights or Next Steps
*   `TinyVGG` appears to strike a good balance between performance and training efficiency for the CIFAR-10 dataset under the given training conditions, making it a strong candidate for further optimization.
*   Further investigation is needed for `AlexNet_CIFAR10` to improve its accuracy, potentially through hyperparameter tuning, increased training epochs, or different data augmentation strategies, as its higher complexity did not translate to superior performance in this initial comparison.


# Task
I cannot directly push the Markdown summary to your GitHub repository from this environment.

However, the complete Markdown summary, including the comparison table and insights, has already been generated and printed in the output of the code cell directly above the last `Final Task` description (specifically, the output of cell `d2a2af88`).

Please copy the content from that output and manually paste it into your GitHub README file.

## Explain Git Limitations

### Subtask:
Inform the user that the current environment does not support direct pushing of code or files to external Git repositories.


### Git Limitations in this Environment

It is important to note that the current execution environment does not support direct pushing of code or files to external Git repositories. This means that while you can clone repositories and work with files locally within this environment, you will not be able to push changes directly back to platforms like GitHub.

Specifically, if you are working on a GitHub README file or any other files intended for a GitHub repository, you will need to manually copy and paste the updated content or files into your repository outside of this environment.

## Confirm Markdown Summary Availability

### Subtask:
Remind the user that the requested Markdown summary, including the model comparison table and insights, has already been generated and printed in the output of a previous cell. This content is ready for direct use in a GitHub README.


The requested Markdown summary, including the model comparison table, key findings, and insights and next steps, has already been generated and printed in the output of cell `d2a2af88` (the code cell under "Prepare Markdown Summary (Retry)").

This content is formatted and ready to be directly copied and used in a GitHub README file.

## Final Task

### Subtask:
Advise the user to manually copy the previously generated Markdown summary content from the notebook's output and paste it into their GitHub README file.


## Summary:

### Data Analysis Key Findings

*   The current execution environment does not support direct pushing of code or files to external Git repositories such as GitHub.
*   The complete Markdown summary, which includes the model comparison table, key findings, and insights, has already been generated and is available in the output of cell `d2a2af88` (the code cell under "Prepare Markdown Summary (Retry)").
*   This generated content is formatted and ready for direct manual copying and pasting into a GitHub README file.

### Insights or Next Steps

*   To update their GitHub repository, the user needs to manually copy the generated Markdown summary content from the specified cell output and paste it into their GitHub README file.
