<a href="https://colab.research.google.com/github/limi900/BoozAllen/blob/model-branch/classifier_for_booz_allen_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

In [2]:
from PIL import Image
import matplotlib.pyplot as plt
import os
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import torch.nn.functional as F

In [3]:
# Fetch the project repository (code plus the garbage-dataset images) into
# ./BoozAllen. Later cells address it as /content/BoozAllen, so this is
# presumably run from Colab's default /content directory — TODO confirm.
# NOTE(review): re-running in the same runtime fails once ./BoozAllen exists.
!git clone https://github.com/limi900/BoozAllen.git

Cloning into 'BoozAllen'...
remote: Enumerating objects: 19930, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (120/120), done.[K
remote: Total 19930 (delta 42), reused 124 (delta 21), pack-reused 19782 (from 2)[K
Receiving objects: 100% (19930/19930), 741.21 MiB | 62.73 MiB/s, done.
Resolving deltas: 100% (44/44), done.
Updating files: 100% (19780/19780), done.


In [4]:
# Make the dataset folder inside the cloned repository the working directory.
os.chdir('/content/BoozAllen/garbage-dataset')

In [8]:
# Step 1: preprocessing pipeline handed to ImageFolder.
# Each image is resized to a fixed 128x128, converted to a tensor, and then
# normalized per channel with the ImageNet mean / standard deviation.
train_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform_train = transforms.Compose(train_steps)

In [6]:
# Evaluation-time preprocessing: same resize / tensor / ImageNet-normalize
# steps as the training pipeline.
# NOTE(review): the dataset cell in this notebook passes `transform_train` to
# ImageFolder, so this pipeline is defined but not referenced there.
test_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform_test = transforms.Compose(test_steps)

In [9]:
path = '/content/BoozAllen/garbage-dataset'

# ImageFolder treats every sub-directory of `path` as one class and applies
# the preprocessing pipeline to each image as it is loaded.
dataset = datasets.ImageFolder(root=path, transform=transform_train)
num_elements = len(dataset)

# Split the data into training (80%) and testing (20%) subsets.
# random_split draws its own random permutation of the indices, so no separate
# shuffling pass is needed beforehand. The seeded generator makes the split
# identical on every run, so train/test accuracies are comparable between runs.
training_percent = 0.8
training_data_size = int(training_percent * num_elements)  # 80% for training
testing_data_size = num_elements - training_data_size  # remaining 20% for testing
split_generator = torch.Generator().manual_seed(42)
training_data, testing_data = torch.utils.data.random_split(
    dataset,
    [training_data_size, testing_data_size],
    generator=split_generator,
)

# DataLoaders for both subsets. Only the training loader reshuffles each
# epoch; evaluation does not depend on sample order, so the test loader
# iterates in a fixed order.
train_loader = DataLoader(training_data, batch_size=32, shuffle=True)
test_loader = DataLoader(testing_data, batch_size=32, shuffle=False)

In [10]:
# Start from a ResNet-18 with pretrained weights.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` — confirm against the installed version before changing.
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer so it outputs one logit per class.
# The class count comes from the ImageFolder dataset (one class per
# sub-directory) rather than a hard-coded number, so the head always matches
# the label indices the loaders produce.
num_classes = len(dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 176MB/s]


In [11]:
# Pick the compute device once: GPU when CUDA is available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Step 5: Move the model's parameters and buffers to the chosen device.
model.to(device)

# Step 6: Loss function and optimizer.
criterion = nn.CrossEntropyLoss()  # multi-class classification on raw logits
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam over all model parameters

Using device: cuda


In [12]:
num_epochs = 10  # number of full passes over the training loader

for epoch in range(num_epochs):
    # Training mode: layers such as batch norm use their training behavior.
    model.train()
    epoch_loss_total = 0.0

    for images, labels in train_loader:
        # Batches must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Clear stale gradients, run forward + backward, then update weights.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss as a plain Python float.
        epoch_loss_total += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = epoch_loss_total / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

print("Training complete!")

Epoch [1/10], Loss: 0.7787
Epoch [2/10], Loss: 0.4902
Epoch [3/10], Loss: 0.3832
Epoch [4/10], Loss: 0.3018
Epoch [5/10], Loss: 0.2210
Epoch [6/10], Loss: 0.1942
Epoch [7/10], Loss: 0.1777
Epoch [8/10], Loss: 0.1376
Epoch [9/10], Loss: 0.1353
Epoch [10/10], Loss: 0.0968
Training complete!


In [13]:
# Accuracy on the held-out test split.
# eval() switches layers such as batch norm to their inference behavior.
model.eval()

correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(model(images), 1).indices

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test data: {accuracy:.2f}%')

Accuracy on test data: 86.21%


In [14]:
# Accuracy on the training split, for comparison with the test accuracy.
# eval() switches layers such as batch norm to their inference behavior.
model.eval()

correct_train = 0
total_train = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(model(images), 1).indices

        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

train_accuracy = 100 * correct_train / total_train
print(f'Accuracy on training data: {train_accuracy:.2f}%')

Accuracy on training data: 97.76%


In [18]:
# Move to the repository root so the model directory and git commands below
# operate on the repo. An absolute path is used instead of '..' so that
# re-running this cell is idempotent (a relative chdir climbs one level
# higher on every execution).
print(os.getcwd())  # working directory before the change
os.chdir('/content/BoozAllen')
print(os.getcwd())  # working directory after the change

/content/BoozAllen/garbage-dataset
/content/BoozAllen


In [21]:
# Save the trained weights (state_dict only) under ml-model-training/,
# relative to the current working directory.
model_dir = 'ml-model-training'
# exist_ok=True creates the directory when missing and is a no-op when it is
# already there, avoiding the separate check-then-create step.
os.makedirs(model_dir, exist_ok=True)

model_path = os.path.join(model_dir, 'garbage_classifier_resnet.pth')
torch.save(model.state_dict(), model_path)


In [22]:
# List the current directory to confirm that ml-model-training was created.
!ls

BoozAllen.ipynb  eco-recyclr  garbage-dataset  ml-model-training  README.md


In [33]:
# Set the git author identity (name and e-mail) used for commits made from
# this runtime. --global writes to the runtime user's git configuration.
!git config --global user.name "Chris Dollo"
!git config --global user.email "dollochrisdavid@gmail.com"

In [34]:
# Show the current branch and any uncommitted changes.
!git status

On branch model-branch
nothing to commit, working tree clean


In [35]:
!git checkout -b model-branch

fatal: A branch named 'model-branch' already exists.


In [44]:
# Stage every change under the current directory, commit it, and push the
# branch to the `origin` remote.
# NOTE(review): the recorded push went over SSH (git@github.com), so it relies
# on the SSH key / known_hosts / remote-URL cells that appear further down but
# carry earlier execution counts (37-43). On a fresh runtime, run those first.
!git add .
!git commit -m "added the ml model"
!git push origin model-branch

On branch model-branch
nothing to commit, working tree clean
Enumerating objects: 5, done.
Counting objects: 100% (5/5), done.
Delta compression using up to 12 threads
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 39.61 MiB | 10.90 MiB/s, done.
Total 4 (delta 1), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
remote: 
remote: Create a pull request for 'model-branch' on GitHub by visiting:[K
remote:      https://github.com/limi900/BoozAllen/pull/new/model-branch[K
remote: 
To github.com:limi900/BoozAllen.git
 * [new branch]        model-branch -> model-branch


In [37]:
# Generate a 4096-bit RSA key pair for authenticating to GitHub over SSH.
# The -C value is only a comment label attached to the key; "you@example.com"
# is a placeholder. Per the recorded output, the command prompts interactively
# for the key file location and a passphrase.
!ssh-keygen -t rsa -b 4096 -C "you@example.com"

Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): 
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /root/.ssh/id_rsa
Your public key has been saved in /root/.ssh/id_rsa.pub
The key fingerprint is:
SHA256:UySTaeMelDv/tPX8jaAc4yC+YepIKKVc93+may/C55A you@example.com
The key's randomart image is:
+---[RSA 4096]----+
|        o+.      |
|        B+       |
|       + o.      |
|        =.       |
|  .. . .S+       |
|.+. . . o.. . .  |
|+..   =E. oo.o o |
|.. . + =oB *o. .+|
|  ..o o.==@.  . +|
+----[SHA256]-----+


In [38]:
# Print the public half of the key pair (presumably to paste into the GitHub
# account's SSH keys — confirm). The private key (~/.ssh/id_rsa) is not shown.
!cat ~/.ssh/id_rsa.pub


ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDG+MOmlrKwug+ha9uCsrJEv863dECfn5TQFbxtCiUUKt/TcuuB7xeUy4VGjiAVQ3rpU7v4C3kcgS0kTnAL/6XdoSDyuMmaJZFXnxs927iM+LuiM0XVV/1WBtBeh0Tjf9JtgU8vC2Alk59fK9pNZCm/D/YQ7GUA+8+4XP7vMkMq9WJWGvp5N1rpVKBVlJRnD0PKx791imsYCW/wayQrtDr/KP2XpdlC29IyqZCRHvZbRthQ7elRuvRVCUVAR47Iio62N8YKhgY8Q0h9U5ONKKRmGTqO2beCU66cgY5PI6V4hrqjNUeBTRT8l44CAOrTMbhoAsxgOo8GGYeGrB9WTztwKbXomKUncMTvHc3omYi7EJLL5v7TtsXRuzvnXngPCyICaDoZCQTpOSMv+cg8EwKplluHlCamDCAMiiPndnYHbG5x7/9mCJg/X2DAPdr31sKArWcQMPuLCmZsMP1dsrlREfwGyyD1N537qS5GnMFtL2YscIg83oah8WTFQg0FhDIxK9+//PiUL6bmA9HDU54yYP4ae8h4uOPYE9LuvXF4KVb3M99CZGBzYf0tJX9tClyR7Puuxs0tTsGONpPvk6pdjM3WoK60m08f3TP+hVrFjWITU0jSCuAh2mgVceS/ZhhPvhJusGRxXzMPis0IfZY+8RSEdcPeWhKqDA2wOjcdnQ== you@example.com


In [40]:
# Append GitHub's SSH host keys to known_hosts so later SSH connections do not
# stop at the host-authenticity prompt.
# NOTE(review): the scanned keys are trusted without comparing them to
# GitHub's published fingerprints, and `>>` appends again on every re-run.
!ssh-keyscan github.com >> ~/.ssh/known_hosts

# github.com:22 SSH-2.0-48ecf9ce5
# github.com:22 SSH-2.0-48ecf9ce5
# github.com:22 SSH-2.0-48ecf9ce5
# github.com:22 SSH-2.0-48ecf9ce5
# github.com:22 SSH-2.0-48ecf9ce5


In [41]:
# Check that GitHub accepts the SSH key; -T disables pseudo-terminal
# allocation, so GitHub just prints a greeting and closes the connection.
!ssh -T git@github.com

Hi chrisdollo! You've successfully authenticated, but GitHub does not provide shell access.


In [42]:
# List the identities held by the SSH agent. The recorded output shows that no
# agent was reachable in this runtime.
!ssh-add -l


Could not open a connection to your authentication agent.


In [43]:
# Point the `origin` remote at the SSH URL so pushes authenticate with the SSH
# key instead of HTTPS credentials.
!git remote set-url origin git@github.com:limi900/BoozAllen.git
