In [64]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

In [65]:
# ---- Tunable settings ------------------------------------------------------
# Everything a reader may want to change lives here instead of being repeated
# inline further down.
DATA_ROOT = '/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/pizza_steak_classification/data/'
IMAGE_SIZE = (64, 64)              # (height, width) every image is resized to
BATCH_SIZE = 32
# Per-channel statistics handed to Normalize (these match the widely used
# ImageNet values).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Define transformations: resize -> float tensor -> channel-wise normalisation
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)
])

# Load the data using ImageFolder (one sub-directory per class)
train_dir = DATA_ROOT + 'train/'
test_dir = DATA_ROOT + 'test/'

train_data = datasets.ImageFolder(root=train_dir, transform=transform)
test_data = datasets.ImageFolder(root=test_dir, transform=transform)

# Create DataLoaders; only the training split is shuffled
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [99]:
# Translates the integer class predicted by the network into a readable name.
# NOTE(review): assumes the dataset assigned pizza -> 0 and steak -> 1;
# confirm by inspecting `train_data.class_to_idx`.
idx2cls = dict(enumerate(('pizza', 'steak')))

In [62]:
class Model(nn.Module):
    """Three-stage convolutional binary classifier that emits one raw logit.

    Shape walk-through for a 3 x 64 x 64 input (each 5x5 convolution shortens
    a side by 4 pixels, each 2x2 max-pool halves it):

        conv1: 6 x 60 x 60   -> pool: 6 x 30 x 30
        conv2: 16 x 26 x 26  -> pool: 16 x 13 x 13
        conv3: 32 x 9 x 9    -> pool: 32 x 4 x 4   (512 values once flattened)

    No sigmoid is applied to the output inside this module.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor; a single pooling layer is shared by all stages.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # Fully connected head: 512 -> 512 -> 224 -> 128 -> 64 -> 32 -> 1
        self.fc1 = nn.Linear(in_features=32 * 4 * 4, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=224)
        self.fc3 = nn.Linear(in_features=224, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=64)
        self.fc5 = nn.Linear(in_features=64, out_features=32)
        self.fc6 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Run the conv/pool stages, flatten, then the dense head; returns logits."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        # Keep the batch dimension, collapse everything else.
        x = torch.flatten(x, 1)

        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = F.relu(hidden(x))

        # The last layer is left un-activated on purpose: it yields the logit.
        return self.fc6(x)


model = Model()


In [93]:
class Model(nn.Module):
    """Compact two-convolution binary classifier producing one logit per image.

    NOTE(review): running this cell re-binds the name ``Model``. An already
    created ``model`` instance keeps its previous class until
    ``model = Model()`` is executed again.

    Args:
        input_size: Side length, in pixels, of the square RGB input. Defaults
            to 64, the size produced by the notebook's ``Resize((64, 64))``.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution + pooling stages.
    """

    def __init__(self, input_size=64):
        super().__init__()
        # A 5x5 convolution (stride 1, no padding) shortens each side by 4;
        # a 2x2 max-pool halves it (rounding down).
        #   64 -> conv1 -> 60 -> pool -> 30 -> conv2 -> 26 -> pool -> 13
        #   32 -> conv1 -> 28 -> pool -> 14 -> conv2 -> 10 -> pool -> 5
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        side = ((input_size - 4) // 2 - 4) // 2
        if side < 1:
            raise ValueError(
                f"input_size={input_size} is too small for two conv(5)+pool(2) stages"
            )

        # Each Linear layer must accept exactly what the previous one emits.
        self.fc1 = nn.Linear(16 * side * side, 16)
        self.fc2 = nn.Linear(16, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        """Return raw logits of shape (batch, 1); no sigmoid is applied."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [88]:
# Binary cross-entropy computed directly on logits, which is why the network's
# final layer has no sigmoid of its own.
loss_fun = nn.BCEWithLogitsLoss()

# AdamW over every parameter of the current `model` instance.
LEARNING_RATE = 1e-3
optimizer = optim.AdamW(params=model.parameters(), lr=LEARNING_RATE)

In [89]:
# Number of full passes over the training data made by the training loop.
epochs = 15

In [94]:
# Pick the best backend that is actually present so the cell also runs on
# machines without Apple's MPS (falls back to CUDA, then CPU).
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# NOTE(review): `model` and `optimizer` are created in earlier cells, so
# re-running this cell continues from the current weights rather than
# restarting training from scratch.
model.to(device)

for epoch in range(epochs):

    model.train()
    tr_loss = 0.0       # running sum of per-sample losses for this epoch
    correct_preds = 0   # correctly classified samples so far
    total_preds = 0     # samples seen so far

    for img, label in train_loader:
        img = img.to(device)
        # unsqueeze(-1) adds a trailing dimension so the targets line up with
        # the model's single-logit output; the loss needs float targets.
        label = label.unsqueeze(-1).float().to(device)
        batch_size = label.size(0)

        optimizer.zero_grad()
        output = model(img)

        # Calculate loss and update the weights
        loss = loss_fun(output, label)
        loss.backward()
        optimizer.step()

        # `loss` is a per-batch mean (assumes loss_fun keeps the default
        # 'mean' reduction); weighting by batch size keeps a smaller final
        # batch from skewing the epoch average.
        tr_loss += loss.item() * batch_size

        # Sigmoid turns the logit into a probability; rounding thresholds it
        # at 0.5 to obtain the 0/1 class.
        preds = torch.round(torch.sigmoid(output.detach()))
        correct_preds += (preds == label).sum().item()
        total_preds += batch_size

    if total_preds == 0:
        raise RuntimeError("train_loader yielded no samples; check the training directory")

    tr_loss /= total_preds
    accuracy = (correct_preds / total_preds) * 100  # accuracy in percent

    print(f"Epoch: {epoch+1}  Training Loss: {tr_loss:.4f}  Accuracy: {accuracy:.2f}%")


Epoch: 1  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 2  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 3  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 4  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 5  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 6  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 7  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 8  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 9  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 10  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 11  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 12  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 13  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 14  Training Loss: 0.0000  Accuracy: 100.00%
Epoch: 15  Training Loss: 0.0000  Accuracy: 100.00%


In [95]:
# Pick the best backend that is actually present so the cell also runs on
# machines without Apple's MPS (falls back to CUDA, then CPU).
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model.to(device)

# Evaluation loop
model.eval()  # Set the model to evaluation mode
eval_loss = 0.0     # running sum of per-sample losses
correct_preds = 0
total_preds = 0

with torch.no_grad():  # Disable gradient calculation
    for img, label in test_loader:
        img = img.to(device)
        # Trailing dimension + float dtype so targets match the logits.
        label = label.unsqueeze(-1).float().to(device)
        batch_size = label.size(0)

        output = model(img)

        # `loss` is a per-batch mean (assumes loss_fun keeps the default
        # 'mean' reduction); weight it by batch size so the final figure is a
        # true per-sample average even when the last batch is smaller.
        loss = loss_fun(output, label)
        eval_loss += loss.item() * batch_size

        # Apply sigmoid and round to get binary predictions
        preds = torch.round(torch.sigmoid(output))
        correct_preds += (preds == label).sum().item()
        total_preds += batch_size

if total_preds == 0:
    raise RuntimeError("test_loader yielded no samples; check the test directory")

# Calculate average loss and accuracy
eval_loss /= total_preds
accuracy = (correct_preds / total_preds) * 100

print(f"Test Loss: {eval_loss:.4f}  Test Accuracy: {accuracy:.2f}%")


Test Loss: 3.0697  Test Accuracy: 85.00%


In [102]:
# inference

# Preprocessing for single images: resize, convert to a tensor, then
# normalise each channel.
_inference_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
new_transform = transforms.Compose(_inference_steps)

def load_image(img_path):
    """Read one image file and return it as a preprocessed batch of one.

    Args:
        img_path: Path of the image file to open.

    Returns:
        The tensor produced by ``new_transform`` with a leading batch
        dimension added.
    """
    # Force three channels: RGBA, palette or greyscale files would otherwise
    # reach the 3-channel Normalize with the wrong number of channels.
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(img_path) as raw:
        rgb = raw.convert('RGB')
    tensor = new_transform(rgb)
    return tensor.unsqueeze(0)

# Every sample photo sits in the same folder, so only the file names vary.
IMAGE_ROOT = "/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/pizza_steak_classification/"
_sample_files = (
    "p1.jpg", "p2.jpg", "p3.jpg", "p4.jpg", "p5.jpg",
    "s1.jpg", "s2.jpg", "s3.jpg", "s4.webp", "s5.jpg", "s6.JPG",
    # "s7.avif",  (disabled)
    "s8.jpg", "s9.jpg", "s10.jpg",
)
image_path = [IMAGE_ROOT + file_name for file_name in _sample_files]
# Preprocess every listed file up front; each entry is a batch of one image.
images = [load_image(img) for img in image_path]

# Pick the best backend that is actually present (MPS, then CUDA, then CPU)
# and make sure the model sits on that same device, instead of relying on an
# earlier cell having moved it.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model.to(device)

model.eval()
with torch.no_grad():
    for img in images:
        out = model(img.to(device))
        # Sigmoid -> probability; rounding thresholds it at 0.5 to a 0/1 class.
        preds = torch.round(torch.sigmoid(out))
        print(idx2cls[int(preds.item())])



pizza
pizza
pizza
steak
pizza
steak
pizza
steak
steak
pizza
steak
steak
pizza
pizza
