In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, models, datasets
from sklearn.model_selection import train_test_split

In [2]:
# Set the device (cuda if available, otherwise cpu)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Constants
NUM_CLASSES = 8  # Update this to the number of classes in your dataset
BATCH_SIZE = 32
IMAGE_SIZE =224

In [4]:
PATH = "organized_data_final_3"

In [5]:
# Data Transforms
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
])

# Load the dataset
full_dataset = datasets.ImageFolder(root=PATH, transform=transform)

In [6]:
# Split the dataset
train_size = int(0.8 * len(full_dataset))
val_size = int(0.1 * len(full_dataset))
test_size = len(full_dataset) -train_size - val_size

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, val_size,test_size]
)

In [7]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,num_workers=4)

In [8]:
# Load a pre-trained ResNet model
resnet = models.resnet50(pretrained=True)



In [9]:
# Modify the last fully connected layer for the new number of classes
resnet.fc = nn.Linear(resnet.fc.in_features,NUM_CLASSES)

In [10]:
layersToTrain = 6
layersForTraining = list(resnet.children())[-layersToTrain:]

# Set requires_grad to True for the parameters of the identified layers
for layer in layersForTraining:
    for param in layer.parameters():
        param.requires_grad = True


# Move the model to the device
resnet = resnet.to(DEVICE)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.parameters(), lr=0.001,momentum=0.9)

In [11]:
from tqdm import tqdm

In [12]:
NUM_EPOCHS = 10

for epoch in range(NUM_EPOCHS):
    resnet.train()

    # Use tqdm for a progress bar
    tqdm_train_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{NUM_EPOCHS}')

    for inputs, labels in tqdm_train_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation loop
    resnet.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            outputs = resnet(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct / total

    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}, Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:12<00:00,  2.46it/s]


Epoch 1/10, Loss: 0.1436, Accuracy: 95.66%


Epoch 2/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:11<00:00,  2.47it/s]


Epoch 2/10, Loss: 0.1021, Accuracy: 97.14%


Epoch 3/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:15<00:00,  2.42it/s]


Epoch 3/10, Loss: 0.0852, Accuracy: 97.41%


Epoch 4/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:15<00:00,  2.42it/s]


Epoch 4/10, Loss: 0.0623, Accuracy: 98.31%


Epoch 5/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:16<00:00,  2.40it/s]


Epoch 5/10, Loss: 0.0612, Accuracy: 98.15%


Epoch 6/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:19<00:00,  2.37it/s]


Epoch 6/10, Loss: 0.0648, Accuracy: 98.15%


Epoch 7/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:13<00:00,  2.45it/s]


Epoch 7/10, Loss: 0.0599, Accuracy: 98.15%


Epoch 8/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:11<00:00,  2.47it/s]


Epoch 8/10, Loss: 0.0526, Accuracy: 98.41%


Epoch 9/10: 100%|████████████████████████████████████████████████████████████████████| 473/473 [03:11<00:00,  2.47it/s]


Epoch 9/10, Loss: 0.0513, Accuracy: 98.20%


Epoch 10/10: 100%|███████████████████████████████████████████████████████████████████| 473/473 [03:11<00:00,  2.47it/s]


Epoch 10/10, Loss: 0.0571, Accuracy: 98.41%


In [13]:
torch.save(resnet.state_dict(), 'AS.pth')

In [14]:
# Test the model
resnet.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        outputs = resnet(inputs)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = test_correct / test_total
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Test Accuracy: 98.10%


In [83]:
import torch
from torchvision import transforms
from PIL import Image

def predict_image(image_path, model, class_labels, device='cuda'):
    # Load and preprocess the image
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    image = Image.open(image_path).convert('RGB')  # Ensure the image is in RGB format
    input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension and move to the specified device

    # Move model to the same device as the input tensor
    model = model.to(device)

    # Make prediction
    with torch.no_grad():
        model.eval()
        output = model(input_tensor)

    # Get class probabilities and logits
    probabilities = torch.nn.Softmax(dim=1)(output)[0].tolist()
    logits = output[0].tolist()

    # Get the predicted class index
    predicted_index = torch.argmax(output).item()
    predicted_class = class_labels[predicted_index]

    return {
        'predicted_class': predicted_class,
        'probabilities': probabilities,
        'logits': logits
    }

# Example usage
image_path = "shoe.jpg"
prediction_result = predict_image(image_path, resnet, class_labels=['Athletic shoes', 'Boat', 'Flats', 'Heels', 'Knee High', 'Loafers', 'Oxford', "Sneakers"])

print("Predicted Class:", prediction_result['predicted_class'])
print("Class Probabilities:", prediction_result['probabilities'])
print("Logits:", prediction_result['logits'])


Predicted Class: Athletic shoes
Class Probabilities: [0.8158131837844849, 0.04029471427202225, 0.00024055864196270704, 0.0037294619251042604, 0.10230713337659836, 0.028387336060404778, 0.0020814158488065004, 0.007146227639168501]
Logits: [4.101933479309082, 1.0939682722091675, -4.02704381942749, -1.2859880924224854, 2.0257275104522705, 0.7436912059783936, -1.869203805923462, -0.6356671452522278]
