<a href="https://colab.research.google.com/github/rounak393/imageprocessing/blob/main/cifar_10_inception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.models import inception_v3
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [2]:
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False)

classes = trainset.classes


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


In [4]:
model = inception_v3(pretrained=True, aux_logits=True)
model.fc = nn.Linear(model.fc.in_features, 10)  # CIFAR-10 has 10 classes
model = model.to(device)


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 122MB/s] 


In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [7]:
epochs = 2

for epoch in range(epochs):
    running_loss = 0.0
    model.train()

    for i, data in enumerate(trainloader):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        # Access the main output logits from InceptionOutputs
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

[1, 100] loss: 1.056
[1, 200] loss: 0.459
[1, 300] loss: 0.401
[1, 400] loss: 0.315
[1, 500] loss: 0.307
[1, 600] loss: 0.287
[1, 700] loss: 0.275
[1, 800] loss: 0.263
[1, 900] loss: 0.235
[1, 1000] loss: 0.224
[1, 1100] loss: 0.254
[1, 1200] loss: 0.231
[1, 1300] loss: 0.221
[1, 1400] loss: 0.225
[1, 1500] loss: 0.209
[2, 100] loss: 0.124
[2, 200] loss: 0.116
[2, 300] loss: 0.140
[2, 400] loss: 0.125
[2, 500] loss: 0.122
[2, 600] loss: 0.124
[2, 700] loss: 0.133
[2, 800] loss: 0.149
[2, 900] loss: 0.121
[2, 1000] loss: 0.138
[2, 1100] loss: 0.140
[2, 1200] loss: 0.117
[2, 1300] loss: 0.123
[2, 1400] loss: 0.134
[2, 1500] loss: 0.129


In [8]:
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on test set: {100 * correct / total:.2f}%")


Accuracy on test set: 95.09%


In [12]:
from google.colab import files  # Only for Colab
from PIL import Image
import torchvision.transforms as transforms
import torch

# Preprocessing for Inception v3
preprocess = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load class labels
imagenet_classes = requests.get(
    "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
).text.strip().split("\n")

def classify_uploaded_image():
    print("Please upload an image file (JPG/PNG):")
    uploaded = files.upload()

    if not uploaded:
        print("No file uploaded.")
        return

    filename = next(iter(uploaded))
    try:
        image = Image.open(filename).convert("RGB")
    except Exception as e:
        print("Could not open the image. Error:", e)
        return

    input_tensor = preprocess(image).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        probs = torch.nn.functional.softmax(output[0], dim=0)
        top5 = torch.topk(probs, 5)

    print("\nTop 5 Predictions:")
    for i in range(5):
        idx = top5.indices[i].item()
        confidence = top5.values[i].item() * 100
        print(f"{i+1}. {imagenet_classes[idx]} ({confidence:.2f}%)")


In [15]:
classify_uploaded_image()


Please upload an image file (JPG/PNG):


Saving cat.jfif to cat (1).jfif

Top 5 Predictions:
1. tiger shark (85.76%)
2. tench (13.38%)
3. great white shark (0.40%)
4. hen (0.13%)
5. electric ray (0.12%)
