In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import os
from google.colab import drive
# Root folder of the dataset on Google Drive; it only becomes readable once
# the drive has been mounted by the call below.
data_dir = '/content/drive/MyDrive/compiled_data'

# Attach Google Drive to the Colab filesystem under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from torchvision import transforms

# Per-split preprocessing pipelines.
#   'train': random augmentation (crop, flip, colour jitter, small rotation),
#            then tensor conversion and channel-wise normalisation.
#   'val':   deterministic resize + centre crop, then the same tensor
#            conversion and normalisation (no augmentation).
# Both splits end up as 224x224 crops normalised with the same mean/std.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),  # brightness, contrast, saturation, hue
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, datasets, transforms
import os

# Start from a ResNet-50 backbone with pre-trained weights.
model = models.resnet50(pretrained=True)

# Freeze every pre-trained parameter so the backbone acts as a fixed
# feature extractor.
model.requires_grad_(False)

# Replace the classification head with one sized for this dataset. The new
# layer is created after the freeze, so its parameters are trainable
# (nn.Linear parameters default to requires_grad=True).
num_classes = 3  # Change this to match your dataset
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Run on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Cross-entropy loss for classification; SGD updates only the new head.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

# StepLR multiplies the learning rate by `gamma` every `step_size` calls to
# its step() method.
from torch.optim import lr_scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 121MB/s]


In [None]:
import torch.optim as optim
from torch.optim import lr_scheduler

# Rebuild the optimizer over the trainable parameters only.
# The backbone was frozen earlier (requires_grad=False), so handing every
# parameter to SGD would contradict the head-only fine-tuning setup and hide
# which weights are actually being updated. Filtering makes that explicit;
# SGD raises a ValueError if the resulting list is empty, which surfaces a
# fully-frozen model immediately instead of training nothing.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9)

# The scheduler must wrap the optimizer created above: multiply the learning
# rate by 0.1 every 7 calls to exp_lr_scheduler.step().
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [None]:
# Build one ImageFolder dataset per split. Each split lives in its own
# sub-directory of `data_dir` and uses the transform pipeline of the same name.
image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
    'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val']),
}


In [None]:
# Batch loaders for each split. Only the training split is shuffled:
# validation metrics are sums over the whole split, so shuffling it adds
# nothing and makes runs harder to compare.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=4,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split in ['train', 'val']
}
dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'val']}
print(dataset_sizes)

# Take the label mapping from the training dataset instead of hard-coding it,
# so the indices used at evaluation time are guaranteed to be the ones the
# model was trained with (ImageFolder assigns them from the folder names).
class_indices = dict(image_datasets['train'].class_to_idx)  # class name -> index
class_names = {index: name for name, index in class_indices.items()}  # index -> class name

{'train': 7720, 'val': 27}




In [None]:
# Fine-tune the model: every epoch runs one pass over the training split
# (with weight updates) followed by one pass over the validation split
# (metrics only), printing the average loss and accuracy for each.
num_epochs = 50
for epoch in range(num_epochs):
    # Label each epoch so the train/val lines in the log can be told apart.
    print(f'Epoch {epoch + 1}/{num_epochs}')

    for phase in ['train', 'val']:
        is_train = phase == 'train'
        # train()/eval() switch the mode-dependent layers of the network.
        if is_train:
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients are only produced in the training phase, so that is
            # the only place they need clearing.
            if is_train:
                optimizer.zero_grad()

            # Autograd bookkeeping is skipped entirely during validation.
            with torch.set_grad_enabled(is_train):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if is_train:
                    loss.backward()
                    optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        # The scheduler was created but never advanced, so the learning rate
        # stayed constant. Step it once per epoch, after the optimizer updates.
        if is_train:
            exp_lr_scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects / dataset_sizes[phase]

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

print("Training complete!")

train Loss: 0.6292 Acc: 0.7460
val Loss: 1.0569 Acc: 0.5556
train Loss: 0.5592 Acc: 0.7868
val Loss: 0.7057 Acc: 0.7037
train Loss: 0.5317 Acc: 0.8022
val Loss: 0.4275 Acc: 0.8148
train Loss: 0.5716 Acc: 0.7902
val Loss: 0.5547 Acc: 0.7778
train Loss: 0.5400 Acc: 0.7974
val Loss: 0.6465 Acc: 0.7037
train Loss: 0.5323 Acc: 0.7984
val Loss: 0.9101 Acc: 0.6667


In [None]:
# Write the model's parameters (its state_dict, not the whole module object)
# to a file in the current working directory.
torch.save(obj=model.state_dict(), f='lung_morbidity_classification.pth')

In [None]:
from torchvision import transforms
from PIL import Image

# Inference-time preprocessing. Single images must go through exactly the
# same steps as the validation split (resize to 256, centre-crop to 224,
# convert to tensor, normalise), so reuse that pipeline instead of keeping a
# second copy that could silently drift out of sync.
preprocess = data_transforms['val']

def preprocess_image(image_path):
    """Load one image from disk and turn it into a single-item model batch.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The tensor produced by the module-level `preprocess` pipeline with an
        extra leading batch dimension of size 1.
    """
    # Open inside a context manager so the file handle is released
    # deterministically; convert() returns an independent in-memory copy, so
    # the image stays usable after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    input_tensor = preprocess(image)
    input_batch = input_tensor.unsqueeze(0)  # Add a batch dimension
    return input_batch


In [None]:
import os
from sklearn.metrics import accuracy_score

# Evaluate the model on the held-out test directory, which is expected to
# contain one sub-directory per class holding that class's images.
test_dir = '/content/drive/MyDrive/compiled_data/test'  # Replace with the path to your test directory

# Ground-truth and predicted class indices, in matching order.
actual_labels = []
predicted_labels = []

# Force inference mode. If the training cell was interrupted during a
# training phase the model is still in train mode, which changes the
# behaviour of mode-dependent layers on these single-image batches.
model.eval()

# Sorted listings make the evaluation order, and therefore the printed
# comparison below, reproducible between runs.
for class_name in sorted(os.listdir(test_dir)):
    class_dir = os.path.join(test_dir, class_name)
    if not os.path.isdir(class_dir):
        continue

    # Directories that are not known classes (for example notebook
    # checkpoint folders) are skipped instead of raising a KeyError.
    if class_name not in class_indices:
        print(f'Skipping directory with unknown class name: {class_name}')
        continue
    actual_class = class_indices[class_name]

    for image_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, image_name)
        if not os.path.isfile(image_path):
            continue

        # Preprocess the image and predict; no gradients are needed.
        input_batch = preprocess_image(image_path).to(device)
        with torch.no_grad():
            outputs = model(input_batch)
            _, preds = torch.max(outputs, 1)

        actual_labels.append(actual_class)
        predicted_labels.append(preds.item())

if actual_labels:
    # Fraction of test images whose predicted index equals the true index.
    accuracy = accuracy_score(actual_labels, predicted_labels)
    print(f'Accuracy: {accuracy * 100:.2f}%')

    # Per-image comparison of true and predicted class indices.
    for actual, predicted in zip(actual_labels, predicted_labels):
        print(f'Actual: {actual}, Predicted: {predicted}')
else:
    # Nothing was evaluated, so there is no accuracy to report.
    print(f'No test images found under {test_dir}')
