# BoneawareAI

By: Karthik Subramanian, Charles Green, Sai Anurag Pichika, Saarang Prabhuram


## Setup

In [1]:
# Colab-only: attach Google Drive at /content/drive so the project folder
# stored there is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install pyyaml==5.4.1
!pip install boto3
!pip install configparser
!pip install torch

Collecting pyyaml==5.4.1
  Using cached PyYAML-5.4.1.tar.gz (175 kB)
  Installing build dependencies ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Getting requirements to build wheel ... [?25l[?25herror
[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
[31m│[0m exit code: [1;36m1[0m
[31m╰─>[0m See above for output.

[1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.


In [3]:
import os
# Name of the project folder; interpolated into the Drive path on the next line.
PROJECT_PATH = 'BoneawareAI'
GOOGLE_DRIVE_PATH = f'/content/drive/MyDrive/{PROJECT_PATH}'
# Make the project folder the working directory for every later cell.
os.chdir(GOOGLE_DRIVE_PATH)
# Bare last expression: the notebook displays the resulting working directory.
os.getcwd()

'/content/drive/MyDrive/BoneawareAI'

In [4]:
# make sure you run this cell so that Boneaware src path is recognized
import sys

# Register the project root and its `src` folder on sys.path; this is important
# for the imports in the .py files to work. The membership check keeps the cell
# idempotent, so re-running it does not pile up duplicate entries.
for _search_path in (GOOGLE_DRIVE_PATH, os.path.join(GOOGLE_DRIVE_PATH, 'src')):
    if _search_path not in sys.path:
        sys.path.append(_search_path)

## Data Preprocessing
Download the MURA dataset and apply data augmentation to produce the final training data.

In [None]:
# One-time setup (slow): uncomment the lines below to download and unzip the MURA dataset.
# from src.data_loader import download_dataset
# from src.constants import DATASETS_FOLDER, MURA_DATASET
# from src.helpers.utils import unzip_file
# download_dataset(MURA_DATASET, DATASETS_FOLDER)
# unzip_file(os.path.join(os.getcwd(), DATASETS_FOLDER, MURA_DATASET))

File downloaded successfully to datasets/MURA-v1.1.zip
successfully unzipped the file at path /content/drive/MyDrive/BoneawareAI/datasets/MURA-v1.1.zip


In [9]:
import os
import pandas as pd
from PIL import Image
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm

In [10]:
# Enable faster convolutions (turns on the cuDNN benchmark mode)
torch.backends.cudnn.benchmark = True

# 1. Define Data Transforms
# Values shared by the training and validation pipelines.
image_size = (224, 224)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip / rotation augmentation, then
# tensor conversion and per-channel normalization.
train_steps = [
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
train_transforms = transforms.Compose(train_steps)

# Validation pipeline: same resize and normalization, no random augmentation.
valid_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
valid_transforms = transforms.Compose(valid_steps)

In [11]:
# 2. Define Dataset Class
class MURABinaryDataset(Dataset):
    """Image-level binary dataset expanded from a study-level label CSV.

    The CSV must provide a ``path`` column (study directory, relative to
    ``root_dir``) and a ``label`` column. Every ``.png`` file found in a study
    directory becomes one sample that inherits the label of its study.

    Args:
        csv_file: Path of the CSV file listing studies and labels.
        root_dir: Directory that the ``path`` column is relative to.
        transform: Optional callable applied to the RGB PIL image.

    Attributes:
        data: The raw DataFrame read from ``csv_file``.
        image_paths: One file path per sample, sorted within each study.
        labels: The study label repeated for each image, aligned with
            ``image_paths``.
    """

    # How many consecutive samples are tried before giving up when images
    # cannot be loaded.
    MAX_LOAD_ATTEMPTS = 10

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for rel_path, label in zip(self.data['path'], self.data['label']):
            study_path = os.path.join(root_dir, rel_path)
            # List the directory once and sort it so the sample order is
            # reproducible. Hidden dotfiles are skipped: presumably these are
            # sidecar files (e.g. macOS '._*.png') rather than images - TODO confirm.
            png_files = sorted(
                name for name in os.listdir(study_path)
                if name.endswith('.png') and not name.startswith('.')
            )
            self.image_paths.extend(os.path.join(study_path, name) for name in png_files)
            self.labels.extend([label] * len(png_files))

    def __len__(self):
        """Return the number of images across all studies."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        If the image cannot be opened or transformed, a warning is printed and
        the next sample (wrapping around) is returned instead. Returning
        ``None`` placeholders is avoided because the default DataLoader
        collation cannot batch them.

        Raises:
            IndexError: If ``idx`` is outside the dataset.
            RuntimeError: If ``MAX_LOAD_ATTEMPTS`` consecutive samples fail.
        """
        num_images = len(self.image_paths)
        # Keep normal sequence semantics so plain iteration still terminates.
        if not -num_images <= idx < num_images:
            raise IndexError(f"index {idx} out of range for dataset of size {num_images}")
        start = idx % num_images

        for offset in range(min(self.MAX_LOAD_ATTEMPTS, num_images)):
            candidate = (start + offset) % num_images
            img_path = self.image_paths[candidate]
            try:
                # The context manager closes the file handle once the pixel
                # data has been copied by convert().
                with Image.open(img_path) as handle:
                    image = handle.convert('RGB')
                if self.transform is not None:
                    image = self.transform(image)
            except Exception as e:
                # Print warning and fall through to the next candidate file
                print(f"Warning: Skipping file {img_path} due to error: {e}")
                continue
            return image, torch.tensor(self.labels[candidate], dtype=torch.float32)

        raise RuntimeError(
            f"Could not load any image in {self.MAX_LOAD_ATTEMPTS} attempts starting at index {idx}"
        )


In [12]:
# 3. Load Datasets
# Both label CSVs live under the same datasets folder, which is also the root
# that the study paths are joined onto.
root_dir = '/content/drive/MyDrive/BoneawareAI/datasets/'
train_csv = os.path.join(root_dir, 'MURA-v1.1/train_labeled_studies.csv')
valid_csv = os.path.join(root_dir, 'MURA-v1.1/valid_labeled_studies.csv')

train_dataset = MURABinaryDataset(train_csv, root_dir, transform=train_transforms)
valid_dataset = MURABinaryDataset(valid_csv, root_dir, transform=valid_transforms)

# 4. Create DataLoaders
# Settings common to both loaders; only the training loader shuffles.
loader_options = dict(batch_size=32, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
valid_loader = DataLoader(valid_dataset, shuffle=False, **loader_options)




In [13]:
# 5. Define Custom DenseNet
class DenseLayer(nn.Module):
    """Single dense unit: BatchNorm -> ReLU -> 3x3 conv -> Dropout.

    The ``growth_rate`` feature maps produced by the unit are concatenated
    onto its input along the channel dimension, so the output carries
    ``in_channels + growth_rate`` channels.
    """

    def __init__(self, in_channels, growth_rate, dropout_rate=0.2):
        super().__init__()
        stages = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            # padding=1 with a 3x3 kernel keeps the spatial size unchanged
            nn.Conv2d(in_channels, growth_rate, kernel_size=3, stride=1, padding=1, bias=False),
            nn.Dropout(dropout_rate),
        ]
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        """Return the input with the newly computed feature maps appended."""
        return torch.cat((x, self.layer(x)), dim=1)

class DenseBlock(nn.Module):
    """Stack of ``num_layers`` DenseLayer units applied one after another.

    Each unit receives ``growth_rate`` more input channels than the one before
    it, because every unit appends that many feature maps to its input.
    """

    def __init__(self, num_layers, in_channels, growth_rate, dropout_rate=0.2):
        super().__init__()
        units = (
            DenseLayer(in_channels + index * growth_rate, growth_rate, dropout_rate)
            for index in range(num_layers)
        )
        self.block = nn.Sequential(*units)

    def forward(self, x):
        """Run the input through every dense unit in order."""
        features = self.block(x)
        return features

class TransitionLayer(nn.Module):
    """Channel-reducing step: BatchNorm -> ReLU -> 1x1 conv -> 2x2 average pool.

    The 1x1 convolution maps ``in_channels`` to ``out_channels`` and the
    pooling halves the spatial resolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2),
        ]
        self.transition = nn.Sequential(*stages)

    def forward(self, x):
        """Return the channel-reduced, spatially downsampled feature map."""
        reduced = self.transition(x)
        return reduced

class DenseNet(nn.Module):
    """DenseNet-style classifier assembled from DenseBlock / TransitionLayer.

    Args:
        num_blocks: Number of dense blocks.
        num_layers_per_block: Dense units inside each block.
        growth_rate: Channels added by every dense unit; the stem produces
            ``2 * growth_rate`` channels.
        reduction: Factor applied to the channel count by each transition.
        num_classes: Size of the final linear layer's output.

    The classifier ends in a Sigmoid, so ``forward`` returns values in [0, 1].
    """

    def __init__(self, num_blocks, num_layers_per_block, growth_rate, reduction, num_classes=1):
        super().__init__()
        self.growth_rate = growth_rate
        channels = 2 * growth_rate

        # Stem: strided 7x7 convolution followed by a strided max pool
        self.init_conv = nn.Sequential(
            nn.Conv2d(3, channels, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # Dense blocks, with a transition between consecutive blocks
        stages = []
        last_block = num_blocks - 1
        for block_idx in range(num_blocks):
            stages.append(DenseBlock(num_layers_per_block, channels, growth_rate))
            channels += num_layers_per_block * growth_rate
            if block_idx == last_block:
                # The final block feeds the classifier directly
                continue
            reduced = int(channels * reduction)
            stages.append(TransitionLayer(channels, reduced))
            channels = reduced
        self.features = nn.Sequential(*stages)

        # Head: global average pool -> flatten -> linear -> sigmoid
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(channels, num_classes),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Apply stem, dense feature extractor and classifier head in order."""
        return self.classifier(self.features(self.init_conv(x)))

In [14]:
# Define the device: use the GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Instantiate the DenseNet and move it to the selected device
model = DenseNet(num_blocks=3, num_layers_per_block=4, growth_rate=32, reduction=0.5)
model = model.to(device)

In [15]:
# 6. Define Loss and Optimizer (no learning-rate scheduler is configured here)
# The model's classifier ends in a Sigmoid, so binary cross-entropy is applied
# to probabilities rather than logits.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [16]:
# 7. Training Function with Progress Monitoring
def train_model(model, criterion, optimizer, train_loader, valid_loader, num_epochs=25):
    """Train a binary classifier and restore its best validation weights.

    Each epoch runs a training pass over ``train_loader`` followed by an
    evaluation pass over ``valid_loader``. The weights of the epoch with the
    highest validation accuracy are snapshotted and loaded back into the model
    before it is returned.

    Args:
        model: Network returning one probability per sample (its output is
            flattened and thresholded at 0.5).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Iterable of ``(inputs, labels)`` tensor batches.
        valid_loader: Iterable of ``(inputs, labels)`` tensor batches.
        num_epochs: Number of train + validation rounds.

    Returns:
        The same ``model`` object, carrying the best validation weights.
    """
    import copy  # local import: only needed for the weight snapshots below

    # Run on whatever device the model already lives on instead of relying on
    # a notebook-level global.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device("cpu")

    # state_dict() hands back references to the live parameters, so it must be
    # deep-copied; otherwise the "best" snapshot silently follows later updates.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    loaders = {'train': train_loader, 'valid': valid_loader}

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print('-' * 10)

        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            model.train(is_training)
            loader = loaders[phase]

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            progress_bar = tqdm(loader, total=len(loader), desc=f"{phase} Progress")

            for inputs, labels in progress_bar:
                # Batches arrive from the loader already collated into tensors,
                # so they are moved to the device as-is.
                inputs = inputs.to(run_device, non_blocking=True)
                labels = labels.to(run_device, non_blocking=True).float().reshape(-1)

                if is_training:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    # reshape(-1) keeps a 1-D (batch,) shape even when the batch
                    # holds a single sample; squeeze() would collapse it to 0-d.
                    outputs = model(inputs).reshape(-1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                preds = (outputs > 0.5).float()
                running_loss += loss.item() * batch_size
                running_corrects += int((preds == labels).sum().item())
                num_samples += batch_size

                progress_bar.set_postfix(loss=loss.item())

            # Average over the samples actually processed; guard the empty case.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f"Best Validation Accuracy: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model



In [None]:
# 8. Train the Model
# Ten epochs over the training loader, validating after each one; the returned
# object replaces `model`.
model = train_model(
    model,
    criterion,
    optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    num_epochs=10,
)

In [None]:
# Save the model
checkpoint_path = 'densenet_mura.pth'
torch.save(model.state_dict(), checkpoint_path)

# Load the model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only runtime.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()


In [None]:
from sklearn.metrics import classification_report, roc_auc_score

def evaluate_model(model, loader):
    """Print a classification report and the AUC-ROC of ``model`` on ``loader``.

    Args:
        model: Network returning one probability per sample.
        loader: Iterable of ``(inputs, labels)`` tensor batches.

    The report uses predictions thresholded at 0.5. The AUC-ROC is computed
    from the raw probabilities, because thresholded 0/1 predictions collapse
    the ROC curve to a single operating point.
    """
    model.eval()

    # Evaluate on whatever device the model already lives on.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device("cpu")

    all_probs = []
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(run_device)
            # reshape(-1) keeps a 1-D result even for a single-sample batch.
            probs = model(inputs).reshape(-1)
            preds = (probs > 0.5).float()

            all_probs.extend(probs.cpu().tolist())
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.float().reshape(-1).tolist())

    print("Classification Report:")
    print(classification_report(all_labels, all_preds))
    if len(set(all_labels)) < 2:
        # roc_auc_score raises when only one class is present in the labels.
        print("AUC-ROC: undefined (only one class present in labels)")
    else:
        print(f"AUC-ROC: {roc_auc_score(all_labels, all_probs):.4f}")

# Evaluate on validation set (prints the report and AUC-ROC)
evaluate_model(model=model, loader=valid_loader)
