In [1]:
import shutil
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os
import cv2
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split  # Added explicit import
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.cuda.amp import autocast, GradScaler

In [3]:
# Copy cropsobel folder
src_cropsobel = '/content/drive/MyDrive/Colab Notebooks/Dental Scans/cropsobel'
dst_cropsobel = '/content/cropsobel'
try:
    shutil.copytree(src_cropsobel, dst_cropsobel, dirs_exist_ok=True)
    print("✅ Copied cropsobel folder to /content/cropsobel")
except Exception as e:
    print(f"Error copying cropsobel folder: {e}")
    exit()

# Extract training dataset
extract_path = '/content/dataset'
os.makedirs(extract_path, exist_ok=True)
zip_path = '/content/drive/MyDrive/Colab Notebooks/Dental Scans/cropped_jaw_dataset_sobel.zip'

try:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("✅ Training dataset extraction complete!")
except FileNotFoundError:
    print(f"Error: Could not find the file at: {zip_path}")
    exit()
except Exception as e:
    print(f"Error extracting dataset: {e}")
    exit()

✅ Copied cropsobel folder to /content/cropsobel
✅ Training dataset extraction complete!


In [4]:
# Define paths
dataset_path = Path(extract_path) / "cropped_jaw_dataset_sobel"
test_path = Path("/content/cropsobel")
subfolders = ["above40", "AdditionalAbove40", "age17-40", "AdditionalAge17-40"]
output_path = Path(extract_path) / "clustered_autoencoder"
test_output_path = Path("/content/classify")
basket_normal = output_path / "normal"
basket_osteoporotic = output_path / "osteoporotic"
test_basket_normal = test_output_path / "normal"
test_basket_osteoporotic = test_output_path / "osteoporosis"

In [5]:
# Create output directories
for path in [basket_normal, basket_osteoporotic, test_basket_normal, test_basket_osteoporotic]:
    path.mkdir(parents=True, exist_ok=True)

# Custom Dataset
class OPGDataset(Dataset):
    def __init__(self, image_paths, labels=None, img_size=(128, 128), is_test=False):
        self.image_paths = image_paths
        self.labels = labels
        self.is_test = is_test
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
            transforms.Lambda(lambda x: x.float()),
            *([] if is_test else [
                transforms.RandomRotation(10),
                transforms.RandomHorizontalFlip(),
                transforms.RandomAffine(degrees=0, scale=(0.9, 1.1))
            ])
        ])
        self.img_size = img_size
        self.folder_map = {str(p): s for s in subfolders for p in (dataset_path / s).glob("*.jpg")}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = cv2.imread(str(self.image_paths[idx]), cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError(f"Failed to load image: {self.image_paths[idx]}")
        img = cv2.resize(img, self.img_size)
        img = img / 255.0
        img = self.transform(img)
        intensity = img.mean().item()
        if self.is_test:
            return img, intensity, str(self.image_paths[idx])
        return img, intensity, self.labels[idx] if self.labels is not None else 0


In [6]:

# CNN Model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.convnet = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(64 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 2)
        )

    def forward(self, x):
        return self.convnet(x)

In [7]:
# Load image paths and assign pseudo-labels
train_image_paths = []
train_labels = []
train_intensities = []
below_40 = ["age17-40", "AdditionalAge17-40"]
above_40 = ["above40", "AdditionalAbove40"]

for subfolder in subfolders:
    input_dir = dataset_path / subfolder
    for img_path in input_dir.glob("*.jpg"):
        img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
        if img is not None:
            train_image_paths.append(img_path)
            train_intensities.append(img.mean())
            if subfolder in below_40:
                train_labels.append(0)  # 100% normal
            else:
                # 85% osteoporotic, 15% normal
                train_labels.append(1 if np.random.rand() < 0.85 else 0)

# Verify counts
if len(train_image_paths) != 396:
    print(f"Error: Found {len(train_image_paths)} training images, expected 396. Exiting.")
    exit()
if len(train_labels) != 396 or len(train_intensities) != 396:
    print(f"Error: Inconsistent lengths - Images: {len(train_image_paths)}, Labels: {len(train_labels)}, Intensities: {len(train_intensities)}. Exiting.")
    exit()


In [8]:
# Load test image paths
test_image_paths = list(test_path.glob("*.jpg"))

# Verify counts
if len(train_image_paths) != 396:
    print(f"Error: Found {len(train_image_paths)} training images, expected 396. Exiting.")
    exit()
if len(test_image_paths) != 13:
    print(f"Error: Found {len(test_image_paths)} test images, expected 13. Exiting.")
    exit()

In [9]:
# Verify counts
if len(train_image_paths) != 396:
    print(f"Error: Found {len(train_image_paths)} training images, expected 396. Exiting.")
    exit()
if len(test_image_paths) != 13:
    print(f"Error: Found {len(test_image_paths)} test images, expected 13. Exiting.")
    exit()

In [10]:
# Split training data
train_idx, val_idx = train_test_split(range(396), test_size=0.2, stratify=train_labels, random_state=42)
train_paths = [train_image_paths[i] for i in train_idx]
val_paths = [train_image_paths[i] for i in val_idx]
val_labels = [train_labels[i] for i in val_idx] # Create val_labels from original train_labels
train_labels = [train_labels[i] for i in train_idx] # Then update train_labels

In [11]:
# Create datasets
try:
    train_dataset = OPGDataset(train_paths, train_labels)
    val_dataset = OPGDataset(val_paths, val_labels)
    test_dataset = OPGDataset(test_image_paths, is_test=True)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)
except Exception as e:
    print(f"Error creating datasets: {e}")
    exit()

# Initialize model and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss(weight=torch.tensor([0.15, 0.85]).to(device))  # Weighted for 85% osteo
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scaler = GradScaler()
except Exception as e:
    print(f"Error initializing model: {e}")
    exit()


  scaler = GradScaler()


In [12]:
# Train CNN
num_epochs = 50
best_val_acc = 0.0
try:
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, _, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, _, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_acc = correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Val Acc: {val_acc:.4f}")

        # Save checkpoint
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), '/content/model_checkpoint.pth')

        torch.cuda.empty_cache()
except Exception as e:
    print(f"Error during training: {e}")
    exit()

  with autocast():


Epoch [1/50], Loss: 0.3447, Val Acc: 0.6500
Epoch [2/50], Loss: 0.3650, Val Acc: 0.6500
Epoch [3/50], Loss: 0.3507, Val Acc: 0.6500
Epoch [4/50], Loss: 0.3287, Val Acc: 0.6500
Epoch [5/50], Loss: 0.3397, Val Acc: 0.6500
Epoch [6/50], Loss: 0.3072, Val Acc: 0.6500
Epoch [7/50], Loss: 0.3468, Val Acc: 0.6500
Epoch [8/50], Loss: 0.2677, Val Acc: 0.6500
Epoch [9/50], Loss: 0.2725, Val Acc: 0.6500
Epoch [10/50], Loss: 0.2696, Val Acc: 0.6750
Epoch [11/50], Loss: 0.2783, Val Acc: 0.6500
Epoch [12/50], Loss: 0.2821, Val Acc: 0.6875
Epoch [13/50], Loss: 0.2441, Val Acc: 0.6875
Epoch [14/50], Loss: 0.3098, Val Acc: 0.6500
Epoch [15/50], Loss: 0.2699, Val Acc: 0.6500
Epoch [16/50], Loss: 0.2553, Val Acc: 0.7250
Epoch [17/50], Loss: 0.2821, Val Acc: 0.7500
Epoch [18/50], Loss: 0.2333, Val Acc: 0.7500
Epoch [19/50], Loss: 0.2547, Val Acc: 0.7500
Epoch [20/50], Loss: 0.2254, Val Acc: 0.7750
Epoch [21/50], Loss: 0.2135, Val Acc: 0.7625
Epoch [22/50], Loss: 0.2248, Val Acc: 0.7250
Epoch [23/50], Loss

In [13]:
# Load best model
try:
    model.load_state_dict(torch.load('/content/model_checkpoint.pth'))
except Exception as e:
    print(f"Error loading checkpoint: {e}")
    exit()


In [14]:
# --- Save the final model weights ---

# Define the destination path and filename as requested
destination_location = "/content/drive/MyDrive/Colab Notebooks/Dental Scans/Labeled /CNN model"
file_name = "CNN_WEIGHTS_DENTAL_SCANS.H5"
full_path = os.path.join(destination_location, file_name)

# Ensure the destination directory exists, creating it if necessary
os.makedirs(destination_location, exist_ok=True)

# Save the model's state dictionary (weights and biases) to the specified file.
# Note: While the .h5 extension is commonly associated with Keras/TensorFlow,
# torch.save will save the PyTorch model's parameters in its own format.
torch.save(model.state_dict(), full_path)

print(f"✅ Model weights saved successfully to: {full_path}")

✅ Model weights saved successfully to: /content/drive/MyDrive/Colab Notebooks/Dental Scans/Labeled /CNN model/CNN_WEIGHTS_DENTAL_SCANS.H5


In [15]:
# Test classification
model.eval()
test_predictions = []
test_likelihoods = []
test_paths = []
try:
    with torch.no_grad():
        for images, intensities, paths in test_loader:
            images = images.to(device)
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()  # Osteoporotic prob
            for prob, intensity, path in zip(probs, intensities, paths):
                likelihood = min(max(prob + 0.15 * (intensity / 255.0), 0.0), 1.0)
                test_likelihoods.append(likelihood)
                test_predictions.append(1 if likelihood > 0.5 else 0)
                test_paths.append(path)
except Exception as e:
    print(f"Error during testing: {e}")
    exit()

In [16]:
# Save test images to baskets
for path, pred in zip(test_paths, test_predictions):
    img = cv2.imread(path)
    if img is None:
        print(f"Warning: Failed to save {Path(path).name}")
        continue
    output_dir = test_basket_osteoporotic if pred == 1 else test_basket_normal
    output_path = output_dir / Path(path).name
    cv2.imwrite(str(output_path), img)

In [17]:
# Save test classification summary
with open(test_output_path / "test_classification_summary.txt", "w") as f:
    f.write(f"Images in Test Normal basket: {sum(1 for p in test_predictions if p == 0)}\n")
    f.write(f"Images in Test Osteoporotic basket: {sum(1 for p in test_predictions if p == 1)}\n")
    f.write("\nOsteoporosis Likelihoods:\n")
    for path, likelihood in zip(test_paths, test_likelihoods):
        f.write(f"Image: {Path(path).name}, Likelihood of Osteoporosis: {likelihood:.2%}\n")

print("CNN classification complete. Check 'clustered_autoencoder' and 'classify' folders.")

CNN classification complete. Check 'clustered_autoencoder' and 'classify' folders.
