Author: Sofia
Date: April 24
Subject: Testing the dataset of oral diseases from Google named DSB on ResNet50 models trained by the dataset of oral diseases from Kaggle

#Data Preparation

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Import necessary libraries
import os
import tensorflow as tf
import torch.nn as nn
from tensorflow.keras.models import load_model
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
import torchvision
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import torch
from torchvision.models import resnet50

from torch.utils.data import DataLoader

Mounted at /content/drive


In [2]:
# Initializations
image_size = (112,112)
batch_sizes = 32

# Specify the root directory where your images are located
main_DS_directory = '/content/New_Dataset' #'/content/oral-diseases'

# Specify the output directory to save the processed images
Preprocessed_DS_directory = '/content/dataset' #'/content/decreased_oral_diseases'

dataset_dir = '/content/dataset/DataSet B'
model_path = '/content/drive/MyDrive/QM/modelDSAfold1.h5'

# Specify the output directory to save the excel file
results_directory = '/content/drive/My Drive/QM/results.xlsx'
Plot_directory = '/content/drive/My Drive/QM/plots.xlsx'

# Dataset Preparation for testing

In [3]:
import zipfile

with zipfile.ZipFile('/content/drive/MyDrive/QM/New_Dataset.zip') as zipObj:
  members = [file for file in zipObj.namelist() if "__MACOSX" not in file]
  zipObj.extractall('/content/New_Dataset', members=members)

# Optionally, remove the __MACOSX directory if it was extracted
macosx_folder = os.path.join('/content/New_Dataset', "__MACOSX")
if os.path.exists(macosx_folder):
    os.rmdir(macosx_folder)


In [4]:
# Normalizing them to [0,1], Resizing to 112,112, 3
def normalize_and_resize_image(image, target_size):

    # Resize image into 112*112*3
    image = image.resize(target_size)

    # Normalize pixel values to the range [0, 1]
    image = np.array(image)
    image = image / 255.0

    # Convert the normalized numpy array back to PIL image
    image = Image.fromarray((image * 255).astype(np.uint8))

    return image

# Saving the normalized images into a new directory in Google colab with the same subdirectories and structure
def process_images_in_directory(directory, Preprocessed_DS_directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file has an image extension
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                # Construct the full path to the input image file
                image_path = os.path.join(root, file)
                if 'augmented' not in image_path and 'Caries_Gingivitus_ToothDiscoloration_Ulcer-yolo_annotated-Dataset' not in image_path:
                  # Open the image using PIL
                  image = Image.open(image_path)

                  # Ensure image is in RGB mode
                  image = image.convert("RGB")

                  # Normalize and Resize the image
                  #print(image_path)
                  processed_image = normalize_and_resize_image(image, image_size)

                  # Construct the full path to the output directory
                  output_subdirectory = os.path.relpath(root, directory)
                  output_path = os.path.join(Preprocessed_DS_directory, output_subdirectory)
                  os.makedirs(output_path, exist_ok=True)

                  # Save the processed image
                  filename = os.path.join(output_path, file)
                  processed_image.save(filename, format='JPEG')  # Change 'JPEG' to the desired format
                  #print(filename)
                  #if filename.find('augmented') == -1 or filename.find('Caries_Gingivitus_ToothDiscoloration_Ulcer-yolo_annotated-Dataset') == -1:
                  #np.save(filename, image)

In [5]:
# Call the function to process images in the directory
process_images_in_directory(main_DS_directory, Preprocessed_DS_directory)

In [6]:
# Define transformations by Composing several transforms together
transform = transforms.Compose([
    transforms.Resize(image_size),
    #Convert a PIL Image or ndarray to tensor
    transforms.ToTensor()
])

# Use ImageFolder to load your dataset
dataset = torchvision.datasets.ImageFolder(root = dataset_dir, transform=transform)

# Testing

In [7]:
model = resnet50(weights='ResNet50_Weights.DEFAULT')
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 6)

model.load_state_dict(torch.load(model_path))

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 129MB/s]


<All keys matched successfully>

In [8]:
# Assuming you have a PyTorch DataLoader for your test dataset
test_loader = DataLoader(dataset, batch_size=batch_sizes, shuffle=False)

# Set the model to evaluation mode
model.eval()

# Define a criterion for calculating the loss (e.g., cross-entropy loss)
criterion = nn.CrossEntropyLoss()

# Initialize variables for loss and accuracy
total_loss = 0.0
correct = 0
total_samples = 0

# Iterate over the test dataset
with torch.no_grad():  # Disable gradient calculation during evaluation
    for images, labels in test_loader:
        # Forward pass
        outputs = model(images)

        # Calculate loss
        loss = criterion(outputs, labels)
        total_loss += loss.item() * images.size(0)

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

# Calculate average loss and accuracy
average_loss = total_loss / total_samples
accuracy = correct / total_samples

print(f'Test Loss: {average_loss}')
print(f'Test Accuracy: {accuracy}')

Test Loss: 2.256048894466314
Test Accuracy: 0.462406015037594
