##### from google.colab import drive

#drive.mount('/content/drive')

In [None]:
import os

import matplotlib.pyplot as plt

import numpy as np

from torchvision import datasets, transforms

from torch.utils.data import DataLoader

import torchvision





# Root of the dataset on Kaggle (not Google Drive); the split folders are read from here
data_dir = '/kaggle/input/infosys-mri-dataset'

# Per-split preprocessing.
# - 'train' applies random augmentation and ends with a 224x224 crop.
# - 'val' / 'test' are deterministic. They are resized to an explicit (224, 224):
#   Resize(224) alone only fixes the shorter side, so non-square images would
#   produce tensors of different sizes that cannot be stacked into one batch.
# - Every split uses the same ImageNet mean/std normalisation, so the inputs seen
#   at evaluation time are on the same scale as the inputs seen during training.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),    # flip horizontally
        transforms.RandomVerticalFlip(),      # flip vertically
        transforms.RandomRotation(20),        # rotation up to 20 degrees
        transforms.RandomResizedCrop(224),    # random crop, resized to 224x224
        transforms.ColorJitter(brightness=0.1, contrast=0.1),  # small brightness/contrast jitter
        transforms.ToTensor(),                # PIL image -> float tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}



# One ImageFolder per split; labels come from the class sub-directory names
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), transform=data_transforms[split])
    for split in ['train', 'val', 'test']
}

# Only the training loader is shuffled. Validation and test batches keep the
# dataset order so that evaluation runs are reproducible sample by sample.
dataloaders = {
    split: DataLoader(image_datasets[split], batch_size=32, shuffle=(split == 'train'))
    for split in ['train', 'val', 'test']
}

# Class names as discovered from the train/ sub-directories
class_names = image_datasets['train'].classes
print(f"Classes: {class_names}")  # e.g. ['Benign', 'Malignant'] for the folder names used in this notebook



# Count the number of images in each class for train and val datasets

def count_class_distribution(dataset):
    """Return ``(benign_count, malignant_count)`` for a dataset with ``.samples``.

    ``dataset.samples`` is read as a sequence of ``(item, label)`` pairs.
    Label ``0`` is counted as benign; every other label is counted as malignant.
    """
    labels = [label for _, label in dataset.samples]
    benign_total = sum(1 for label in labels if label == 0)
    return benign_total, len(labels) - benign_total



# Count the class distribution in the training, validation and test sets
train_benign, train_malignant = count_class_distribution(image_datasets['train'])
val_benign, val_malignant = count_class_distribution(image_datasets['val'])
test_benign, test_malignant = count_class_distribution(image_datasets['test'])

print(f"Training set - Benign: {train_benign}, Malignant: {train_malignant}")
print(f"Validation set - Benign: {val_benign}, Malignant: {val_malignant}")
# The third line reports the test split (it was previously labelled "Validation set")
print(f"Test set - Benign: {test_benign}, Malignant: {test_malignant}")


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image

from skimage.feature import hog

from skimage import exposure



# Read one training slice as grayscale (PIL mode 'L'), resized to 224x224
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L')
image = image.resize((224, 224))
image_np = np.array(image)

# Left panel: the grayscale slice. Right panel: histogram of its pixel values.
fig, (ax_image, ax_hist) = plt.subplots(1, 2, figsize=(18, 6))

ax_image.imshow(image_np, cmap='gray')
ax_image.set_title('Original Grayscale Image')
ax_image.axis('off')

ax_hist.hist(image_np.ravel(), bins=256, range=(0, 255), color='black', alpha=0.7)
ax_hist.set_title('Histogram of Pixel Values (0-255)')
ax_hist.set_xlabel('Pixel Value (0-255)')
ax_hist.set_ylabel('Frequency')

fig.tight_layout()
plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image

from skimage.feature import hog

from skimage import exposure



# Read one training slice as grayscale, resized to 224x224
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L')
image = image.resize((224, 224))
image_np = np.array(image)

# HOG descriptor: 8 orientation bins, 16x16-pixel cells, one cell per block.
# visualize=True additionally returns an image rendering of the descriptor.
hog_features, hog_image = hog(
    image_np,
    orientations=8,
    pixels_per_cell=(16, 16),
    cells_per_block=(1, 1),
    visualize=True,
    channel_axis=None,
)

# Stretch the HOG rendering so that the (0, 10) input range fills the output range
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Show the input slice next to its HOG rendering
fig, (ax_image, ax_hog) = plt.subplots(1, 2, figsize=(12, 6))
ax_image.imshow(image_np, cmap='gray')
ax_image.set_title('Original Grayscale Image')
ax_hog.imshow(hog_image_rescaled, cmap='gray')
ax_hog.set_title('HOG Features')
plt.show()

print("Image Array:")
print(image_np)
print("HOG Features Shape:", hog_features.shape)


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image

from numba import prange



# Read one training slice as grayscale, resized to 224x224
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)

# 3x3 Sobel kernels: sobel_x responds to horizontal intensity changes,
# sobel_y (the transpose of sobel_x) to vertical ones.
sobel_x = np.array([
    [-1, 0, 1],
    [-2, 0, 2],
    [-1, 0, 1],
])
sobel_y = np.array([
    [-1, -2, -1],
    [0, 0, 0],
    [1, 2, 1],
])



# Function to perform convolution using the kernel

def convolve(x, h):
    """Apply the 2-D kernel ``h`` to the 2-D image ``x``.

    The kernel is applied as written (it is not flipped), i.e. each output
    pixel is ``sum(h[k1, k2] * x[n1 + k1 - rh, n2 + k2 - rw])`` where
    ``rh, rw`` are the kernel half-sizes.

    Parameters
    ----------
    x : 2-D array-like
        Input image.
    h : 2-D array-like
        Kernel.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``x``. Pixels closer to the border than
        the kernel radius are left at zero; if the image is smaller than the
        kernel the result is all zeros.
    """
    # Work in float64 so that small integer dtypes (e.g. uint8 images) cannot
    # overflow when multiplied by negative or large kernel taps.
    x = np.asarray(x, dtype=np.float64)
    h = np.asarray(h)
    xh, xw = x.shape
    hh, hw = h.shape
    # Kernel radius
    rh, rw = hh // 2, hw // 2

    output = np.zeros(x.shape)
    out_h, out_w = xh - 2 * rh, xw - 2 * rw
    if out_h <= 0 or out_w <= 0:
        return output

    # View onto the region of the output that receives values
    valid = output[rh:rh + out_h, rw:rw + out_w]
    # Accumulate one kernel tap at a time over the whole image. The plain
    # range() loops replace numba.prange, which is only a range outside a jit.
    for k1 in range(hh):
        for k2 in range(hw):
            valid += h[k1, k2] * x[k1:k1 + out_h, k2:k2 + out_w]
    return output



# Horizontal and vertical responses from the two Sobel kernels
gradient_x = convolve(image_np, sobel_x)
gradient_y = convolve(image_np, sobel_y)

# Gradient magnitude per pixel
gradient_magnitude = np.sqrt(gradient_x**2 + gradient_y**2)

# Min-max normalise to [0, 1] for display. A constant image has zero range,
# which would otherwise divide by zero and fill the array with NaN.
_magnitude_range = gradient_magnitude.max() - gradient_magnitude.min()
if _magnitude_range > 0:
    gradient_magnitude = (gradient_magnitude - gradient_magnitude.min()) / _magnitude_range
else:
    gradient_magnitude = np.zeros_like(gradient_magnitude)

# Display the original and gradient magnitude images side by side
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.title('Original Image')
plt.imshow(image_np, cmap='gray')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.title('Gradient Magnitude: Sobel operator')
plt.imshow(gradient_magnitude, cmap='gray')
plt.axis('off')

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



# Read one training slice as grayscale, resized to 224x224, as a NumPy array
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)



def getLBPimage(gray_image):
    '''
    Local binary pattern (LBP) code of every interior pixel, using the centre
    pixel of each 3x3 window as the threshold.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape and dtype; the one-pixel
             border is left at zero. Images smaller than 3x3 give all zeros.

    Bit k of a code is 1 when the k-th neighbour is >= the centre pixel.
    Neighbours are numbered in row-major order over the 3x3 window with the
    centre skipped, so the top-left neighbour is bit 0 and the bottom-right
    neighbour is bit 7.
    '''
    gray_image = np.asarray(gray_image)
    imgLBP = np.zeros_like(gray_image)

    height, width = gray_image.shape
    inner_h, inner_w = height - 2, width - 2
    if inner_h <= 0 or inner_w <= 0:
        return imgLBP

    center = gray_image[1:-1, 1:-1]
    # Window offsets in row-major order, centre excluded
    offsets = [(dy, dx) for dy in range(3) for dx in range(3) if (dy, dx) != (1, 1)]

    # Compare each shifted copy of the image with the centre pixels at once,
    # instead of building every 3x3 window in a Python loop.
    codes = np.zeros(center.shape, dtype=np.int64)
    for bit, (dy, dx) in enumerate(offsets):
        neighbour = gray_image[dy:dy + inner_h, dx:dx + inner_w]
        codes += (neighbour >= center).astype(np.int64) << bit

    imgLBP[1:-1, 1:-1] = codes
    return imgLBP



# Apply LBP to the single image and flatten the codes for the histogram
imgLBP = getLBPimage(image_np)
vecimgLBP = imgLBP.flatten()

# Three panels: grayscale input, LBP image, histogram of LBP codes
fig = plt.figure(figsize=(20, 8))

ax = fig.add_subplot(1, 3, 1)
ax.imshow(image_np, cmap="gray")
ax.set_title("Grayscale Image")

ax = fig.add_subplot(1, 3, 2)
ax.imshow(imgLBP, cmap="gray")
ax.set_title("LBP Converted Image")

ax = fig.add_subplot(1, 3, 3)
# One bin per LBP code, with integer edges 0, 1, ..., 256. With bins=256 the
# edges are spread between the data minimum and maximum, so the left edges are
# not integers and the labels printed below are off by one for high codes.
freq, lbp, _ = ax.hist(vecimgLBP, bins=np.arange(257))
ax.set_ylim(0, 40000)
lbp = lbp[:-1]  # left edge of each bin == the LBP code of that bin

# Label the LBP codes that occur more than 5000 times
largeTF = freq > 5000
for x, fr in zip(lbp[largeTF], freq[largeTF]):
    ax.text(x, fr, "{:6.0f}".format(x), color="magenta")
ax.set_title("LBP Histogram")

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



# Read one training slice as grayscale, resized to 224x224, as a NumPy array
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)



def getLBPimageUsingMean(gray_image):
    '''
    LBP variant that thresholds each 3x3 window against the window mean.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape using mean value; the
             one-pixel border is left at zero

    Bit k of a code is 1 when the k-th neighbour (row-major order over the
    window, centre skipped) is >= the mean of all nine window pixels.
    '''
    imgLBP = np.zeros_like(gray_image)
    # Bit weights 1, 2, 4, ..., 128: computed once instead of once per pixel
    bit_weights = 2 ** np.arange(8)

    # Iterate over each pixel (excluding borders)
    for ih in range(1, gray_image.shape[0] - 1):
        for iw in range(1, gray_image.shape[1] - 1):
            window = gray_image[ih-1:ih+2, iw-1:iw+2]  # 3x3 neighbourhood
            # Binary pattern: 1 where the pixel is at least the window mean
            above_mean = (window >= np.mean(window)).astype(int)
            # Drop the centre (flat index 4) and pack the 8 bits into one number
            neighbour_bits = np.delete(above_mean.flatten(), 4)
            imgLBP[ih, iw] = np.dot(neighbour_bits, bit_weights)

    return imgLBP



# Apply mean-based LBP to the single image and flatten the codes for the histogram
imgLBP = getLBPimageUsingMean(image_np)
vecimgLBP = imgLBP.flatten()

# Three panels: grayscale input, LBP image, histogram of LBP codes
fig = plt.figure(figsize=(20, 8))

ax = fig.add_subplot(1, 3, 1)
ax.imshow(image_np, cmap="gray")
ax.set_title("Grayscale Image")

ax = fig.add_subplot(1, 3, 2)
ax.imshow(imgLBP, cmap="gray")
ax.set_title("Mean-based LBP Converted Image")

ax = fig.add_subplot(1, 3, 3)
# Integer-aligned bins (edges 0..256) so that every LBP code has its own bin
# and the bin's left edge is exactly the code that gets printed as a label.
freq, lbp, _ = ax.hist(vecimgLBP, bins=np.arange(257))
ax.set_ylim(0, 40000)
lbp = lbp[:-1]

# Label the LBP codes that occur more than 5000 times
largeTF = freq > 5000
for x, fr in zip(lbp[largeTF], freq[largeTF]):
    ax.text(x, fr, "{:6.0f}".format(x), color="magenta")
ax.set_title("LBP Histogram")

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



# Read one training slice as grayscale, resized to 224x224, as a NumPy array
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)



def getLBPimageUsingMedian(gray_image):
    '''
    LBP variant that thresholds each 3x3 window against the window median.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape using median value; the
             one-pixel border is left at zero. Images smaller than 3x3 give
             all zeros.

    Bit k of a code is 1 when the k-th neighbour (row-major order over the
    window, centre skipped) is >= the median of all nine window pixels.
    '''
    imgLBP = np.zeros_like(gray_image)
    if gray_image.shape[0] < 3 or gray_image.shape[1] < 3:
        return imgLBP

    # All 3x3 windows at once: shape (height-2, width-2, 3, 3)
    windows = np.lib.stride_tricks.sliding_window_view(gray_image, (3, 3))
    # Median of each window; with nine values this is the middle element
    medians = np.median(windows, axis=(2, 3))

    # Binary pattern of every window, flattened to 9 bits per pixel
    above_median = windows >= medians[:, :, np.newaxis, np.newaxis]
    flat_bits = above_median.reshape(above_median.shape[0], above_median.shape[1], 9)
    # Drop the centre (flat index 4) and weight the remaining 8 bits by 1..128
    neighbour_bits = flat_bits[:, :, [0, 1, 2, 3, 5, 6, 7, 8]].astype(np.int64)
    imgLBP[1:-1, 1:-1] = neighbour_bits @ (2 ** np.arange(8))

    return imgLBP



# Apply median-based LBP to the single image and flatten the codes for the histogram
imgLBP = getLBPimageUsingMedian(image_np)
vecimgLBP = imgLBP.flatten()

# Three panels: grayscale input, LBP image, histogram of LBP codes
fig = plt.figure(figsize=(20, 8))

ax = fig.add_subplot(1, 3, 1)
ax.imshow(image_np, cmap="gray")
ax.set_title("Grayscale Image")

ax = fig.add_subplot(1, 3, 2)
ax.imshow(imgLBP, cmap="gray")
ax.set_title("Median-based LBP Converted Image")

ax = fig.add_subplot(1, 3, 3)
# Integer-aligned bins (edges 0..256): bins=256 alone spreads the edges between
# the data minimum and maximum, which mislabels the codes printed below.
freq, lbp, _ = ax.hist(vecimgLBP, bins=np.arange(257))
ax.set_ylim(0, 40000)
lbp = lbp[:-1]

# Label the LBP codes that occur more than 5000 times
largeTF = freq > 5000
for x, fr in zip(lbp[largeTF], freq[largeTF]):
    ax.text(x, fr, "{:6.0f}".format(x), color="magenta")
ax.set_title("LBP Histogram")

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



# Read one training slice as grayscale, resized to 224x224, as a NumPy array
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)



def getLBPimageUsingVariance(gray_image):
    '''
    LBP variant that thresholds each 3x3 window against the window variance.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape using variance; the
             one-pixel border is left at zero

    Bit k of a code is 1 when the k-th neighbour (row-major order over the
    window, centre skipped) is >= the variance of all nine window pixels.
    '''
    # NOTE(review): pixel values are compared with a variance (squared intensity
    # units), so textured windows mostly give 0 and flat windows mostly give 255
    # -- confirm this is the intended threshold rather than the std deviation.
    imgLBP = np.zeros_like(gray_image)
    # Bit weights 1, 2, 4, ..., 128: computed once instead of once per pixel
    bit_weights = 2 ** np.arange(8)

    # Iterate over each pixel (excluding borders)
    for ih in range(1, gray_image.shape[0] - 1):
        for iw in range(1, gray_image.shape[1] - 1):
            window = gray_image[ih-1:ih+2, iw-1:iw+2]  # 3x3 neighbourhood
            # Binary pattern: 1 where the pixel is at least the window variance
            above_variance = (window >= np.var(window)).astype(int)
            # Drop the centre (flat index 4) and pack the 8 bits into one number
            neighbour_bits = np.delete(above_variance.flatten(), 4)
            imgLBP[ih, iw] = np.dot(neighbour_bits, bit_weights)

    return imgLBP



# Apply variance-based LBP to the single image and flatten the codes for the histogram
imgLBP = getLBPimageUsingVariance(image_np)
vecimgLBP = imgLBP.flatten()

# Three panels: grayscale input, LBP image, histogram of LBP codes
fig = plt.figure(figsize=(20, 8))

ax = fig.add_subplot(1, 3, 1)
ax.imshow(image_np, cmap="gray")
ax.set_title("Grayscale Image")

ax = fig.add_subplot(1, 3, 2)
ax.imshow(imgLBP, cmap="gray")
ax.set_title("Variance-based LBP Converted Image")

ax = fig.add_subplot(1, 3, 3)
# Integer-aligned bins (edges 0..256), independent of which codes actually
# occur, so that the left edge of each bin is the LBP code it counts.
freq, lbp, _ = ax.hist(vecimgLBP, bins=np.arange(257))
ax.set_ylim(0, 40000)
lbp = lbp[:-1]

# Label the LBP codes that occur more than 5000 times
largeTF = freq > 5000
for x, fr in zip(lbp[largeTF], freq[largeTF]):
    ax.text(x, fr, "{:6.0f}".format(x), color="magenta")
ax.set_title("LBP Histogram")

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



# Read one training slice as grayscale, resized to 224x224, as a NumPy array
image_path = '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg'
image = Image.open(image_path).convert('L').resize((224, 224))
image_np = np.array(image)



def getLBPimageUsingMVM(gray_image):
    '''
    LBP variant thresholded by a Mean-Variance-Median (MVM) value per window.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape using Mean-Variance-Median (MVM) metric;
             the one-pixel border is left at zero

    For each 3x3 window the threshold is
        (mean + sqrt(variance) + median) / 3
    and bit k of the code is 1 when the k-th neighbour (row-major order over
    the window, centre skipped) is >= that threshold.
    '''
    imgLBP = np.zeros_like(gray_image)
    # Bit weights 1, 2, 4, ..., 128: computed once instead of once per pixel
    bit_weights = 2 ** np.arange(8)

    # Iterate over each pixel (excluding borders)
    for ih in range(1, gray_image.shape[0] - 1):
        for iw in range(1, gray_image.shape[1] - 1):
            # Step 1: 3x3 neighbourhood around the pixel
            img = gray_image[ih-1:ih+2, iw-1:iw+2]
            # Step 2: mean, variance and median of the neighbourhood
            mean_value = np.mean(img)
            variance_value = np.var(img)
            median_value = np.median(img)
            # Step 3: MVM threshold (the variance enters as its square root)
            mvm_threshold = (mean_value + np.sqrt(variance_value) + median_value) / 3
            # Step 4: binary pattern, centre (flat index 4) removed
            img01_vector = np.delete((img >= mvm_threshold).astype(int).flatten(), 4)
            # Step 5: pack the 8 bits into one number and store it
            imgLBP[ih, iw] = np.dot(img01_vector, bit_weights)
    return imgLBP



# Apply MVM-LBP to the single image and flatten the codes for the histogram
imgLBP = getLBPimageUsingMVM(image_np)
vecimgLBP = imgLBP.flatten()

# Three panels: grayscale input, MVM-LBP image, histogram of MVM-LBP codes
fig = plt.figure(figsize=(20, 8))

ax = fig.add_subplot(1, 3, 1)
ax.imshow(image_np, cmap="gray")
ax.set_title("Grayscale Image")

ax = fig.add_subplot(1, 3, 2)
ax.imshow(imgLBP, cmap="gray")
ax.set_title("MVM-based LBP Converted Image")

ax = fig.add_subplot(1, 3, 3)
# Integer-aligned bins (edges 0..256) so that every code has its own bin and
# the left edge of a bin is exactly the code printed as its label.
freq, lbp, _ = ax.hist(vecimgLBP, bins=np.arange(257))
ax.set_ylim(0, 40000)
lbp = lbp[:-1]

# Label the MVM-LBP codes that occur more than 5000 times
largeTF = freq > 5000
for x, fr in zip(lbp[largeTF], freq[largeTF]):
    ax.text(x, fr, "{:6.0f}".format(x), color="magenta")
ax.set_title("MVM-LBP Histogram")

plt.show()


In [None]:
import numpy as np

import matplotlib.pyplot as plt

from PIL import Image



def getLBPimageUsingMVM(gray_image):
    '''
    Encode every interior pixel with an 8-bit pattern thresholded by the
    Mean-Variance-Median (MVM) value of its 3x3 window.

    == Input ==
    gray_image  : grayscale image of shape (height, width)

    == Output ==
    imgLBP : LBP converted image of the same shape using Mean-Variance-Median (MVM) metric
    '''
    imgLBP = np.zeros_like(gray_image)
    last_row = gray_image.shape[0] - 1
    last_col = gray_image.shape[1] - 1

    row = 1
    while row < last_row:
        col = 1
        while col < last_col:
            window = gray_image[row - 1:row + 2, col - 1:col + 2]

            # threshold = (mean + sqrt(variance) + median) / 3
            spread = np.sqrt(np.var(window))
            threshold = (np.mean(window) + spread + np.median(window)) / 3

            # 0/1 pattern over the window, flattened row by row, centre removed
            pattern = (window >= threshold).astype(int).flatten()
            neighbours = np.delete(pattern, 4)

            # weight the 8 neighbours by 1, 2, 4, ..., 128 and store the sum
            imgLBP[row, col] = np.dot(neighbours, 2 ** np.arange(8))
            col += 1
        row += 1

    return imgLBP



# Images to process: one row of plots per path
image_paths = [
    '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2127/SUB7/p-049.jpg',
    '/kaggle/input/infosys-mri-dataset/train/Benign/BreaDM-Be-1805/SUB1/p-025.jpg',
    '/kaggle/input/infosys-mri-dataset/train/Benign/BreaDM-Be-1805/VIBRANT+C1/p-029.jpg',
    '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2117/SUB5/p-085.jpg',
    '/kaggle/input/infosys-mri-dataset/train/Malignant/BreaDM-Ma-2128/SUB8/p-052.jpg',
]

# squeeze=False keeps axs two-dimensional even when image_paths has a single
# entry, so axs[idx, col] below is always valid.
fig, axs = plt.subplots(len(image_paths), 3, figsize=(20, 4 * len(image_paths)), squeeze=False)

# Process each image
for idx, image_path in enumerate(image_paths):
    # Load as grayscale, resized to 224x224
    image = Image.open(image_path).convert('L')
    image = image.resize((224, 224))
    image_np = np.array(image)

    # MVM-LBP codes, flattened for the histogram
    imgLBP = getLBPimageUsingMVM(image_np)
    vecimgLBP = imgLBP.flatten()

    # Column 0: original grayscale image
    axs[idx, 0].imshow(image_np, cmap="gray")
    axs[idx, 0].set_title(f"Grayscale Image {idx + 1}")
    axs[idx, 0].axis('off')

    # Column 1: MVM-based LBP image
    axs[idx, 1].imshow(imgLBP, cmap="gray")
    axs[idx, 1].set_title(f"MVM-based LBP Image {idx + 1}")
    axs[idx, 1].axis('off')

    # Column 2: histogram with integer-aligned bins (edges 0..256) so that the
    # left edge of each bin is exactly the code printed as its label.
    freq, lbp, _ = axs[idx, 2].hist(vecimgLBP, bins=np.arange(257), color='black', alpha=0.7)
    axs[idx, 2].set_ylim(0, 40000)
    lbp = lbp[:-1]

    # Label the MVM-LBP codes that occur more than 5000 times
    largeTF = freq > 5000
    for x, fr in zip(lbp[largeTF], freq[largeTF]):
        axs[idx, 2].text(x, fr, "{:6.0f}".format(x), color="magenta")

    axs[idx, 2].set_title(f"MVM-LBP Histogram {idx + 1}")
    axs[idx, 2].set_xlabel("MVM-LBP Value")
    axs[idx, 2].set_ylabel("Frequency")

plt.tight_layout()
plt.show()


In [None]:
# GLCM (gray-level co-occurrence matrix) code for 5 pictures

In [None]:
import os

from torchvision import datasets, transforms

from torch.utils.data import DataLoader, Subset

import numpy as np



# Dataset root (Kaggle input directory)
data_dir = '/kaggle/input/infosys-mri-dataset'

# Every split gets the same deterministic pipeline: resize to 224x224, then
# convert to a tensor. Each split still receives its own Compose instance.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    for split in ('train', 'val', 'test')
}



# Labeled data: train/ is organised in class sub-directories
labeled_data_dir = os.path.join(data_dir, 'train')
labeled_dataset = datasets.ImageFolder(labeled_data_dir, transform=data_transforms['train'])

# The labeled subset covers every index of the training dataset
labeled_indices = np.arange(len(labeled_dataset))
X_labeled = Subset(labeled_dataset, labeled_indices)  # Features (X) for labeled
y_labeled = [labeled_dataset.targets[i] for i in labeled_indices]  # Labels (y) for labeled

# Optional unlabeled data. X_unlabeled stays None when it cannot be loaded, so
# the dictionary below does not need to inspect locals().
unlabeled_data_dir = os.path.join(data_dir, 'unlabeled')
X_unlabeled = None
if os.path.exists(unlabeled_data_dir):
    try:
        # NOTE(review): ImageFolder expects at least one sub-directory holding
        # the images; a flat folder of files is rejected -- confirm the layout.
        unlabeled_dataset = datasets.ImageFolder(unlabeled_data_dir, transform=data_transforms['train'])
    except (FileNotFoundError, RuntimeError) as err:
        print(f"Could not load unlabeled dataset from {unlabeled_data_dir}: {err}")
    else:
        # The labels of this dataset are ignored; all images are treated as unlabeled
        X_unlabeled = DataLoader(unlabeled_dataset, batch_size=32, shuffle=True)
else:
    print("Unlabeled dataset directory not found.")

# Loaders for the labeled splits; only the training loader is shuffled
train_loader = DataLoader(X_labeled, batch_size=32, shuffle=True)
val_loader = DataLoader(datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=data_transforms['val']),
                        batch_size=32, shuffle=False)
test_loader = DataLoader(datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=data_transforms['test']),
                         batch_size=32, shuffle=False)

# Store all loaders in a dictionary for easier access ('unlabeled' may be None)
dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader,
    'unlabeled': X_unlabeled,
}

if X_unlabeled is not None:
    print("Labeled and unlabeled data loaders created successfully.")
else:
    print("Labeled data loaders created successfully (no unlabeled loader).")


In [None]:
import torch

import torchvision.models as models
# Load the VGG-16 model pre-trained on ImageNet
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version.
vgg16 = models.vgg16(pretrained=True)
# Set the model to evaluation mode. eval() returns the module itself, so as the
# last expression of a notebook cell the model is presumably also echoed as output.

vgg16.eval()

In [None]:
import torch



import torchvision.models as models



# Pre-trained VGG-16, switched to evaluation mode, then printed layer by layer
vgg16 = models.vgg16(pretrained=True).eval()
print(vgg16)

In [None]:
import torch
import torchvision.models as models

# Pre-trained ResNet-18 in evaluation mode, printed under a heading
resnet18 = models.resnet18(pretrained=True).eval()
for line in ("ResNet18 Model:", resnet18):
    print(line)



In [None]:
import torch

import torch.nn as nn

import torchvision.models as models

class Resnet18(nn.Module):
    """ResNet-18 backbone with a new ``num_classes``-way linear head.

    The convolutional stem, the four residual stages and the average pooling
    are taken from ``models.resnet18(pretrained=True)``; only the final
    ``fc`` layer is created fresh. Dropout (p=0.5) is applied to the pooled
    features before ``fc``.

    Parameters
    ----------
    num_classes : int
        Number of output logits (default 2).
    """

    def __init__(self, num_classes=2):
        super(Resnet18, self).__init__()
        model_resnet18 = models.resnet18(pretrained=True)
        # Stem: convolution, batch normalisation, ReLU activation, max pooling
        self.conv1 = model_resnet18.conv1
        self.bn1 = model_resnet18.bn1
        self.relu = model_resnet18.relu
        self.maxpool = model_resnet18.maxpool

        # The four residual stages of the backbone
        self.layer1 = model_resnet18.layer1
        self.layer2 = model_resnet18.layer2
        self.layer3 = model_resnet18.layer3
        self.layer4 = model_resnet18.layer4

        self.avgpool = model_resnet18.avgpool
        # Width of the pooled feature vector (input size of the original fc layer)
        self.features = model_resnet18.fc.in_features
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(self.features, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        # The dropout layer was defined in __init__ but never applied; it is a
        # no-op in eval() mode, so inference results are unchanged.
        x = self.dropout(x)
        x = self.fc(x)
        return x


In [None]:
# Build the custom ResNet-18 with its default two-class head
model = Resnet18()

# Print the layer structure of the model
print(model)

In [None]:
class Resnet50(nn.Module):
    """ResNet-50 backbone with a new ``num_classes``-way linear head.

    The convolutional stem, the four residual stages and the average pooling
    are taken from ``models.resnet50(pretrained=True)``; only the final
    ``fc`` layer is created fresh. Dropout (p=0.5) is applied to the pooled
    features before ``fc``.

    Parameters
    ----------
    num_classes : int
        Number of output logits (default 2).
    """

    def __init__(self, num_classes=2):
        super(Resnet50, self).__init__()
        model_resnet50 = models.resnet50(pretrained=True)
        # Stem
        self.conv1 = model_resnet50.conv1
        self.bn1 = model_resnet50.bn1
        self.relu = model_resnet50.relu
        self.maxpool = model_resnet50.maxpool
        # Residual stages
        self.layer1 = model_resnet50.layer1
        self.layer2 = model_resnet50.layer2
        self.layer3 = model_resnet50.layer3
        self.layer4 = model_resnet50.layer4
        self.avgpool = model_resnet50.avgpool
        # Width of the pooled feature vector (input size of the original fc layer)
        self.features = model_resnet50.fc.in_features
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(self.features, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        # The dropout layer was defined in __init__ but never applied; it is a
        # no-op in eval() mode, so inference results are unchanged.
        x = self.dropout(x)
        x = self.fc(x)
        return x


In [None]:
import torch

import torch.nn as nn

import torchvision.models as models



class customVGG16(nn.Module):
    """VGG-16 convolutional features with a newly created classifier head.

    ``features`` and ``avgpool`` come from ``models.vgg16(pretrained=True)``.
    The classifier is rebuilt from scratch as Linear(512*7*7, 4096) -> ReLU ->
    Dropout -> Linear(4096, 4096) -> ReLU -> Dropout -> Linear(4096, num_classes).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Pre-trained network; only its feature extractor and pooling are kept
        backbone = models.vgg16(pretrained=True)
        self.features = backbone.features
        self.avgpool = backbone.avgpool

        # New classification head, layers listed in execution order
        head_layers = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        pooled = self.avgpool(self.features(x))
        # Collapse everything except the batch dimension before the head
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)


In [None]:
# Build the custom VGG-16 with its default two-class head; this rebinds `model`
model = customVGG16()

# Print the layer structure of the model
print(model)

In [None]:
import numpy as np

import torch



class EarlyStopping:
    """Stop training when the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss and the
    model. The model's ``state_dict`` is saved to ``path`` on every improvement;
    ``early_stop`` becomes True after ``patience`` consecutive calls without one.

    Parameters
    ----------
    patience : int
        Number of consecutive non-improving calls before ``early_stop`` is set.
    verbose : bool
        If True, report counter updates and checkpoint saves via ``trace_func``.
    delta : float
        A call only counts as an improvement when ``-val_loss`` is at least
        ``best_score + delta``.
    path : str
        File the checkpoint is written to.
    trace_func : callable
        Function used for the verbose messages (default ``print``).
    """

    def __init__(self, patience=12, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record ``val_loss`` and update the checkpoint, counter and stop flag."""
        # Higher score is better, so the loss is negated
        score = -val_loss

        if self.best_score is None:
            # First call: always counts as the best so far
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # No sufficient improvement
            self.counter += 1
            if self.verbose:
                self.trace_func(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: checkpoint and restart the patience counter
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decreases."""
        if self.verbose:
            self.trace_func(f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...")
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


In [None]:
import torch



import torch.nn.functional as F



from tqdm import tqdm



import torch.optim as optim



from tqdm import tqdm

In [None]:
#no_cuda = not torch.cuda.is_available()  # Set to True if CUDA is not available

#device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")

#print(f"Using device: {device}")

# Use the GPU when PyTorch can see one, otherwise the CPU, and move the model there
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)



#device = torch.device("cpu")

In [None]:
import torch
import torch.nn.functional as F
#from tqdm import tqdm #already imported in previous cell
import torch.optim as optim
#from tqdm import tqdm  #already imported in previous cell

# Get the CPU device

#device = torch.device("cpu")

#print(f"Using device: {device}")

# ... rest of your code using 'device' for tensors ...

In [None]:
# Pick the compute device (GPU if available) and place the model on it
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

In [None]:
import torch
from tqdm import tqdm
import torch.nn.functional as F

# Epoch bookkeeping: start index and planned number of epochs.
epoch, total_epochs = 0, 50

# Data source and loss for training.
loader = train_loader  # expected to be a DataLoader over the training data
criterion = nn.CrossEntropyLoss()

# Optimiser settings; train() reads `lr` as a module-level name.
l2_decay = 1e-3
lr = 1e-3



def train(epoch, model, num_epochs, loader, criterion, l2_decay, base_lr=None):
    """Run one training epoch with SGD and print the training accuracy.

    A new SGD optimizer (momentum 0.9) is created on every call.  Its learning
    rate is the base rate multiplied by ``0.1 ** (epoch // 10)`` and floored
    at ``1e-5``.

    Args:
        epoch: Epoch index.  It is used as a zero-based index: the progress
            bar shows ``epoch + 1`` and the decay step is ``epoch // 10``.
        model: Network to optimise.  Batches are moved to the device that
            holds its parameters.
        num_epochs: Total number of epochs; only used in the progress-bar label.
        loader: Iterable of ``(data, label)`` batches exposing ``.dataset``,
            whose length is the denominator of the printed accuracy.
        criterion: Callable ``criterion(logits, labels)`` returning the loss.
            ``None`` falls back to plain cross-entropy.
        l2_decay: Weight decay passed to the optimizer.
        base_lr: Base learning rate.  ``None`` (the default) uses the
            module-level ``lr``, which is what the original code always did.
    """
    if base_lr is None:
        base_lr = lr
    learning_rate = max(base_lr * (0.1 ** (epoch // 10)), 1e-5)
    optimizer = torch.optim.SGD(
        model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=l2_decay
    )

    # Follow the model instead of hard-coding .cuda(): the notebook falls back
    # to the CPU when CUDA is missing, and an unconditional .cuda() crashes there.
    device = next(model.parameters()).device

    model.train()

    correct = 0
    for data, label in tqdm(loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch'):
        data = data.float().to(device)
        label = label.long().to(device)

        optimizer.zero_grad()
        output = model(data)
        # Honour the loss the caller passed in; cross-entropy is numerically
        # the same as the previous nll_loss(log_softmax(...)) combination.
        if criterion is not None:
            loss = criterion(output, label)
        else:
            loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()

        pred = output.detach().argmax(dim=1)
        correct += (pred == label).sum().item()

    print(f'train accuracy: {100. * correct / len(loader.dataset)}%')



In [None]:
from sklearn.metrics import roc_curve, auc as compute_auc  # Rename the imported 'auc' function
import sklearn.metrics as metrics

def validation(model, val_loader):
    """Evaluate ``model`` on ``val_loader``, print a summary and return metrics.

    Batches are moved to the device holding the model's parameters and all
    forward passes run under ``torch.no_grad()``.

    Args:
        model: Network producing one row of class logits per sample.
        val_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used as the sample count).

    Returns:
        Tuple ``(test_loss, accuracy, cm, auc_score)``: mean cross-entropy per
        sample, accuracy in percent (a 0-d tensor, as before), the confusion
        matrix (rows are true classes, columns are predictions) and the
        micro-averaged ROC AUC.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model.eval()  # Set model to evaluation mode

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    test_loss = 0
    correct = 0
    all_predictions = []  # Predicted class index per sample
    all_targets = []  # True class index per sample
    batch_probabilities = []  # Softmax outputs, one array per batch

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)

            val_output = model(data)

            # Summed (not averaged) so the division by the dataset size below
            # yields a true per-sample mean even with a short last batch.
            test_loss += F.nll_loss(F.log_softmax(val_output, dim=1), target, reduction='sum').item()

            pred = val_output.argmax(dim=1)
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
            batch_probabilities.append(F.softmax(val_output, dim=1).cpu().numpy())

            correct += pred.eq(target.view_as(pred)).cpu().sum()

    if not batch_probabilities:
        raise ValueError("val_loader produced no batches")

    possibilities = np.concatenate(batch_probabilities, axis=0)
    num_classes = val_output.shape[1]

    # Fix the label set so the matrix keeps its shape even when a class is
    # absent from this split.
    cm = metrics.confusion_matrix(all_targets, all_predictions, labels=list(range(num_classes)))

    # One-hot encode the labels for the micro-averaged ROC computation
    label_onehot = np.eye(num_classes)[np.asarray(all_targets, dtype=int)]
    fpr, tpr, thresholds = roc_curve(label_onehot.ravel(), possibilities.ravel())
    auc_score = compute_auc(fpr, tpr)

    # Average test loss per sample
    test_loss /= len(val_loader.dataset)

    def _rate(hits, total):
        # A class that never occurs would otherwise divide by zero.
        return float(hits) / float(total) if total else 0.0

    # Specificity/sensitivity describe the classifier's actual decisions, so
    # they come from the confusion matrix.  The second point of the ROC curve
    # (used previously) only reflects the single most confident score.
    true_pos = np.diag(cm)
    false_neg = cm.sum(axis=1) - true_pos
    false_pos = cm.sum(axis=0) - true_pos
    true_neg = cm.sum() - true_pos - false_neg - false_pos
    # Binary problems report class index 1 as the positive class; with more
    # classes the one-vs-rest rates are macro-averaged.
    classes = [1] if num_classes == 2 else list(range(num_classes))
    sensitivity = float(np.mean([_rate(true_pos[c], true_pos[c] + false_neg[c]) for c in classes]))
    specificity = float(np.mean([_rate(true_neg[c], true_neg[c] + false_pos[c]) for c in classes]))

    accuracy = 100. * correct / len(val_loader.dataset)

    print('Specificity: {:.4f}, Sensitivity: {:.4f}, AUC: {:.4f}'.format(specificity, sensitivity, auc_score))
    print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(test_loss, accuracy))

    return test_loss, accuracy, cm, auc_score


In [None]:
# Hyper-parameters for the VGG16 run; train() picks up `lr` as a global.
num_classes = 2
total_epochs = 50
lr = 0.0001
momentum = 0.9
l2_decay = 0.001
log_interval = 10
no_cuda = False

# Build the network on the selected device and define the loss.
model = customVGG16(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()

In [None]:
import os
from IPython.display import FileLink

# Model training
model.to(device)

best_accuracy = 0  # best validation AUC seen so far (despite the name)
early_stop = EarlyStopping(patience=12, verbose=True)

project_name = 'tumor_classification'
model_name = 'vgg16'

# Set Kaggle working directory
os.chdir(r'/kaggle/working')

# The target directory does not change between epochs, so create it once.
model_save_dir = os.path.join('model', project_name, model_name)
os.makedirs(model_save_dir, exist_ok=True)

for epoch in range(1, total_epochs + 1):
    # train() uses its epoch argument as a zero-based index (it displays
    # epoch + 1 and decays the LR on epoch // 10).  Passing the 1-based loop
    # counter made the bar read "Epoch 51/50" and dropped the LR one epoch early.
    train(epoch - 1, model, total_epochs, train_loader, criterion, l2_decay)

    # Validation step
    with torch.no_grad():
        test_loss, accuracy, cm, auc = validation(model, val_loader)

    # Unwrap (Distributed)DataParallel so the saved keys carry no "module." prefix
    is_wrapped = isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel))
    model_state_dict = model.module.state_dict() if is_wrapped else model.state_dict()

    # Early stopping check (also checkpoints the model when the loss improves)
    early_stop(test_loss, model)

    # Keep a separate snapshot whenever the validation AUC improves
    if auc > best_accuracy:
        best_accuracy = auc
        model_save_path = os.path.join(model_save_dir, f'{model_name}_epoch_{epoch}.pth')
        torch.save(model_state_dict, model_save_path, _use_new_zipfile_serialization=False)
        print(f"Model saved at: {os.path.abspath(model_save_path)}")

        # Generate download link for Kaggle
        print("Generating download link for the saved model...")
        display(FileLink(model_save_path))

    # Stop training if early stopping is triggered
    if early_stop.early_stop:
        print("Early stopping")
        break


In [None]:
from sklearn import metrics

from sklearn.metrics import roc_auc_score, roc_curve
import torch

import numpy as np

import torch.nn.functional as F



def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print binary classification metrics.

    Prints a classification report for the classes ``benign`` (index 0) and
    ``malignant`` (index 1), followed by specificity, sensitivity, AUC, the
    average loss and the accuracy.

    Args:
        model: Network producing one row of class logits per sample.
        test_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used as the sample count).

    Returns:
        Tuple ``(accuracy, test_loss, auc_value)``: accuracy in percent, mean
        cross-entropy per sample and the macro-averaged ROC AUC.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    name = 'test'
    class_labels = ['benign', 'malignant']
    class_ids = list(range(len(class_labels)))
    len_test_loader = len(test_loader.dataset)

    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    test_loss = 0
    correct = 0
    all_predictions = []
    all_targets = []  # true class indices; the metrics need these, not the class names
    batch_probabilities = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            test_output = model(data)
            test_loss += F.nll_loss(F.log_softmax(test_output, dim=1), target, reduction='sum').item()

            pred = test_output.argmax(dim=1)
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
            batch_probabilities.append(F.softmax(test_output, dim=1).cpu().numpy())

            correct += pred.eq(target.view_as(pred)).cpu().sum()

    if not batch_probabilities:
        raise ValueError("test_loader produced no batches")
    possibilities = np.concatenate(batch_probabilities, axis=0)

    # classification metrics -> accuracy, f1 score
    print(metrics.classification_report(
        all_targets, all_predictions, labels=class_ids, target_names=class_labels, digits=4))

    # confusion matrix: rows are true classes, columns are predictions
    cm = metrics.confusion_matrix(all_targets, all_predictions, labels=class_ids)

    num_classes = test_output.shape[1]
    label_onehot = np.eye(num_classes)[np.asarray(all_targets, dtype=int)]
    auc_value = roc_auc_score(label_onehot, possibilities, average="macro")

    # Read the rates off the confusion matrix.  The first ROC point used
    # before is always (0, 0), i.e. specificity 1 and sensitivity 0.
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0

    test_loss /= len_test_loader
    accuracy = 100. * correct / len_test_loader

    print('Specificity: {:.4f}, Sensitivity: {:.4f}, AUC: {:.4f}'.format(specificity, sensitivity, auc_value))
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        name, test_loss, correct, len_test_loader, accuracy))

    return accuracy, test_loss, auc_value


In [None]:
from sklearn import metrics
from sklearn.metrics import roc_auc_score, roc_curve
import torch
import numpy as np
import torch.nn.functional as F

# Testing setup
model.eval()
len_test_loader = len(test_loader.dataset)

test_loss = 0
correct = 0
possibilities = None
all_predictions = []
true_labels = []

# Inference only: no_grad keeps autograd from recording every forward pass.
with torch.no_grad():
    for data, target in test_loader:
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()

        # Forward pass
        test_output = model(data)
        test_loss += F.nll_loss(F.log_softmax(test_output, dim=1), target, reduction='sum').item()

        # Get predictions and true labels
        pred = test_output.data.max(1)[1]
        all_predictions.extend(pred.cpu().numpy())
        true_labels.extend(target.cpu().numpy())

        # Store probabilities
        possibility = F.softmax(test_output, dim=1).cpu().data.numpy()
        if possibilities is None:
            possibilities = possibility
        else:
            possibilities = np.concatenate((possibilities, possibility), axis=0)

        # Calculate correct predictions
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

# Calculate test loss and accuracy
test_loss /= len_test_loader
accuracy = 100. * correct / len_test_loader

# Classification Report (label ids fixed so a missing class cannot break it)
class_names = ['benign', 'malignant']
num_classes = len(class_names)
class_ids = list(range(num_classes))
print(metrics.classification_report(
    true_labels, all_predictions, labels=class_ids, target_names=class_names, digits=4))

# Confusion Matrix: rows are true classes, columns are predictions
cm = metrics.confusion_matrix(true_labels, all_predictions, labels=class_ids)
print("\nConfusion Matrix:\n", cm)

# ROC Curve and AUC
label_onehot = np.eye(num_classes)[np.array(true_labels).astype(int).tolist()]
fpr, tpr, thresholds = roc_curve(label_onehot.ravel(), possibilities.ravel())
auc_value = roc_auc_score(label_onehot, possibilities, average="macro")

# Specificity and sensitivity come from the confusion matrix.  The first ROC
# point is always (0, 0), so 1 - fpr[0] and tpr[0] printed 1.0 and 0.0 for
# every model.
tn, fp, fn, tp = cm.ravel()
specificity = tn / (tn + fp) if (tn + fp) else 0.0
sensitivity = tp / (tp + fn) if (tp + fn) else 0.0
print('Specificity: {:.4f}, Sensitivity: {:.4f}, AUC: {:.4f}'.format(specificity, sensitivity, auc_value))

# Print Final Results
print('\nTest Set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len_test_loader, accuracy))

RESNET 18

In [None]:
# Pick CUDA only when it is available and not disabled through `no_cuda`.
if torch.cuda.is_available() and not no_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the ResNet18 wrapper directly on that device.
model = Resnet18(num_classes=num_classes).to(device)

In [None]:
import torch.nn as nn
# Hyper-parameters for the ResNet18 run; train() picks up `lr` as a global.
num_classes = 2
total_epochs = 50
lr = 0.0001
momentum = 0.9
l2_decay = 0.001
log_interval = 10
no_cuda = False

# Fresh network on the selected device, plus the loss.
model = Resnet18(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()

In [None]:
# model training
model.to(device)  # make sure the model lives on the selected device
best_accuracy = 0  # best validation AUC seen so far (despite the name)
early_stop = EarlyStopping(patience=12, verbose=True)
# NOTE(review): 'tumor_classfication' is misspelled (the VGG16 cell uses
# 'tumor_classification'); left unchanged because it determines the save path.
project_name = 'tumor_classfication'
model_name = 'resnet18'

# The target directory does not change between epochs, so create it once.
model_save_dir = os.path.join('model', project_name, model_name)
os.makedirs(model_save_dir, exist_ok=True)

for epoch in range(1, total_epochs + 1):
    # train() uses its epoch argument as a zero-based index (it displays
    # epoch + 1 and decays the LR on epoch // 10).  Passing the 1-based loop
    # counter made the bar read "Epoch 51/50" and dropped the LR one epoch early.
    train(epoch - 1, model, total_epochs, train_loader, criterion, l2_decay)

    with torch.no_grad():
        test_loss, accuracy, cm, auc = validation(model, val_loader)

    # Unwrap (Distributed)DataParallel so the saved keys carry no "module."
    # prefix.  The result is no longer stored in a variable called `dict`,
    # which shadowed the builtin for the rest of the notebook.
    is_wrapped = isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel))
    model_state_dict = model.module.state_dict() if is_wrapped else model.state_dict()

    # Early stopping check (also checkpoints the model when the loss improves)
    early_stop(test_loss, model)

    # Keep a separate snapshot whenever the validation AUC improves
    if auc > best_accuracy:
        best_accuracy = auc
        torch.save(model_state_dict, os.path.join(model_save_dir, f'{model_name}_{epoch}.pth'), _use_new_zipfile_serialization=False)

    if early_stop.early_stop:
        print("Early stopping")
        break


In [None]:
import torch
import torchvision.models as models

# Load torchvision's pretrained ResNet50 and switch it to evaluation mode
# (eval() returns the module itself, so the call can be chained).
resnet50 = models.resnet50(pretrained=True).eval()

# Dump the architecture for inspection.
print("ResNet50 Model:")
print(resnet50)

In [None]:
import torch

import torch.nn as nn

import torchvision.models as models

In [None]:
class Resnet50(nn.Module):
    """torchvision ResNet-50 backbone with a new linear classification head.

    All stages up to and including the average pool are taken from
    ``models.resnet50``.  The original ``fc`` layer is replaced by dropout
    (p=0.5) followed by ``nn.Linear(in_features, num_classes)``.

    Args:
        num_classes: Number of output logits.
        pretrained: Forwarded to ``models.resnet50``; ``True`` (the default)
            keeps the previous behaviour of loading pretrained weights.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        model_resnet50 = models.resnet50(pretrained=pretrained)
        self.conv1 = model_resnet50.conv1
        self.bn1 = model_resnet50.bn1
        self.relu = model_resnet50.relu
        self.maxpool = model_resnet50.maxpool
        self.layer1 = model_resnet50.layer1
        self.layer2 = model_resnet50.layer2
        self.layer3 = model_resnet50.layer3
        self.layer4 = model_resnet50.layer4
        self.avgpool = model_resnet50.avgpool
        # Width of the pooled feature vector that fed the original fc layer
        self.features = model_resnet50.fc.in_features
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(self.features, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to one feature row per sample
        # The dropout layer was constructed but never applied.  It is a no-op
        # in eval mode and has no parameters, so checkpoints stay compatible.
        x = self.dropout(x)
        x = self.fc(x)
        return x


In [None]:
# Build the ResNet50 wrapper with its default num_classes=2 and print its layers.
model = Resnet50()

print(model)

In [None]:
import torch.nn as nn
# Hyper-parameters for the ResNet50 run; train() picks up `lr` as a global.
num_classes = 2
total_epochs = 50
lr = 0.0001
momentum = 0.9
l2_decay = 0.001
log_interval = 10
no_cuda = False

# Fresh network on the selected device, plus the loss.
model = Resnet50(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()

In [None]:
# model training
model.to(device)  # make sure the model lives on the selected device
best_accuracy = 0  # best validation AUC seen so far (despite the name)
early_stop = EarlyStopping(patience=12, verbose=True)
# NOTE(review): 'tumor_classfication' is misspelled (the VGG16 cell uses
# 'tumor_classification'); left unchanged because it determines the save path.
project_name = 'tumor_classfication'
model_name = 'resnet50'

# The target directory does not change between epochs, so create it once.
model_save_dir = os.path.join('model', project_name, model_name)
os.makedirs(model_save_dir, exist_ok=True)

for epoch in range(1, total_epochs + 1):
    # train() uses its epoch argument as a zero-based index (it displays
    # epoch + 1 and decays the LR on epoch // 10).  Passing the 1-based loop
    # counter made the bar read "Epoch 51/50" and dropped the LR one epoch early.
    train(epoch - 1, model, total_epochs, train_loader, criterion, l2_decay)

    with torch.no_grad():
        test_loss, accuracy, cm, auc = validation(model, val_loader)

    # Unwrap (Distributed)DataParallel so the saved keys carry no "module."
    # prefix.  The result is no longer stored in a variable called `dict`,
    # which shadowed the builtin for the rest of the notebook.
    is_wrapped = isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel))
    model_state_dict = model.module.state_dict() if is_wrapped else model.state_dict()

    # Early stopping check (also checkpoints the model when the loss improves)
    early_stop(test_loss, model)

    # Keep a separate snapshot whenever the validation AUC improves
    if auc > best_accuracy:
        best_accuracy = auc
        torch.save(model_state_dict, os.path.join(model_save_dir, f'{model_name}_{epoch}.pth'), _use_new_zipfile_serialization=False)
        print(f'{model_name}_{epoch}')

    if early_stop.early_stop:
        print("Early stopping")
        break


In [None]:
from sklearn import metrics

from sklearn.metrics import roc_auc_score, roc_curve

import torch

import numpy as np

import torch.nn.functional as F



def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print binary classification metrics.

    Prints a classification report for the classes ``benign`` (index 0) and
    ``malignant`` (index 1), followed by specificity, sensitivity, AUC, the
    average loss and the accuracy.

    Args:
        model: Network producing one row of class logits per sample.
        test_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used as the sample count).

    Returns:
        Tuple ``(accuracy, test_loss, auc_value)``: accuracy in percent, mean
        cross-entropy per sample and the macro-averaged ROC AUC.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    name = 'test'
    class_labels = ['benign', 'malignant']
    class_ids = list(range(len(class_labels)))
    len_test_loader = len(test_loader.dataset)

    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    test_loss = 0
    correct = 0
    all_predictions = []
    all_targets = []  # true class indices; the metrics need these, not the class names
    batch_probabilities = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            test_output = model(data)
            test_loss += F.nll_loss(F.log_softmax(test_output, dim=1), target, reduction='sum').item()

            pred = test_output.argmax(dim=1)
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
            batch_probabilities.append(F.softmax(test_output, dim=1).cpu().numpy())

            correct += pred.eq(target.view_as(pred)).cpu().sum()

    if not batch_probabilities:
        raise ValueError("test_loader produced no batches")
    possibilities = np.concatenate(batch_probabilities, axis=0)

    # classification metrics -> accuracy, f1 score
    print(metrics.classification_report(
        all_targets, all_predictions, labels=class_ids, target_names=class_labels, digits=4))

    # confusion matrix: rows are true classes, columns are predictions
    cm = metrics.confusion_matrix(all_targets, all_predictions, labels=class_ids)

    num_classes = test_output.shape[1]
    label_onehot = np.eye(num_classes)[np.asarray(all_targets, dtype=int)]
    auc_value = roc_auc_score(label_onehot, possibilities, average="macro")

    # Read the rates off the confusion matrix.  The first ROC point used
    # before is always (0, 0), i.e. specificity 1 and sensitivity 0.
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0

    test_loss /= len_test_loader
    accuracy = 100. * correct / len_test_loader

    print('Specificity: {:.4f}, Sensitivity: {:.4f}, AUC: {:.4f}'.format(specificity, sensitivity, auc_value))
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        name, test_loss, correct, len_test_loader, accuracy))

    return accuracy, test_loss, auc_value


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import torch
import numpy as np
import torch.nn.functional as F

def test(model, test_loader):
    """
    Evaluate the model on a test dataset and print binary classification metrics.

    Class index 0 is reported as 'benign' and class index 1 as 'malignant';
    class 1 is the positive class for sensitivity, specificity and AUC.

    Args:
    - model (torch.nn.Module): The trained model.
    - test_loader (torch.utils.data.DataLoader): DataLoader for the test dataset.

    Returns:
    - accuracy: Test accuracy in percentage.
    - test_loss (float): Average loss on the test set.
    - auc_value (float): Area under the ROC curve.

    Raises:
    - ValueError: If test_loader yields no batches.
    """
    model.eval()
    len_test_loader = len(test_loader.dataset)

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    test_loss = 0
    correct = 0
    all_targets = []
    all_predictions = []
    batch_probabilities = []

    # Inference only: no_grad keeps autograd from recording every forward pass.
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            # Forward pass
            test_output = model(data)

            # Summed so the division by the dataset size gives a per-sample mean
            test_loss += F.nll_loss(F.log_softmax(test_output, dim=1), target, reduction='sum').item()

            # Predictions
            pred = test_output.argmax(dim=1)
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

            # Probabilities
            batch_probabilities.append(F.softmax(test_output, dim=1).cpu().numpy())

            correct += pred.eq(target.view_as(pred)).cpu().sum()

    if not batch_probabilities:
        raise ValueError("test_loader produced no batches")
    possibilities = np.concatenate(batch_probabilities, axis=0)

    all_predictions = np.array(all_predictions)
    all_targets = np.array(all_targets)
    class_ids = [0, 1]

    # Classification metrics
    print("\nClassification Report:")
    print(classification_report(
        all_targets, all_predictions, labels=class_ids,
        target_names=['benign', 'malignant'], digits=4))

    # Confusion matrix: rows are true classes, columns are predictions
    print("\nConfusion Matrix:")
    cm = confusion_matrix(all_targets, all_predictions, labels=class_ids)
    print(cm)

    # AUC from the positive-class probabilities
    auc_value = roc_auc_score(all_targets, possibilities[:, 1])

    # Specificity and sensitivity of the actual decisions.  The second ROC
    # point used before only reflects the highest-scoring sample, not the
    # classifier's operating point.
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0

    # Average loss and accuracy
    test_loss /= len_test_loader
    accuracy = 100. * correct / len_test_loader

    print('\nSpecificity: {:.4f}, Sensitivity: {:.4f}, AUC: {:.4f}'.format(specificity, sensitivity, auc_value))
    print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        "Test", test_loss, correct, len_test_loader, accuracy))

    return accuracy, test_loss, auc_value


In [None]:
# Evaluate the current model on the test loader; test() returns (accuracy, average loss, AUC).
accuracy, test_loss, auc_value = test(model, test_loader)
