# Applying MedMnist data to a convolutional neural network with Pytorch

In [152]:
import torch
from torch import nn
from torch.utils.data import Dataset ,DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor , Lambda
import pandas as pd
import os
from torchvision.io import read_image
import medmnist
import numpy as np
from medmnist import INFO, Evaluator
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.optim as optim
from medmnist import INFO, Evaluator

In [153]:
Evaluator

medmnist.evaluator.Evaluator

### In this notebook we going to learn how to preprocess a dataset and feed it to a cnn with pytorch 
#### it a Binary Classification with Two classes

In [154]:
print(f'medmnist version : {medmnist.__version__}')

medmnist version : 2.2.3


In [155]:
data_flag = 'breastmnist'
download = True
info=INFO[data_flag]
info

{'python_class': 'BreastMNIST',
 'description': 'The BreastMNIST is based on a dataset of 780 breast ultrasound images. It is categorized into 3 classes: normal, benign, and malignant. As we use low-resolution images, we simplify the task into binary classification by combining normal and benign as positive and classifying them against malignant as negative. We split the source dataset with a ratio of 7:1:2 into training, validation and test set. The source images of 1×500×500 are resized into 1×28×28.',
 'url': 'https://zenodo.org/record/6496656/files/breastmnist.npz?download=1',
 'MD5': '750601b1f35ba3300ea97c75c52ff8f6',
 'task': 'binary-class',
 'label': {'0': 'malignant', '1': 'normal, benign'},
 'n_channels': 1,
 'n_samples': {'train': 546, 'val': 78, 'test': 156},
 'license': 'CC BY 4.0'}

In [156]:
info['n_samples']

{'train': 546, 'val': 78, 'test': 156}

In [157]:
import zipfile
def extractdata(data):
 # Load the .npz archive
 loaded_data = np.load(data)

 # Access non-array files (if any)
 # Note: Files other than NumPy arrays are not directly accessible via numpy.load

 # Specify the destination folder for extracted files
 destination_folder = os.getcwd()

 with zipfile.ZipFile(data) as archive:
    # List the files in the archive
    file_list = archive.namelist()

    # Extract each file from the archive to the destination folder
    for file_name in file_list:
        archive.extract(file_name, destination_folder)
        extracted_file_path = os.path.join(destination_folder, file_name)
        print(f"Extracted: {extracted_file_path}")

In [158]:
data=os.path.join('/home/sanaa/.medmnist/','breastmnist.npz')
extractdata(data)

Extracted: /home/sanaa/PHD/fedsim/train_images.npy
Extracted: /home/sanaa/PHD/fedsim/val_images.npy
Extracted: /home/sanaa/PHD/fedsim/test_images.npy
Extracted: /home/sanaa/PHD/fedsim/train_labels.npy
Extracted: /home/sanaa/PHD/fedsim/val_labels.npy
Extracted: /home/sanaa/PHD/fedsim/test_labels.npy


#### Train dataset

In [159]:
ytrainpath=os.path.join(os.getcwd(),'train_labels.npy')
xtrainpath=os.path.join(os.getcwd(),'train_images.npy')

In [160]:
xtrainpath

'/home/sanaa/PHD/fedsim/train_images.npy'

#### Test dataset

In [161]:
xtestpath=os.path.join(os.getcwd(),'test_images.npy')
ytestpath=os.path.join(os.getcwd(),'test_labels.npy')

In [162]:
#convert labels to hot encoded
#encoder = OneHotEncoder(sparse=False)
#target = encoder.fit_transform(np.load(ytrainpath))

In [163]:
def convertTorch(xdata,ydata):
    #conveert numpy data to tensor
    xdata=np.load(xdata)
    
    xdata_=torch.from_numpy(xdata).float()
    #xdata_/= 255.0
    ydata_=torch.from_numpy(np.load(ydata))

    return xdata_,ydata_

In [164]:
#train data features
#conveert numpy data to tensor
xtrain,ytrain=convertTorch(xtrainpath,ytrainpath)
xtest,ytest=convertTorch(xtestpath,ytestpath)

In [165]:
#print(f'inputs images of training dataset : {xtrain[0]}')
print(f' input train shape {xtrain.shape} , and input label shape : {xtest.shape}')
print(f'output train shape: {ytest.shape} and out label shape : {ytest.shape}')

 input train shape torch.Size([546, 28, 28]) , and input label shape : torch.Size([156, 28, 28])
output train shape: torch.Size([156, 1]) and out label shape : torch.Size([156, 1])


In [166]:
#training dataset
train_dataset=TensorDataset(xtrain,ytrain)
#test dataset
test_dataset=TensorDataset(xtest,ytest)

In [167]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)


In [168]:
examples = next(iter(train_dataloader))
#examples

In [169]:
for batch, (X, y) in enumerate(train_dataloader):
    print(X.shape)

torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([64, 28, 28])
torch.Size([34, 28, 28])


In [170]:
#crrate a NN basic Model 
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [171]:
#optimizing model parameters
## we need a loss function and an optmizer

In [172]:
# define a simple CNN model
class Net(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(Net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.BatchNorm2d(16),
            nn.ReLU())

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 16, kernel_size=3),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = nn.Sequential(
            nn.Conv2d(16, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU())
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU())

        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.fc = nn.Sequential(
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes))

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
n_channels=1
n_classes=1
modelh = Net(in_channels=n_channels, num_classes=n_classes)
modelh.to(device)    
# define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
lr = 0.001    
optimizer = optim.SGD(modelh.parameters(), lr=lr, momentum=0.9)

In [173]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        X = X.unsqueeze(1)  # Adds a channel dimension at position 1 
        # Compute prediction error
        pred = model(X)
        #y = y.view(-1, 1)
        loss = loss_fn(pred, y.float())

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        

In [174]:

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            X = X.unsqueeze(1)  # Adds a channel dimension at position 1 
            y=y.float()
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_loss /= num_batches
            #print(pred)
        print("Accuracy: ", ((pred > 0.0) == y).float().mean().item())   


      

In [175]:
epochs = 30
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, modelh, criterion, optimizer)
    test(test_dataloader, modelh, criterion)
print("Done!")



Epoch 1
-------------------------------
loss: 0.702764  [   64/  546]
Accuracy:  0.2857142984867096
Epoch 2
-------------------------------
loss: 0.668159  [   64/  546]
Accuracy:  0.785714328289032
Epoch 3
-------------------------------
loss: 0.600523  [   64/  546]
Accuracy:  0.7500000596046448
Epoch 4
-------------------------------
loss: 0.624808  [   64/  546]
Accuracy:  0.785714328289032
Epoch 5
-------------------------------
loss: 0.627794  [   64/  546]
Accuracy:  0.7142857313156128
Epoch 6
-------------------------------
loss: 0.450490  [   64/  546]
Accuracy:  0.6071428656578064
Epoch 7
-------------------------------
loss: 0.465524  [   64/  546]
Accuracy:  0.785714328289032
Epoch 8
-------------------------------
loss: 0.470156  [   64/  546]
Accuracy:  0.7142857313156128
Epoch 9
-------------------------------
loss: 0.407338  [   64/  546]
Accuracy:  0.6785714626312256
Epoch 10
-------------------------------
loss: 0.533230  [   64/  546]
Accuracy:  0.8571429252624512
Ep