In [None]:
import os
import pydicom
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import torch
from torchvision.transforms import Resize

In [None]:
# Optional GPU support: choose CUDA when it is available, otherwise the CPU.
# This worked on a local GPU; whether it also works under onDemand has not
# been verified, so the device selection stays commented out for now.

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Every mammogram is rescaled to the same 128 x 128 grid
resize_transform = Resize(size=(128, 128))

# Flattened image vectors; resizeAndLabelDCM appends one entry per DICOM file
images = list()

def resizeAndLabelDCM(rootFolder) -> None:
    """Recursively load every DICOM file under ``rootFolder`` as a flat vector.

    Each ``.dcm`` file is read with pydicom, converted to a float tensor with
    a leading channel axis, resized by the module-level ``resize_transform``,
    flattened, and appended to the module-level ``images`` list.

    Entries are visited in sorted name order, so the order of ``images`` is
    the same on every run and on every filesystem (``os.listdir`` alone
    returns entries in arbitrary order). Files located directly in
    ``rootFolder`` are processed as well, not only those in its subfolders,
    and the extension check is case-insensitive.

    Despite the name, no labels are attached here; the caller is responsible
    for pairing the collected vectors with their labels in the same order.

    Args:
        rootFolder: Path of the directory to scan; it may contain nested
            subfolders.

    Returns:
        None. Results are accumulated in the global ``images`` list.
    """
    for entryName in sorted(os.listdir(rootFolder)):
        entryPath = os.path.join(rootFolder, entryName)

        # Descend into subfolders first; their files are handled by the
        # recursive call, so nothing is read twice.
        if os.path.isdir(entryPath):
            print(f"Processing folder: {entryName}")
            resizeAndLabelDCM(entryPath)
            continue

        # Anything that is not a DICOM file is ignored (".DCM" is accepted too)
        if not entryName.lower().endswith(".dcm"):
            continue

        ds = pydicom.dcmread(entryPath)
        image = ds.pixel_array.astype(float)

        # Convert the image to a PyTorch tensor with a leading channel axis
        # image_tensor = torch.tensor(image).unsqueeze(0).to(device)
        image_tensor = torch.tensor(image).unsqueeze(0)

        # Resize the image
        image_tensor = resize_transform(image_tensor)

        # Flatten to create feature vector
        images.append(image_tensor.view(-1).cpu().numpy())

        # To view the image, uncomment next line
        # plt.imshow(image, cmap='gray'); plt.title(f"Patient ID: {os.path.basename(rootFolder)}"); plt.show()

In [None]:
# Specify the root folder, that may contain subfolders, with DICOM files
rootFolderPath = "path/to/train_images"

# Empty the shared list in place first so that re-running this cell does not
# append every image a second time (the function only ever appends).
images.clear()
resizeAndLabelDCM(rootFolderPath)

In [None]:
xTrain = np.array(images)

# y is the label -> cancer
data = pd.read_csv("path/to/train_images")
yTrain = np.array(data.loc[:, "cancer"])

print(f"X-shape: {xTrain.shape}, y-shape: {yTrain.shape}")

In [None]:
# Train SVM model
svm = SVC(kernel='linear')
svm.fit(xTrain, yTrain)

In [None]:
xTest = pd.read_csv("path/to/train_images")
xTest = np.array(xTest)

In [None]:
# Predict labels for test set
yPred = svm.predict(xTest)

In [None]:
# Evaluate model
accuracy = accuracy_score(yTest, yPred)
print(f"Accuracy: {accuracy}")