In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import accuracy_score
import csv
import cv2 as cv
import numpy as np
from pathlib import Path
import sys
# Show the working directory the notebook is running from; the sys.path entry
# below is built from this same directory.
print(Path.cwd())
# Put the "image_normalization" folder of the working directory first on the
# module search path so that the `image_norm` import that follows can resolve.
sys.path.insert(0, str(Path.cwd() / "image_normalization"))
from image_norm import setGrayToBlack, paddImage
from pymage_size import get_image_size

/Users/julieschult/Desktop/BiomedicalSignalProcessing/TTT23_Project


In [5]:
def readImages(csvPath):
    """Load the images listed in a CSV file together with their three target values.

    The first CSV row is treated as a header and printed. Every following
    non-blank row must contain an image path (resolved relative to the current
    working directory) in column 0 and three numeric values in columns 1-3.
    Completely blank rows (e.g. a trailing newline) are skipped.

    Each image is read with OpenCV, passed through ``setGrayToBlack`` with
    ``threshold=150``, converted with ``cv.COLOR_BGR2GRAY`` and handed to
    ``paddImage`` together with the largest dimensions found across all listed
    files, so that every image fits the same output array.

    Parameters
    ----------
    csvPath : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    images : np.ndarray
        ``uint8`` array of shape ``(n_images, max_shape[0], max_shape[1])``.
    y : np.ndarray
        ``float`` array of shape ``(n_images, 3)`` with the values of columns 1-3.

    Raises
    ------
    OSError
        If OpenCV cannot read one of the listed image files.
    """
    filenames = []
    roi = []
    # Largest size seen per axis, in the reversed order of get_dimensions()
    # (presumably (height, width), matching OpenCV's array layout - TODO confirm).
    max_shape = [0, 0]

    # newline='' is the mode the csv module expects for files it parses.
    with open(csvPath, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        header = next(csv_reader, None)
        if header is not None:
            print(f'Column names are {", ".join(header)}')

        for row in csv_reader:
            # csv.reader yields [] for blank lines; indexing row[0] would crash.
            if not any(cell.strip() for cell in row):
                continue

            path = str(Path.cwd() / row[0])
            filenames.append(path)

            # Only the image header is inspected here, so the padded size is
            # known before any pixel data is loaded.
            img_size = get_image_size(path).get_dimensions()[::-1]
            max_shape[0] = max(max_shape[0], img_size[0])
            max_shape[1] = max(max_shape[1], img_size[1])

            roi.append([float(row[1]), float(row[2]), float(row[3])])

    y = np.array(roi, dtype=float).reshape(len(filenames), 3)
    images = np.zeros((len(filenames), max_shape[0], max_shape[1]), dtype=np.uint8)

    for i, filepath in enumerate(filenames):
        img = cv.imread(filepath)
        if img is None:
            # cv.imread signals failure by returning None instead of raising.
            raise OSError(f'Could not read image file: {filepath}')
        img_black = setGrayToBlack(img, threshold=150)
        img_gray = cv.cvtColor(img_black, cv.COLOR_BGR2GRAY)
        images[i] = paddImage(img_gray, max_shape)

    return images, y



def normalizeY(y, axis=None):
    """Min-max scale ``y`` into the range [0, 1].

    Parameters
    ----------
    y : array-like
        Values to scale.
    axis : int or None, optional
        Axis along which the minimum and maximum are taken. The default
        ``None`` uses one global minimum/maximum over all entries; ``axis=0``
        scales every column independently.

    Returns
    -------
    np.ndarray
        Float array with the same shape as ``y``. Where the minimum equals the
        maximum (constant data) the result is 0 instead of NaN.
    """
    y = np.asarray(y, dtype=float)
    low = np.amin(y, axis=axis, keepdims=True)
    span = np.amax(y, axis=axis, keepdims=True) - low
    # A zero span would give 0/0; dividing by 1 maps constant data to 0.
    safe_span = np.where(span == 0, 1.0, span)
    return (y - low) / safe_span

In [6]:
# Load every image listed in data.csv together with its three target values.
images, y = readImages(r"data.csv")
# Rescale the targets with normalizeY before they are used for training.
y = normalizeY(y)
# Flatten each 2-D image into a single feature row: one row per image.
X = images.reshape(len(images), images.shape[1] * images.shape[2])

Column names are path, body, head, lung


In [7]:
def mse(y_pred, y_test):
    """Mean squared error of each output column.

    Parameters
    ----------
    y_pred : array-like, shape (n_samples, n_outputs)
        Predicted values.
    y_test : array-like, shape (n_samples, n_outputs)
        Reference values.

    Returns
    -------
    list of float
        One MSE per output column, averaged over the samples. Works for any
        number of outputs, not only three.

    Raises
    ------
    ValueError
        If the inputs are not 2-D arrays of identical shape, or contain no samples.
    """
    pred = np.asarray(y_pred, dtype=float)
    true = np.asarray(y_test, dtype=float)
    if pred.ndim != 2 or pred.shape != true.shape:
        raise ValueError(
            f'y_pred and y_test must be 2-D with equal shapes, got {pred.shape} and {true.shape}'
        )
    if pred.shape[0] == 0:
        raise ValueError('mse needs at least one sample')
    # Average over the sample axis, which leaves one value per output column.
    return np.mean((pred - true) ** 2, axis=0).tolist()

def mae(y_pred, y_test):
    """Mean absolute error of each output column.

    Parameters
    ----------
    y_pred : array-like, shape (n_samples, n_outputs)
        Predicted values.
    y_test : array-like, shape (n_samples, n_outputs)
        Reference values.

    Returns
    -------
    list of float
        One MAE per output column, averaged over the samples. Works for any
        number of outputs, not only three.

    Raises
    ------
    ValueError
        If the inputs are not 2-D arrays of identical shape, or contain no samples.
    """
    pred = np.asarray(y_pred, dtype=float)
    true = np.asarray(y_test, dtype=float)
    if pred.ndim != 2 or pred.shape != true.shape:
        raise ValueError(
            f'y_pred and y_test must be 2-D with equal shapes, got {pred.shape} and {true.shape}'
        )
    if pred.shape[0] == 0:
        raise ValueError('mae needs at least one sample')
    # Average over the sample axis, which leaves one value per output column.
    return np.mean(np.abs(pred - true), axis=0).tolist()

In [8]:
# First split: keep 80% of the samples for training, hold out the remaining 20%.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Second split: one third of the held-out part becomes the test set, the rest
# is used for validation.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=1/3, random_state=42
)
# Report the size of each partition (train, validation, test).
print(*(len(part) for part in (X_train, X_val, X_test)))

64 11 6


In [6]:
#svm = SVR(kernel='linear', C=1.0)
#reg = MultiOutputRegressor(svm)
#reg.fit(X_train, y_train)

In [12]:
# MSE is an error metric, so lower is better: start from +inf and look for
# decreases. (Starting at 0 and testing for ">" would reward a growing error.)
best_performance = np.inf
patience = 5  # Number of epochs with no improvement after which training will be stopped
no_improvement_count = 0
num_epochs = 100

svm = SVR(kernel='linear', C=5.0)
reg = MultiOutputRegressor(svm)

# NOTE(review): fit() retrains the estimator from scratch on every call, and
# nothing changes between iterations, so the validation MSE is not expected to
# improve after the first epoch and the loop ends once `patience` is exhausted.
# Consider fitting once, or varying a hyper-parameter (e.g. C) per iteration.
for epoch in range(num_epochs):
    # Train the SVM on the training data
    reg.fit(X_train, y_train)
    # Evaluate performance on the validation set
    y_pred = reg.predict(X_val)
    MSEs = mse(y_pred, y_val)
    current_performance = np.mean(MSEs)

    if current_performance < best_performance:
        print(f'New best MSE of {current_performance}')
        best_performance = current_performance
        no_improvement_count = 0
    else:
        no_improvement_count += 1

        if no_improvement_count >= patience:
            print(f'Early stopping at epoch {epoch}')
            break


New best MSE of 0.01140047610824014
Early stopping at epoch 5


In [10]:
# Predict the three ROI values for every image of the held-out test set.
y_pred = reg.predict(X_test)

# List the reference value next to the prediction, image by image.
for img_idx, pred_row in enumerate(y_pred):
    print(f'IMAGE {img_idx+1}:')
    true_row = y_test[img_idx]
    for roi_idx, pred_val in enumerate(pred_row):
        print(f'\t ROI{roi_idx+1}: real {true_row[roi_idx]}, pred {pred_val}.')

# Per-ROI mean squared error on the test set, followed by its average.
MSEs = mse(y_pred, y_test)
print(f'MSEs: {MSEs}')
print(f'Mean MSE: {np.mean(MSEs)}')

IMAGE 1:
	 ROI1: real 0.10810121688922154, pred 0.2374752254048472.
	 ROI2: real 0.07921089319422112, pred 0.062318792898532246.
	 ROI3: real 0.02941535306511598, pred 0.11724596818097083.
IMAGE 2:
	 ROI1: real 1.0, pred 0.7367290278181411.
	 ROI2: real 0.052650987068924154, pred 0.062318792898532246.
	 ROI3: real 0.23665116253565882, pred 0.13167996967030332.
IMAGE 3:
	 ROI1: real 0.08794995908060914, pred 0.4358944689193346.
	 ROI2: real 0.06125296956885904, pred 0.062318792898532246.
	 ROI3: real 0.022982715155802122, pred 0.12041957023112786.
IMAGE 4:
	 ROI1: real 0.018517223842090907, pred 0.06909224519558617.
	 ROI2: real 0.008482719953720649, pred 0.062318792898532246.
	 ROI3: real 0.0010085910158290187, pred 0.09141930584414318.
IMAGE 5:
	 ROI1: real 0.06173276142144505, pred 0.24206014954607394.
	 ROI2: real 0.023390538230500232, pred 0.062318792898532246.
	 ROI3: real 0.007177000697754437, pred 0.10623215169692773.
IMAGE 6:
	 ROI1: real 0.08404308256669449, pred -0.2020942719