In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import csv
import cv2 as cv
import numpy as np
from pathlib import Path
import sys
print(Path.cwd())
sys.path.insert(0, str(Path.cwd() / ".."))
from image_normalization.image_norm import setGrayToBlack, paddImage, rotate, rotate_same_dim
from pymage_size import get_image_size
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

c:\Users\sondr\OneDrive\Dokumenter\a\TTT23\TTT23_Project\MachineLearning


Read images:    

In [3]:

def readImages(csvPath):    
    """
    Reads the images, sets the grayscale values to black, i.e. removes all the area without a 
    heatmap. Converts remaining image to grayscale and pads, to make every image equal in size.
    
    TODO: instead of converting to grayscale with BGR2GRAY convert to grayscale where red is
    white and purple is black

    returns the images and an array y of the roi values. 
    """
    filenames = []
    roi = []
    max_shape = [0, 0]
    with open(csvPath) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count == 0:
                print(f'Column names are {", ".join(row)}')
                line_count += 1
            else:
                path = Path.cwd() / ".." / row[0]
                filenames.append(str(path))
                img_size = get_image_size(str(path)).get_dimensions()[::-1]
                if img_size[0] > max_shape[0]:
                    max_shape[0] = img_size[0]
                if img_size[1] > max_shape[1]:
                    max_shape[1] = img_size[1]

                roi.append([float(row[1]), float(row[2]), float(row[3])])

    y = np.zeros((len(filenames), 3), float)
    images = np.zeros((len(filenames), max_shape[0], max_shape[1]), dtype=np.uint8) 
    for i, filepath in enumerate(filenames):
        img = cv.imread(str(filepath))
        cv.imshow("img1", img)
        img_black = setGrayToBlack(img, threshold=150)
        cv.imshow("img_gray_black", img_black)

        img_gray = cv.cvtColor(img_black, cv.COLOR_BGR2GRAY)
        cv.imshow("img_gray", img_gray)
        img_pad = paddImage(img_gray, max_shape)
        cv.imshow("pad", img_pad)
        images[i] = img_pad
        y[i] = np.array(roi[i])
        cv.waitKey(0)
        cv.destroyAllWindows()
        return
    return images, y

images, y = readImages(R"C:\Users\sondr\OneDrive\Dokumenter\a\TTT23\TTT23_Project\data.csv")
print(images.shape)
print(y.shape)

Column names are path, body, head, lung


TypeError: cannot unpack non-iterable NoneType object

Split date into training and test set

In [34]:
X_train, X_test, y_train, y_test = train_test_split(images, y, test_size=0.2, random_state=41)

Augment training set. This could be done on the initial set aswell, but then the test set would be of a "lower" quality

In [29]:
def augment_train(X, Y):

    numAugment = np.random.randint(3, 11, size=(len(X)))
    X_out = np.zeros((len(X) + np.sum(numAugment), X.shape[1], X.shape[2]), dtype=X.dtype)
    Y_out = np.zeros((len(Y) + np.sum(numAugment), Y.shape[1]), dtype=Y.dtype)

    out_index = 0

    for i in range(len(X)):
        #augment a random number of times inbetween 3 and 10 a random degree number between -30 and 30 degrees
        X_out[out_index] = X[i]
        Y_out[out_index] = Y[i]
        out_index += 1
        for k in range(numAugment[i]):
            rand_rot = np.random.randint(-30, 30)
            aug_image = rotate_same_dim(X[i], rand_rot)
            X_out[out_index] = aug_image
            Y_out[out_index] = Y[i]
            out_index += 1
    
    return X_out, Y_out

X_train_augmented, y_train = augment_train(X_train, y_train)

IndexError: tuple index out of range

Flatten the images

In [31]:
X_train = X_train_augmented.reshape((X_train_augmented.shape[0], X_train_augmented.shape[1]*X_train_augmented.shape[2])) # Flatten images
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1]*X_test.shape[2]))
print(X_train.shape)
print(X_test.shape)

(493, 81753)
(17, 81753)


In [35]:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1]*X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1]*X_test.shape[2]))
print(X_train.shape)
print(X_test.shape)

(64, 81753)
(17, 81753)


Generate regressor and train data

Variables in RandomForestRegressor can be changed, n_estimators is the number of trees used. Max_features is the number of features that one look for when splitting a node. None means max_features=n_estimators and is slower. 

Can also use decisitonTreeRegressor to omit the randomness of the randomForestDecisionTree.


In [23]:
#Make a tree without random forest
regressor = DecisionTreeRegressor()
multi_output_regressor = MultiOutputRegressor(regressor)

In [36]:
regressor = RandomForestRegressor(n_estimators=100, max_features='sqrt')
# Create a MultiOutputRegressor
multi_output_regressor = MultiOutputRegressor(regressor)

In [37]:
# Fit data
multi_output_regressor.fit(X_train, y_train)
#predict data
predictions = multi_output_regressor.predict(X_test)


Visualize

In [39]:
diff = np.zeros(3, dtype=float)
tot_diff = 0
i = 0
diffs = np.zeros(predictions.shape, float)
print("Imag results: ROI1,\t ROI2,\t ROI3")
for test, pred in zip(y_test, predictions):
    d_test = test
    d_pred = pred
    # d_test = deNormalizeYresults(test, maks_y, min_y)
    # d_pred = deNormalizeYresults(pred, maks_y, min_y)


    print(f"img{i} actual: {d_test}, avg: {np.sum(test)/3}")
    print(f"img{i} predic: {d_pred}, avg: {np.sum(pred)/3}, diff: {abs(d_test - d_pred)}, tot diff: {np.sum(test-pred)}")
    print()
    l_diff = np.abs(d_test-d_pred)
    diffs[i] = l_diff
    diff += l_diff
    tot_diff += np.sum(l_diff)/3
    i+=1

print(f"Total difference: {tot_diff/len(y_test)}")
print(f"Average local diff: {diff/len(y_test)}")
print(f"Variance  of diffs: {np.sqrt(np.var(diffs, axis=0))}")

Imag results: ROI1,	 ROI2,	 ROI3
img0 actual: [1991000.  498600.  157600.], avg: 882400.0
img0 predic: [2152905.   914040.   288089.2], avg: 1118344.7333333334, diff: [161905.  415440.  130489.2], tot diff: -707834.2

img1 actual: [3710000. 2826000.  820300.], avg: 2452100.0
img1 predic: [4762920. 2283992.  803325.], avg: 2616745.6666666665, diff: [1052920.  542008.   16975.], tot diff: -493937.0

img2 actual: [2385000.  977600.  259500.], avg: 1207366.6666666667
img2 predic: [3036835. 1725317.  600180.], avg: 1787444.0, diff: [651835. 747717. 340680.], tot diff: -1740232.0

img3 actual: [4057000.  623700.  231500.], avg: 1637400.0
img3 predic: [3520590.   646265.   285846.6], avg: 1484233.8666666665, diff: [536410.   22565.   54346.6], tot diff: 459498.4

img4 actual: [3205000. 2362000.  909000.], avg: 2158666.6666666665
img4 predic: [3294460.  1956331.   632583.1], avg: 1961124.7, diff: [ 89460.  405669.  276416.9], tot diff: 592625.9

img5 actual: [8474000. 1866000.  849100.], avg: 