### load the clean data

### straight clahe

### build feature vectors

### 1. SIFT keypoints

#### 2. Moravec Corner Detection

#### 3. Thresholding operation (try various thresholds)

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os


### Start with SIFT data and split into train and test set

In [2]:
def determine_disease(filename):
    '''
    Check to see if image is TB or not.

    The label is the last character of the file name before the extension
    (e.g. 'CHNCXR_0492_1.png' -> TB, 'CHNCXR_0187_0.png' -> not TB).
    Input: name (or path) of image
    Output: Boolean value of disease or not
    '''
    # Strip directory and extension first: indexing filename[-4] directly
    # lands on the '.' of '.png', so the label digit would never be seen
    # (and names shorter than 4 characters would raise IndexError).
    stem, _ = os.path.splitext(os.path.basename(filename))
    return stem.endswith('1')
    
# TODO: finetune these hyperparameters
def increase_contrast(image, box_size=8, lim = 2):
    '''
    Apply CLAHE (contrast limited adaptive histogram equalization) to an image
    Input: image to enhance
    Optional: box_size (tile grid is box_size x box_size) and lim (clip limit)
              - both can be finetuned
    Output: the image returned by the CLAHE operator
    '''
    tile_grid = (box_size, box_size)
    equalizer = cv2.createCLAHE(clipLimit=lim, tileGridSize=tile_grid)
    enhanced = equalizer.apply(image)
    return enhanced

def sift_kp_des(image):
    '''
    Detect SIFT keypoints and compute their descriptors for an image
    Input: Image
    Output: (keypoints, descriptors) as returned by detectAndCompute
    '''
    detector = cv2.SIFT_create()
    # No mask is passed, so detection is not restricted to a sub-region.
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors

# TODO: finetune these hyperparameters
def harris_detection(image, blocksize = 2, ksize = 3, k = .04):
    '''
    Run Harris corner detection (HCD) on an image. Idea is to isolate the hilar trunk
    input: image
    Optional: blocksize, ksize and k are forwarded to cv2.cornerHarris
              and can be finetuned
    return: the array produced by cv2.cornerHarris
    '''
    response = cv2.cornerHarris(image, blocksize, ksize, k)
    return response

def threshold_image(image, min_val = 127, max_val=255):
    '''
    Binarize an image and measure how much of it is white
    input: image
    Optional: min_val (threshold) and max_val (value passed to
              cv2.threshold as the maximum value)
    return: number of pixels equal to max_val after thresholding,
            divided by the total number of pixels in the image
    '''
    # cv2.threshold returns (retval, thresholded image); only the image is needed.
    binary = cv2.threshold(image, min_val, max_val, cv2.THRESH_BINARY)[1]
    white_fraction = (binary == max_val).sum() / image.size
    return white_fraction

### build data dictionary

In [3]:
# Root directory that is walked recursively for the cleaned .png images.
path = '/Users/georgienahass/Desktop/fall_classes_PhD/CS415/cs415_final/clean_images'

# Pixel-intensity cutoffs used for the white-pixel fraction feature.
thresholds = [127, 150, 170, 200]
# Upper bound on the number of images processed in this run.
max_images = 20

# data_dict[label][feature]; label '1' = TB, '0' = not TB (per determine_disease).
#   'sift' -> one SIFT descriptor result per image
#   'hcd'  -> one array of selected Harris-response coordinates per image
#   'wp'   -> {threshold as str: list of white-pixel fractions, one per image}
data_dict = {
    label: {'sift': [], 'hcd': [], 'wp': {str(t): [] for t in thresholds}}
    for label in ('0', '1')
}

i = 0
for root, dirs, files in os.walk(path):
    # Stop walking the tree once enough images have been processed.
    if i >= max_images:
        break
    for file in files:
        if i >= max_images:
            break
        print(file)
        if not file.endswith('.png'):
            continue

        image = cv2.imread(os.path.join(root, file), cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; skip it instead of crashing in the CLAHE step.
            print('could not read image, skipping:', file)
            continue

        # apply contrast to images
        cont_image = increase_contrast(image)

        # detect Sift keypoints
        # NOTE(review): des is appended as returned; presumably it can be
        # None when no keypoints are found - confirm before stacking.
        kp, des = sift_kp_des(cont_image)

        # HCD: keep only responses strictly between the 99.7th and 99.9th
        # percentiles of the response map.
        harris = harris_detection(cont_image, blocksize=20, ksize=11)
        lower, upper = np.percentile(harris, [99.7, 99.9])
        corners = np.argwhere((harris > lower) & (harris < upper))

        # Resolve the label once and write every feature to the same bucket.
        bucket = data_dict['1' if determine_disease(file) else '0']

        # Thresholding
        # NOTE(review): this uses the raw image, not cont_image - confirm
        # that thresholding before contrast enhancement is intended.
        for threshold in thresholds:
            per_white = threshold_image(image, min_val=threshold)
            bucket['wp'][str(threshold)].append(per_white)

        bucket['sift'].append(des)
        bucket['hcd'].append(corners)

        i += 1

CHNCXR_0492_1.png
CHNCXR_0187_0.png
CHNCXR_0342_1.png
CHNCXR_0286_0.png
CHNCXR_0022_0.png
CHNCXR_0067_0.png
CHNCXR_0572_1.png
CHNCXR_0537_1.png
CHNCXR_0323_0.png
CHNCXR_0636_1.png
CHNCXR_0147_0.png
CHNCXR_0102_0.png
CHNCXR_0417_1.png
CHNCXR_0452_1.png
CHNCXR_0246_0.png


In [None]:
                # HCD tuning
                # blocks = [10, 15,20,30, 50]
                # ks = [3,5,11,15,21]
                # for bs in blocks:
                #     for k_size in ks:            
                #         print(bs, k_size)
                #         harris = harris_detection(cont_image, blocksize=bs, ksize=k_size )
                #         print(harris.nonzero())
                #         # plot hcd on color image
                #         rgb_img = cv2.cvtColor(cont_image,cv2.COLOR_GRAY2BGR)
                #         # rgb_img[harris > 0.01 * harris.max()] = [0,0,255]
                #         lower_percentile = np.percentile(harris, 99.7)   # Change 99.7 to your desired lower percentile
                #         upper_percentile = np.percentile(harris, 99.9)  # Change 99.9 to your desired upper percentile

                #         # Find coordinates of strong corners
                #         corners = np.argwhere((harris > np.percentile(harris, 99.7)) & (harris < np.percentile(harris, 99.9)))
                    
                #         print(len(corners))
                #         for y, x in corners:
                #             cv2.circle(rgb_img, (x, y), 20, (255, 0, 255), -1)

                #         plt.imshow(rgb_img)
                #         plt.show()
                        
                # print(harris)