In [None]:
import os
import cv2

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import mahotas as mt

from PIL import Image 
from skimage import io 
from sklearn.model_selection import train_test_split
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border 
from skimage.morphology import remove_small_objects 
from skimage.morphology import binary_closing


In [None]:
def normalizeImages(img, target_size=(375, 750)):
    """Resize an image and rescale its intensities to a non-negative z-score.

    The image is resized with nearest-neighbour interpolation, min-max scaled
    to [0, 1], standardized to zero mean and unit standard deviation, and
    finally shifted so that its smallest value is exactly 0.

    Parameters
    ----------
    img : numpy.ndarray
        Image array accepted by ``cv2.resize``.
    target_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize`` as ``dsize``. The default
        ``(375, 750)`` produces an array with 750 rows and 375 columns.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of the resized shape whose minimum is 0. A constant
        image (max == min) yields an all-zero array instead of NaNs.
    """
    # Resize (dsize is width x height) and switch to float for the arithmetic below.
    resized_img = cv2.resize(img, dsize=target_size, interpolation=cv2.INTER_NEAREST)
    resized_img = resized_img.astype('float32')

    lowest = np.min(resized_img)
    value_range = np.max(resized_img) - lowest
    if value_range == 0:
        # Constant image: both the min-max and the z-score step would divide by zero.
        return np.zeros_like(resized_img)

    # Normalize images using max-min
    normalized_img = (resized_img - lowest) / value_range

    # Standardize using z-score, then shift by the (negative) minimum so that
    # every value is non-negative.
    scaled_img = (normalized_img - np.mean(normalized_img)) / np.std(normalized_img)
    scaled_img -= np.amin(scaled_img)
    return scaled_img
    
def segmentAndSmooth(img, min_size=0.1):
    """Segment the foreground with Otsu's threshold and smooth the mask.

    Parameters
    ----------
    img : numpy.ndarray
        2-D intensity image (e.g. the output of ``normalizeImages``).
    min_size : number, optional
        Passed to ``remove_small_objects``; connected components smaller than
        this many pixels are dropped. With the default ``0.1`` no component is
        removed, because every component has at least one pixel. Raise it to
        actually filter small objects.

    Returns
    -------
    numpy.ndarray
        Boolean mask with the same shape as ``img``. An image without any
        positive pixel yields an all-False mask.
    """
    # Otsu's threshold, computed from the strictly positive pixels only.
    positive_pixels = img[img > 0]
    if positive_pixels.size == 0:
        # Nothing to threshold; threshold_otsu cannot handle an empty array.
        return np.zeros(img.shape, dtype=bool)

    thresh = threshold_otsu(positive_pixels)
    foreground = img > thresh

    # Filtering and smoothing. remove_small_objects labels a boolean mask
    # itself; connectivity=2 is the 8-neighbourhood for 2-D images.
    filtered_img = remove_small_objects(foreground, min_size=min_size, connectivity=2)
    smoothed_img = binary_closing(filtered_img)
    return smoothed_img

def featureExtraction(normalizedImg, smoothedImg):
    """Build the combined Hu-moment + Haralick feature vector for one image.

    Parameters
    ----------
    normalizedImg : numpy.ndarray
        Intensity image passed to ``cv2.moments``.
    smoothedImg : numpy.ndarray
        Image passed to ``mahotas.features.haralick`` (see
        ``textureExtractionOnly``).

    Returns
    -------
    numpy.ndarray
        1-D array: the flattened Hu moments followed by the direction-averaged
        Haralick features.
    """
    # NOTE(review): Hu moments are taken from the intensity image and Haralick
    # textures from the segmentation mask. Shape descriptors are more commonly
    # computed on the mask and textures on the intensities; confirm this
    # pairing is intended.
    hu_moments = cv2.HuMoments(cv2.moments(normalizedImg)).flatten()
    ht_mean = textureExtractionOnly(smoothedImg)
    return np.concatenate([hu_moments, ht_mean])

def textureExtractionOnly(smoothedImg):
    """Return the Haralick texture features averaged over all directions.

    Parameters
    ----------
    smoothedImg : numpy.ndarray
        Image passed to ``mahotas.features.haralick``.

    Returns
    -------
    numpy.ndarray
        Mean over axis 0 (the direction axis) of the Haralick feature matrix.
    """
    textures = mt.features.haralick(smoothedImg)
    return textures.mean(axis=0)

In [None]:
## Reading in the images from the 20GB of data downloaded and building the
## full feature matrix (pixels + Hu moments + Haralick textures), one column per scan.
scans_path = "/Users/arelivalencia/Desktop/Images"

# os.listdir returns names in arbitrary order and also lists hidden files
# (e.g. .DS_Store) and sub-directories. Keep only regular, non-hidden files and
# sort them so the column order is reproducible between runs.
list_of_scans = sorted(
    name for name in os.listdir(scans_path)
    if not name.startswith(".") and os.path.isfile(os.path.join(scans_path, name))
)

num_of_scans = len(list_of_scans)

# Rows per scan: 750 * 375 pixels + 7 Hu moments + 13 Haralick means = 281270.
allData = np.zeros((750 * 375 + 7 + 13, num_of_scans))

for scan_num, scan_name in enumerate(list_of_scans):
    scan_path = os.path.join(scans_path, scan_name)
    image = io.imread(scan_path)

    normalizedImg = normalizeImages(image)
    smoothedImg = segmentAndSmooth(normalizedImg)
    features = featureExtraction(normalizedImg, smoothedImg)

    imgFlattened = normalizedImg.flatten()
    img_Features = np.concatenate([imgFlattened, features])

    # Fail with the offending file name instead of an anonymous broadcast
    # error (e.g. when a scan is not single-channel).
    if img_Features.size != allData.shape[0]:
        raise ValueError(
            "{}: expected {} values per scan, got {}".format(
                scan_path, allData.shape[0], img_Features.size))

    allData[:, scan_num] = img_Features

np.savetxt("allData.csv", allData, delimiter=",")

In [None]:
## Reading in the images from the 20GB of data downloaded and building the
## texture-only feature matrix (pixels + Haralick textures), one column per scan.
scans_path = "/Users/arelivalencia/Desktop/Images"

# os.listdir returns names in arbitrary order and also lists hidden files
# (e.g. .DS_Store) and sub-directories. Keep only regular, non-hidden files and
# sort them so the column order is reproducible between runs.
list_of_scans = sorted(
    name for name in os.listdir(scans_path)
    if not name.startswith(".") and os.path.isfile(os.path.join(scans_path, name))
)

num_of_scans = len(list_of_scans)

# Rows per scan: 750 * 375 pixels + 13 Haralick means = 281263.
data_Textures = np.zeros((750 * 375 + 13, num_of_scans))

for scan_num, scan_name in enumerate(list_of_scans):
    scan_path = os.path.join(scans_path, scan_name)
    image = io.imread(scan_path)

    normalizedImg = normalizeImages(image)
    smoothedImg = segmentAndSmooth(normalizedImg)
    textures = textureExtractionOnly(smoothedImg)

    imgFlattened = normalizedImg.flatten()
    img_Features = np.concatenate([imgFlattened, textures])

    # Fail with the offending file name instead of an anonymous broadcast
    # error (e.g. when a scan is not single-channel).
    if img_Features.size != data_Textures.shape[0]:
        raise ValueError(
            "{}: expected {} values per scan, got {}".format(
                scan_path, data_Textures.shape[0], img_Features.size))

    data_Textures[:, scan_num] = img_Features

np.savetxt("Data with images and Harlick textures - no shape features.csv", data_Textures, delimiter=",")