In [12]:
from IPython.display import display, HTML

# import matplotlib and numpy
import matplotlib.pyplot as plt 
import matplotlib.image as mpimage
import matplotlib.patches as patches

from matplotlib import cm
import colorcet as cc
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1 import make_axes_locatable

import numpy as np 
import pandas as pd
import cv2

# import operating system and glob libraries
import os

import time
from datetime import datetime

import scipy
from scipy import ndimage

from pathlib import Path

# this following line tells Jupyter to display images here in the browser, 
# rather than in separate window.
%matplotlib inline 

# Two independent module-level lists, both starting out empty. Nothing in the
# visible cells appends to them; presumably they collect the ids of cases whose
# DICOM / observation images caused problems -- TODO confirm against the cells
# that fill them.
dicomImage_issues, observationImage_issues = [], []

In [13]:
%%capture
# Execute the companion notebook numpngw.ipynb so that the names it defines
# become available in this kernel; %%capture (which must stay the first line of
# the cell) swallows anything that notebook prints or displays.
# NOTE(review): no name from numpngw is referenced in the visible cells --
# confirm this dependency is still needed.
%run numpngw.ipynb

In [14]:
# Grey colormap taken from colorcet (going by its name: linear, lightness 0 to
# 100, chroma 0). It is the colormap handed to doublePlot when images are shown.
cmap_grey = cc.cm.linear_grey_0_100_c0

In [15]:
# Root of the CBIS-DDSM working tree. The location that used to be hard-coded is
# kept as the default; set the CBIS_DDSM_DIR environment variable to run the
# notebook against a different location. os.path.join(..., "") guarantees the
# trailing separator that the string concatenations below (and in later cells)
# rely on.
CBIS_DDSM_dir          = os.path.join(
    os.environ.get("CBIS_DDSM_DIR") or "/Users/kasparlund/AICodeData/mammography-data/",
    "")
Converted_dir          = CBIS_DDSM_dir+"converted_images/"

Converted_png          = Converted_dir+"png/"
Converted_png_test     = Converted_png+"test/"
Converted_png_training = Converted_png+"train/"

# Converted_png_test already ends with "/", so no leading slash is added here
# (the previous "/xray_images/" produced ".../test//xray_images/").
Converted_png_test_x_ray  = Converted_png_test+"xray_images/"
Converted_png_train_x_ray = Converted_png_training+"xray_images/"

# Case table produced by the conversion step; the file is ';'-separated.
# Judging by the displayed rows it appears to hold one row per observation, so a
# case_id can occur more than once.
cases = pd.read_csv(Converted_dir+"converted_data_with_dimensions.csv", sep=";")
cases.head(5)

Unnamed: 0.1,Unnamed: 0,case_id,height,width,obs_id,simple_pathology,patient_id,purpose,density,left_right,projection,obs id,observation,obs_shape,obs_distribution,assessment,pathology,subtlety
0,0,Calc-Test_P_00038_LEFT_CC,3601,1296,Calc-Test_P_00038_LEFT_CC_1,BENIGN,P_00038,test,2.0,LEFT,CC,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2
1,1,Calc-Test_P_00038_LEFT_MLO,4060,1695,Calc-Test_P_00038_LEFT_MLO_1,BENIGN,P_00038,test,2.0,LEFT,MLO,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2
2,2,Calc-Test_P_00038_RIGHT_CC,3309,1198,Calc-Test_P_00038_RIGHT_CC_1,BENIGN,P_00038,test,2.0,RIGHT,CC,1,calcification,VASCULAR,0,2,BENIGN_WITHOUT_CALLBACK,5
3,4,Calc-Test_P_00038_RIGHT_MLO,3771,1526,Calc-Test_P_00038_RIGHT_MLO_1,BENIGN,P_00038,test,2.0,RIGHT,MLO,1,calcification,VASCULAR,0,2,BENIGN_WITHOUT_CALLBACK,5
4,6,Calc-Test_P_00041_LEFT_CC,4682,2798,Calc-Test_P_00041_LEFT_CC_2,BENIGN,P_00041,test,1.0,LEFT,CC,2,calcification,LUCENT_CENTER,0,2,BENIGN_WITHOUT_CALLBACK,5


In [16]:
# Output tree for the histogram-matched images, split into test and train like
# the source tree. Both leaf directories are created up front; an already
# existing directory is not an error.
equalized_dir       = f"{CBIS_DDSM_dir}equalized2dogscatshist/"
equalized_test_dir  = f"{equalized_dir}images/test/"
equalized_train_dir = f"{equalized_dir}images/train/"
os.makedirs(name=equalized_train_dir, exist_ok=True)
os.makedirs(name=equalized_test_dir,  exist_ok=True)

In [17]:
def dogscatshistogram(path="../../fastai-data/data/dogscats/smoothed_histogram.csv"):
    """Load the target histogram profile from a CSV file.

    Parameters
    ----------
    path : str or os.PathLike, optional
        CSV file containing a ``bins`` and a ``counts`` column. The default is
        the location this notebook originally hard-coded; it is relative to the
        current working directory, so pass an explicit path when the notebook
        is run from somewhere else.

    Returns
    -------
    bins : numpy.ndarray
        The ``bins`` column with the value 1 appended, i.e. one entry more
        than ``counts`` (presumably the column stores left bin edges and 1 is
        the closing right edge -- TODO confirm against the file's producer).
    counts : numpy.ndarray
        The ``counts`` column.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not point to an existing file. The message includes
        the resolved absolute path to make a wrong working directory obvious.
    """
    path = Path(path)
    if not path.is_file():
        raise FileNotFoundError(
            f"target histogram CSV not found: {path} (resolved to {path.resolve()})")

    df = pd.read_csv(path)
    # Column access by key: attribute access is fragile for names that can
    # collide with DataFrame attributes.
    bins   = np.append(np.asarray(df["bins"]), 1)
    counts = np.asarray(df["counts"])
    return bins, counts

In [18]:
def simplePlot(img, title, cmap, figsize=(8,8)):
    """Show one image in a figure of its own.

    img     -- image data, handed to plt.imshow unchanged
    title   -- text drawn above the image
    cmap    -- colormap used by imshow
    figsize -- (width, height) of the new figure, in inches
    """
    plt.figure(figsize=figsize)
    plt.title(title)
    plt.imshow(img, cmap=cmap)
    plt.show()

In [19]:
def doublePlot(mainTitle, title1, img1, title2, img2, colormap, width = 16 ):
    """Show two images next to each other, each with a horizontal colorbar below it.

    mainTitle      -- accepted but not drawn (the figure-level title is disabled)
    title1, img1   -- caption and data of the left panel
    title2, img2   -- caption and data of the right panel
    colormap       -- colormap applied to both panels
    width          -- figure width in inches; the figure height is the width
                      scaled by img1's rows/columns ratio, rounded
    """
    n_rows, n_cols = img1.shape[0], img1.shape[1]
    fig = plt.figure(figsize=(width, round((width * n_rows) / n_cols)))
    plt.tight_layout()

    # The panels use the two upper cells of a 2x2 subplot grid.
    for position, caption, pixels in ((221, title1, img1), (222, title2, img2)):
        axis = fig.add_subplot(position)
        axis.set_title(caption)
        drawn = axis.imshow(pixels, cmap=colormap)
        # Reserve a strip under the panel for the colorbar.
        strip = make_axes_locatable(axis).append_axes("bottom", "5%", pad="3%")
        fig.colorbar(drawn, cax=strip, orientation="horizontal")

    plt.show()

In [20]:
def plotHistogramsBeforeAndAfter(title, before, after, limits=[0.01, 0.99], x=None):
    """Plot the value histograms of two arrays side by side.

    title   -- figure-level title
    before  -- array shown in the left panel (flattened first)
    after   -- array shown in the right panel (flattened first)
    limits  -- value range covered by the histograms; values outside are left out
    x       -- accepted but not used

    Both histograms use 512 bins.
    """
    bin_count = 512

    fig = plt.figure(figsize=(12, 12), dpi=100)
    plt.suptitle(title, fontsize=15)
    plt.tight_layout()

    # The panels use the two upper cells of a 2x2 subplot grid.
    for slot, caption, values in ((221, "before", before), (222, "After: ", after)):
        axis = fig.add_subplot(slot)
        axis.set_title(caption)
        axis.hist(values.flatten(), bins=bin_count, range=limits)

    plt.show()

In [21]:
def InverseGlobalEqualization( img, cdf_m, cdf_m_min, cdf_m_max, minValue, maxValue):
    """Undo the linear rescaling of a normalised cumulative histogram.

    The first entry of ``cdf_m`` is dropped and the remaining entries are
    mapped linearly from the interval [minValue, maxValue] onto
    [cdf_m_min, cdf_m_max].

    Parameters
    ----------
    img : any
        Not used; kept so existing call sites keep working.
    cdf_m : array-like
        Normalised cumulative histogram with one leading entry to discard.
        The array is NOT modified (the previous implementation subtracted
        ``minValue`` in place through a slice view, silently changing the
        caller's data, and failed for integer arrays).
    cdf_m_min, cdf_m_max : number
        Bounds of the output scale.
    minValue, maxValue : number
        Bounds of the scale ``cdf_m`` is currently expressed in.

    Returns
    -------
    numpy.ndarray
        New array with ``len(cdf_m) - 1`` entries.
    """
    normalised = np.asarray(cdf_m)[1:]
    # Out-of-place arithmetic: allocates a new array and lets integer input
    # promote to float instead of raising a casting error.
    shifted = normalised - minValue
    return shifted * (cdf_m_max - cdf_m_min) / (maxValue - minValue) + cdf_m_min
    
def cdm( hist, bins, minValue, maxValue ):
    """Build the cumulative histogram and a copy of it rescaled to [minValue, maxValue].

    hist      -- array of bin counts (its cumsum() method is used)
    bins      -- bin edges; not read here, only passed through to the result
    minValue  -- value the smallest cumulative count is mapped to
    maxValue  -- value the largest cumulative count is mapped to

    Returns the tuple
        (scaled cdf, hist, bins, smallest cumulative count,
         largest cumulative count, raw cdf)
    where both cdf arrays get a 0 inserted in front, so each has one entry
    more than ``hist``.

    When the smallest and largest cumulative counts are equal the rescaling
    divides by zero.
    """
    running_total = hist.cumsum()
    lowest  = running_total.min()
    highest = running_total.max()

    # Unscaled running totals, with the leading 0.
    raw_cdf = np.insert(running_total, 0, 0)

    # Stretch the running totals linearly onto [minValue, maxValue].
    stretched  = (running_total - lowest) * (maxValue - minValue) / (highest - lowest)
    stretched += minValue
    scaled_cdf = np.insert(stretched, 0, 0)

    return scaled_cdf, hist, bins, lowest, highest, raw_cdf

def globalEqualization( img, minValue=0.01, maxValue=1, nb_bins=1000, targetBins=None, targetHist=None, plot=False):
    """Remap the intensities of ``img`` by histogram equalization or matching.

    The histogram of ``img`` is accumulated and rescaled with ``cdm``. With a
    target profile the resulting transfer curve is additionally pushed through
    the inverse of the target's cumulative histogram, so the output follows
    the target profile ("one step" histogram matching). Without a target the
    plain equalization curve is applied.

    Parameters
    ----------
    img : array-like
        Image to transform. It is not modified. Must be 2-D when ``plot`` is
        true, because ``doublePlot`` reads both dimensions.
    minValue, maxValue : number
        Range passed to ``numpy.histogram`` and to ``cdm``.
    nb_bins : int or sequence
        Passed to ``numpy.histogram`` as ``bins``. When explicit edges are
        given, numpy uses them as-is, so the range above then only affects the
        rescaling done by ``cdm``.
    targetBins, targetHist : array-like, optional
        Bin edges and counts of the target profile; ``targetBins`` must have
        one entry more than ``targetHist``. Supply both or neither.
    plot : bool
        Show a diagnostic figure. With a target: the original next to a map of
        where the one-step result deviates from the two-step result
        (equalize, then match), plus printed difference statistics. Without a
        target: the original next to the equalized image.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``img``. Pixels outside the histogram's
        bin range receive the transfer curve's end values (numpy.interp
        clamps).

    Raises
    ------
    ValueError
        If only one of ``targetBins`` / ``targetHist`` is supplied.
    """
    if (targetBins is None) != (targetHist is None):
        raise ValueError("targetBins and targetHist must be supplied together")
    has_target = targetHist is not None

    img    = np.asarray(img)
    pixels = img.ravel()

    # Histogram of the image that was passed in (previously this read the
    # notebook-global `im`, which only worked by coincidence in the main loop).
    hist, bins = np.histogram(pixels, nb_bins, [minValue, maxValue])
    cdf, hist, bins, cdf_min, cdf_max, cdf_org = cdm( hist, bins, minValue, maxValue )

    if has_target:
        tCdf, tHist, tBins, tCdf_min, tCdf_max, tCdf_org = cdm( targetHist, targetBins, minValue, maxValue )
        # One step: transform the curve itself to the target profile so the
        # image only has to be interpolated once.
        transfer = np.interp(cdf, tCdf, tBins)
    else:
        transfer = cdf

    oneStepEqualized = np.interp(pixels, bins, transfer).reshape(img.shape)

    if plot:
        if has_target:
            # Two step reference: equalize the image, then map the equalized
            # image through the inverse target curve.
            equalizedImage   = np.interp(pixels, bins, cdf)
            twoStepEqualized = np.interp(equalizedImage, tCdf, tBins).reshape(img.shape)

            diff         = oneStepEqualized - twoStepEqualized
            absDiff      = np.abs(diff)
            maxAbsDiff   = np.max( absDiff )
            maxDiff      = np.max( diff )
            minDiff      = np.min( diff )
            nmeanAbsDiff = np.mean( np.abs(diff) )
            # Highlight every pixel whose deviation reaches 0.2% of the
            # largest one; skipped when both results are identical, otherwise
            # the whole map would light up.
            if maxAbsDiff > 0:
                absDiff[ absDiff >= maxAbsDiff*0.002 ] = 1
            print(f"max diff:{maxDiff}\nmin diff:{minDiff}\nmaxAbsDiff:{maxAbsDiff}\nmeanAbsDiff:{nmeanAbsDiff}")

            doublePlot("one-step vs two-step equalization", "original", img, "diff", absDiff, cmap_grey, width = 6)
        else:
            doublePlot("global equalization", "original", img, "with global equalization", oneStepEqualized, cmap_grey, width = 6)

    return oneStepEqualized

In [22]:
"""
testCases =  [
#"Mass-Training_P_00797_LEFT_CC",
#"Mass-Training_P_00419_LEFT_MLO",
#"Mass-Test_P_00969_LEFT_CC",
#"Calc-Training_P_00937_RIGHT_MLO",
#Mass-Training_P_00997_LEFT_CC" 
]
#"Mass-Training_P_00997_LEFT_CC"  the algo get width wrong because it is less<224
#tile_cases = cases[ np.isin( cases.case_id, testCases ) ]
"""

# Histogram-match every unique case image to the target profile and store the
# result as a 16-bit PNG in the equalized test/train directory.
dcBins, dcHist = dogscatshistogram()

# `cases` can list a case_id several times; keep the first row of each id
# (np.unique also orders the cases by id).
data,unique_indices = np.unique(cases.case_id,return_index=True)
ucs = cases.iloc[unique_indices]

nb_cases = ucs.shape[0]
print("number of unique cases to process : ",nb_cases)

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed time.
mainLoopStart = time.perf_counter()
# Explicit bin edges (1000 bins over [0, 1]) handed to globalEqualization.
nb_bins=np.linspace(0,1,1001)
# (case_id, reason) for every image that could not be read or written.
failed_cases = []
for i in range(0,nb_cases):
    r         = ucs.iloc[i]
    src_Path  = Converted_png_test_x_ray if r.purpose=="test" else Converted_png_train_x_ray
    dst_Path  = equalized_test_dir       if r.purpose=="test" else equalized_train_dir
    src_file  = os.path.join(src_Path, r.case_id + ".png")
    dst_file  = os.path.join(dst_Path, r.case_id + ".png")

    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; skip such cases rather than crashing mid-run.
    raw = cv2.imread(src_file, cv2.IMREAD_ANYDEPTH)
    if raw is None:
        failed_cases.append((r.case_id, "could not read " + src_file))
        continue

    # NOTE(review): input is scaled by 65536 but output by 65535 -- confirm
    # whether that asymmetry is intended before changing either constant.
    im    = raw/65536.0

    im2   = globalEqualization(im, nb_bins=nb_bins, targetBins=dcBins, targetHist=dcHist, plot=False)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(dst_file, (65535.0*im2 + 0.5).astype(np.uint16)):
        failed_cases.append((r.case_id, "could not write " + dst_file))

    # Progress report every 50 cases.
    if i % 50 == 0 :
        print( f"current case: {r.case_id}  Processed cases (%):", round(i/nb_cases*100,0) )

print("Time og main loop", time.perf_counter() - mainLoopStart )
if failed_cases:
    print("cases with problems:", len(failed_cases))
    for failed_id, reason in failed_cases:
        print("  ", failed_id, "-", reason)


FileNotFoundError: File b'../../fastai-data/data/dogscats/smoothed_histogram.csv' does not exist