# to_Mark_DataViz_linked_brush
**Leo Portes, 2024-06-12**


This notebook utilizes the pandas dataframe created in the notebook *to_Mark_generate_DF_with_Haralick_tsne_and_TilesImages.ipynb*!

In [1]:
# imports 

import pandas as pd
import numpy as np
from tqdm import tqdm 

import matplotlib.pyplot as plt # for plotting
import seaborn as sns
from matplotlib import image as plt_image

from bokeh.plotting import figure, show, output_notebook, output_file, save, curdoc
from bokeh.models import ColumnDataSource, HoverTool, CustomJS, CategoricalColorMapper, TabPanel, Tabs
from bokeh.layouts import column, layout
from bokeh.transform import factor_cmap, factor_mark

from bokeh.plotting import figure, show, output_notebook, output_file, save
from bokeh.transform import factor_cmap, factor_mark
from bokeh.palettes import Spectral5, Colorblind5
from bokeh.layouts import gridplot
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, CDSView, GroupFilter, Legend


from IPython.display import display, HTML
# Inject a CSS rule that forces elements with class `.container` to 100% width,
# so wide Bokeh layouts are not squeezed by the page margins.
# NOTE(review): which front ends actually use a `.container` element is not visible here — confirm.
display(HTML("<style>.container { width:100% !important; }</style>"))



from io import BytesIO
from PIL import Image
import base64


# Configure Bokeh so that later `show(...)` calls render inline in this notebook.
output_notebook()

# Loading data

## Loading tsne dataframe

In [2]:
# Loading tsne dataframe

# `file_path` is reused by later cells (image loading, HTML export)
file_path = r"../data/"
file_name = r"df_tsne_consolidated"

# the first CSV column is used as the dataframe index
csv_location = "%s%s.csv" % (file_path, file_name)
df_tsne = pd.read_csv(csv_location, index_col=0)

df_tsne.head()

Unnamed: 0,comp_1,comp_2,"$f_{1,1}$","$f_{1,2}$","$f_{1,3}$","$f_{1,4}$","$f_{1,5}$","$f_{1,6}$","$f_{1,7}$","$f_{1,8}$",...,"$f_{2,6}$","$f_{2,7}$","$f_{2,8}$","$f_{2,9}$","$f_{2,10}$","$f_{2,11}$","$f_{2,12}$","$f_{2,13}$",image_grav_clip2,image_mag_clip2a
0,6.80734,-12.69941,0.194413,-0.916113,0.647294,-0.543771,1.032045,-0.682225,-0.539366,0.073122,...,-0.304796,-0.487892,0.236145,0.632741,-0.699024,1.007154,0.662627,0.283684,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
1,-12.988842,2.909099,-0.44301,-0.681694,0.755008,0.277095,-0.600932,0.419586,0.280973,1.004452,...,0.256364,-0.076212,0.648961,0.87308,-0.872153,1.574497,-0.028229,0.529043,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
2,-7.056424,-12.275893,-0.048456,-0.348508,0.300492,-0.253396,0.189545,-0.911383,-0.251761,-0.061143,...,-0.170031,0.206781,0.54935,0.533747,-0.54357,0.889954,-0.402965,0.556447,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
3,-17.795529,12.587688,0.032167,-0.867496,0.732416,-0.207646,0.019786,-0.47971,-0.203205,0.5432,...,-0.30569,0.091103,0.237323,0.267533,-0.363097,0.61539,-0.257601,0.509949,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
4,-24.337994,-12.11256,0.053267,-1.078864,0.893719,-0.231236,0.378011,-0.891892,-0.225689,0.1491,...,0.882385,0.040029,0.662913,0.42758,-0.22908,-0.066438,-0.836656,0.591656,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."


## Loading whole images (not the tiles) and cropping

Here we assume that the *df_tsne* dataframe has columns with the prefix *image_* that reflect the original image file names. 

In [3]:
# getting the image file names from the dataframe

image_prefix = "image_"

# Slice the prefix off rather than splitting on it: `split("image_")[1]` would
# truncate a name that contains "image_" a second time
# (e.g. "image_a_image_b" would yield "a_" instead of "a_image_b").
file_name_list = [col[len(image_prefix):]
                  for col in df_tsne.columns if col.startswith(image_prefix)]

print(file_name_list)

['grav_clip2', 'mag_clip2a']


In [4]:
from PIL import Image
from skimage import exposure


def load_geophysics_and_crop(file_name="grav_clip2.tif", w=40, rescale="hist_eq"): 
    """Load an image, optionally rescale its contrast, and crop it to whole tiles.

    The image is read from the module-level ``file_path`` + ``file_name``.

    Parameters
    ----------
    file_name : str
        Name of the image file, appended to the module-level ``file_path``.
    w : int
        Tile edge length in pixels (e.g. 40 means 40x40 tiles). Must be positive.
    rescale : {"stretching", "hist_eq", None}
        - ``"stretching"``: contrast stretching between the 2nd and 98th
          percentiles via ``exposure.rescale_intensity``.
        - ``"hist_eq"``: histogram equalisation via ``exposure.equalize_hist``.
        - ``None``: the pixel values are left untouched.

    Returns
    -------
    numpy.ndarray
        The (optionally rescaled) image cropped from its top-left corner so
        that its first two dimensions are exact multiples of ``w``.

    Raises
    ------
    ValueError
        If ``w`` is not positive or ``rescale`` is not one of the supported modes.
    """
    if w <= 0:
        raise ValueError("w must be a positive tile size, got %r" % (w,))
    if rescale not in ("stretching", "hist_eq", None):
        # previously an unknown mode was silently treated as "no rescaling"
        raise ValueError(
            "rescale must be 'stretching', 'hist_eq' or None, got %r" % (rescale,))

    # the context manager closes the file handle once the pixels are copied out
    with Image.open(file_path+file_name) as img_file:
        img0 = np.array(img_file)

    if rescale == "stretching":
        # Contrast stretching
        print("- Applying %s to the %s image"%(rescale, file_name))
        p2, p98 = np.percentile(img0, (2, 98))
        img0 = exposure.rescale_intensity(img0, in_range=(p2, p98))
    elif rescale == "hist_eq":
        # Histogram equalisation (logged like the other modes)
        print("- Applying %s to the %s image"%(rescale, file_name))
        img0 = exposure.equalize_hist(img0)
    else:
        print("- Applying %s to the %s image"%("no rescaling", file_name))

    # Image cropping, so the image splits into perfectly square tiles.
    # Only the first two dimensions are tiled, so a trailing channel axis
    # (if any) is left intact.
    N_r, N_c = [int(n // w) for n in img0.shape[:2]]

    return img0[:N_r*w, :N_c*w]

In [5]:
# here we load the images and crop them

w = 40 # should be the same tile size used in the notebook to_Mark_generate_DF_with_Haralick_tsne_and_TilesImages

rescale_contrast = ["stretching", "stretching"] # one entry per image; length should match len(file_name_list)

# Fail with a clear message (instead of a bare IndexError) when the dataframe
# holds more image columns than there are rescaling modes configured above.
if len(rescale_contrast) < len(file_name_list):
    raise ValueError(
        "rescale_contrast has %d entries but %d images were found: %s"
        % (len(rescale_contrast), len(file_name_list), file_name_list))

img_list = [load_geophysics_and_crop("%s.tif"%(image_name), w, rescale_mode) 
            for image_name, rescale_mode in zip(file_name_list, rescale_contrast)]

- Applying stretching to the grav_clip2.tif image
- Applying stretching to the mag_clip2a.tif image


# Mapping tsne to geographical position

In [6]:
# Original central position of each tile
# (tile grid is derived from the first image only)
img_height, img_width = img_list[0].shape[0], img_list[0].shape[1]
px = np.arange(0, img_width, w) + w/2
py = np.arange(0, img_height, w) + w/2

# One row per tile: x cycles through `px` fastest, y stays constant along each run.
# NOTE(review): assumes the rows of df_tsne follow this same tile order — confirm
# against the notebook that generated the dataframe.
df_pxy = pd.DataFrame({"x": np.tile(px, len(py)),
                       "y": np.repeat(py, len(px))})

df_tsne['x'] = df_pxy['x'].to_numpy()
# Bokeh y-axis points up (in contrast with Matplotlib), hence the reversed order
df_tsne['y'] = df_pxy['y'].to_numpy()[::-1]

display(df_tsne.head())

Unnamed: 0,comp_1,comp_2,"$f_{1,1}$","$f_{1,2}$","$f_{1,3}$","$f_{1,4}$","$f_{1,5}$","$f_{1,6}$","$f_{1,7}$","$f_{1,8}$",...,"$f_{2,8}$","$f_{2,9}$","$f_{2,10}$","$f_{2,11}$","$f_{2,12}$","$f_{2,13}$",image_grav_clip2,image_mag_clip2a,x,y
0,6.80734,-12.69941,0.194413,-0.916113,0.647294,-0.543771,1.032045,-0.682225,-0.539366,0.073122,...,0.236145,0.632741,-0.699024,1.007154,0.662627,0.283684,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...",20.0,1460.0
1,-12.988842,2.909099,-0.44301,-0.681694,0.755008,0.277095,-0.600932,0.419586,0.280973,1.004452,...,0.648961,0.87308,-0.872153,1.574497,-0.028229,0.529043,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...",60.0,1460.0
2,-7.056424,-12.275893,-0.048456,-0.348508,0.300492,-0.253396,0.189545,-0.911383,-0.251761,-0.061143,...,0.54935,0.533747,-0.54357,0.889954,-0.402965,0.556447,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...",100.0,1460.0
3,-17.795529,12.587688,0.032167,-0.867496,0.732416,-0.207646,0.019786,-0.47971,-0.203205,0.5432,...,0.237323,0.267533,-0.363097,0.61539,-0.257601,0.509949,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...",140.0,1460.0
4,-24.337994,-12.11256,0.053267,-1.078864,0.893719,-0.231236,0.378011,-0.891892,-0.225689,0.1491,...,0.662913,0.42758,-0.22908,-0.066438,-0.836656,0.591656,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...",180.0,1460.0


# t-SNE Atlas | Interactive

In [7]:
%%javascript
// Ask the Python kernel to define the variable `nb_name`, holding this notebook's file name.
// NOTE(review): this relies on the front end exposing a global `IPython.notebook` object;
// if it does not, `nb_name` is never defined on the Python side — confirm for your Jupyter front end.
IPython.notebook.kernel.execute('nb_name = "' + IPython.notebook.notebook_name + '"')

<IPython.core.display.Javascript object>

In [8]:
# Units used for the size of each tile glyph in the t-SNE plots; read by the
# plotting cell below to pick the glyph size (`dw`, `dh`) and its units.
tile_size_units = "data" # options: "data", "screen"

# decoding the base64-encoded tile images stored in the dataframe
def get_array_from_imagebase64_in_df(row, use_var="image_mag"):
    """Decode the PNG data-URI held in ``row[use_var]`` into a grayscale array.

    Parameters
    ----------
    row : mapping-like (e.g. a dataframe row)
        Must provide ``row[use_var]`` as a string containing the marker
        ``"data:image/png;base64,"`` followed by the base64 payload.
    use_var : str
        Name of the field holding the data-URI. Note that the default does not
        match the image columns shown in this notebook (``image_grav_clip2``,
        ``image_mag_clip2a``), so callers pass it explicitly.

    Returns
    -------
    numpy.ndarray
        The decoded image converted to PIL mode ``"L"`` (grayscale).
    """
    # everything after the data-URI header is the base64 payload
    payload = row[use_var].split("data:image/png;base64,")[1]
    tile = Image.open(BytesIO(base64.b64decode(payload)))
    return np.array(tile.convert("L"))

In [9]:
# Building Bokeh's ColumnDataSource

# t-SNE coordinates (comp_1, comp_2) and tile-centre positions (x, y)
data_dict = {column: df_tsne[column].values
             for column in ("comp_1", "comp_2", "x", "y")}

# adding image tiles: one list of decoded arrays per image column
for image_name in tqdm(file_name_list):
    image_column = "image_%s"%(image_name)
    decoded_tiles = []
    for _, row in df_tsne.iterrows():
        decoded_tiles.append(
            get_array_from_imagebase64_in_df(row, use_var=image_column))
    data_dict[image_column] = decoded_tiles

print("Sanity check:\n", data_dict.keys())

# data source used by Bokeh
source = ColumnDataSource(data = data_dict)
data_dict = list() # saving memory (drops this name's reference to the dict)

100%|█████████████████████████████████████████████| 2/2 [00:00<00:00,  2.82it/s]

Sanity check:
 dict_keys(['comp_1', 'comp_2', 'x', 'y', 'image_grav_clip2', 'image_mag_clip2a'])





In [None]:
# Interactive plot | linked brush

# size of every Bokeh figure, in screen pixels (a smaller alternative is 400 x 300)
fig_w, fig_h = 800, 600

# extent of the whole (cropped) image, taken from the first image
dw_img, dh_img = img_list[0].shape[1], img_list[0].shape[0]

# 110% of the largest t-SNE coordinate on each axis (not used further in this cell)
xmax = 1.1*np.max(df_tsne['comp_1'].values, axis=None)
ymax = 1.1*np.max(df_tsne['comp_2'].values, axis=None)

sizex = 1 # width and length of the square images
sizey = 1 #sizex*ymax/xmax

# Tile glyph size and its units, read as globals by get_plot().
if tile_size_units == 'data': 
    dh_units, dw_units = 'data','data'
    dw, dh = sizex, sizey
elif tile_size_units == 'screen': 
    dh_units, dw_units = 'screen','screen'
    dw, dh = 20, 20 # display size of each tile in pixels.
else:
    # Without this branch an invalid value would silently reuse dw/dh from a
    # previous run of this cell, or fail later with a NameError in get_plot().
    raise ValueError(
        "tile_size_units must be 'data' or 'screen', got %r" % (tile_size_units,))

TOOLS = "pan,wheel_zoom,reset,box_select, tap, save" # box_zoom

def get_plot(source, tile_size_units="data", use_var="image_mag", x_range=None, y_range=None, tools=TOOLS): 
    """Build one t-SNE "atlas" figure: every tile image drawn at its (comp_1, comp_2) position.

    Parameters
    ----------
    source : ColumnDataSource
        Must hold the columns ``comp_1``, ``comp_2`` and ``use_var``.
    tile_size_units : str
        Accepted for interface compatibility but not read here: the glyph size
        and units come from the module-level ``dw``, ``dh``, ``dw_units`` and
        ``dh_units``.
    use_var : str
        Name of the ``source`` column holding the tile images.
    x_range, y_range : optional
        Ranges to share with another figure. When ``x_range`` is None the figure
        gets its own ranges and ``match_aspect=True``.
    tools : str
        Bokeh tool specification for the figure.

    Returns
    -------
    The configured Bokeh figure (sized by the module-level ``fig_w`` x ``fig_h``).
    """
    from bokeh.models import LinearColorMapper, BoxZoomTool

    # tiles are decoded as 8-bit grayscale, hence the fixed 0..255 mapping
    color_mapper = LinearColorMapper(palette="Magma256", low=0, high=255)

    # honour the `tools` argument (it used to be ignored in favour of the global TOOLS)
    if x_range is None: 
        p = figure(width=fig_w, height=fig_h, match_aspect=True, tools=tools)
    else: 
        p = figure(width=fig_w, height=fig_h, x_range=x_range, y_range=y_range, tools=tools)

    # Outline around each tile; it uses the same units as the image glyph below,
    # so outline and tile keep the same size when the units are "screen".
    p.rect(x="comp_1", y='comp_2', source=source, width=dw, height=dh, fill_color=None,line_color='#AAFF00',
           line_width=3, width_units=dw_units, height_units=dh_units)
    
    p.add_tools(BoxZoomTool(match_aspect=True))
    
    # plotting the tile images, centred on their t-SNE coordinates
    p.image(image=use_var, x='comp_1', y='comp_2',source=source,
                dh_units=dh_units, dw_units=dw_units,
                dw=dw, dh=dh, origin="top_left", anchor='center_center', color_mapper=color_mapper,
                dilate=True)

    return p

# LEFT #####################################################
# one t-SNE atlas figure per image

left_list = []
for image_name in file_name_list:
    left_list.append(
        get_plot(source, tile_size_units="data", use_var="image_%s"%(image_name), tools=TOOLS))

# every further figure shares the ranges of the first one (linked pan/zoom)
for left_fig in left_list[1:]:
    left_fig.update(x_range=left_list[0].x_range, y_range=left_list[0].y_range)

# titles, and no grid lines or axes on the atlas
for image_name, left_fig in zip(file_name_list, left_list):
    left_fig.update(title="%s tiles (individual contrast stretching)"%(image_name))
    left_fig.xgrid.grid_line_color = None
    left_fig.ygrid.grid_line_color = None
    left_fig.axis.visible = False


# RIGHT #####################################################
def get_plot_right(img, source, tools=TOOLS): 
    """Build the figure showing a whole image with the tile locations outlined on it.

    Parameters
    ----------
    img : array
        The whole image to display. It is drawn over the extent given by the
        module-level ``dw_img`` x ``dh_img``.
    source : ColumnDataSource
        Must hold the tile-centre columns ``x`` and ``y``.
    tools : str
        Bokeh tool specification for the figure.

    Returns
    -------
    The configured Bokeh figure (sized by the module-level ``fig_w`` x ``fig_h``).
    """
    # honour the `tools` argument (it used to be ignored in favour of the global TOOLS)
    right = figure(width=fig_w, height=fig_h, match_aspect=True, tools=tools,
               background_fill_color="#fafafa", y_axis_location="right")

    right.image([img], x=0, y=0, dw=dw_img, dh=dh_img, palette="Magma256", origin="top_left")
    
    # marking original location: one w x w outline per tile, in data units
    right.rect(x="x", y='y', source=source, width=w, height=w, fill_color=None, line_color='#AAFF00',
        width_units="data", height_units="data")

    return right


# one whole-image figure per image
right_list = []
for idx in range(len(file_name_list)):
    right_list.append(get_plot_right(img_list[idx], source, tools=TOOLS))

# every further figure shares the ranges of the first one (linked pan/zoom)
for right_fig in right_list[1:]:
    right_fig.update(x_range=right_list[0].x_range, y_range=right_list[0].y_range)

from bokeh.models import BoxZoomTool
# titles, no grid lines, and an aspect-preserving box zoom
for idx, right_fig in enumerate(right_list):
    figure_title = "%s | %s constrat stretching"%(file_name_list[idx], rescale_contrast[idx])
    right_fig.update(title=figure_title)
    right_fig.xgrid.grid_line_color = None
    right_fig.ygrid.grid_line_color = None
    right_fig.add_tools(BoxZoomTool(match_aspect=True))




# one row (atlas on the left, whole image on the right) per image ...
grid_list = [gridplot([[left_fig, right_fig]])
             for left_fig, right_fig in zip(left_list, right_list)]

# ... each row wrapped in its own tab, titled with the image name
tab_list = [TabPanel(child = grid, title = image_name)
            for grid, image_name in zip(grid_list, file_name_list)]

# Add the tabs into a Tabs object
tabs_object = Tabs(tabs = tab_list)

show(tabs_object)

In [None]:
# Uncomment the last line to save the interactive HTML file 

# `nb_name` only exists if the %%javascript cell above managed to set it in the
# kernel; fall back to the notebook title so this cell cannot fail with a NameError.
notebook_label = globals().get("nb_name", "to_Mark_DataViz_linked_brush")

# explicit ".html" extension so the saved document opens directly in a browser
output_file(filename=file_path+"tsne_linked_brush.html", title="nb_name %s"%notebook_label)
#save(tabs_object)