# Tissue area calculation Notebook

06/09/2024

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): both magics below select a matplotlib backend. When both run, the later
# `%matplotlib widget` line is the one left in effect, so presumably only one of them is
# needed for a given front end — TODO confirm which environment this notebook targets.
%matplotlib notebook
%matplotlib widget

In [196]:
def XYarray_from_df(df):
    """Return the 'Nuc_X' and 'Nuc_Y_Inv' columns of `df` as a two-column NumPy array.

    Rows keep the order they have in `df`; column 0 holds 'Nuc_X' and
    column 1 holds 'Nuc_Y_Inv'.
    """
    coordinate_columns = ['Nuc_X', 'Nuc_Y_Inv']
    xy_frame = df[coordinate_columns]
    return xy_frame.to_numpy()

In [5]:
def area_of_points(points, grid_size, show_plot=True):
    """Estimate the area covered by N 2D points using an occupancy grid.

    The bounding box of the points is divided into a regular grid. The covered
    area is the bounding-box area multiplied by the fraction of grid cells that
    contain at least one point.

    Parameters
    ----------
    points : sequence of (x, y) pairs, or array of shape (N, 2)
        Point coordinates, e.g. [(x1, y1), (x2, y2), ..., (xN, yN)].
    grid_size : [X, Y]
        Number of grid cells along the x and y axes of the bounding box.
        Larger values mean smaller grid cells, higher precision of the area
        calculation and longer runtime.
    show_plot : bool, default True
        When True, draw the occupied grid cells on a new figure and show it.
        Pass False to compute the area without touching matplotlib.

    Returns
    -------
    float
        Area of the occupied grid cells, in squared coordinate units
        (pixel^2 when the coordinates are given in pixels).

    Raises
    ------
    ValueError
        If `points` is empty or cannot be interpreted as (x, y) pairs.

    Notes
    -----
    Prints the bounding-box area and the estimated point area on every call.
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        raise ValueError("area_of_points() needs at least one (x, y) point")
    if pts.ndim != 2 or pts.shape[1] != 2:
        raise ValueError("'points' must be a sequence of (x, y) pairs")
    pts_x, pts_y = pts[:, 0], pts[:, 1]

    pts_bb_area = (pts_x.max() - pts_x.min()) * (pts_y.max() - pts_y.min())

    # Same binning that plt.hist2d performs internally, without creating a plot.
    counts, x_edges, y_edges = np.histogram2d(pts_x, pts_y, bins=grid_size)
    numberOfWhiteBins = np.count_nonzero(counts == 0)  # grid cells holding no point
    numberOfAll2Dbins = counts.size
    areaFactor = 1.0 - numberOfWhiteBins / numberOfAll2Dbins
    pts_pts_area = float(areaFactor * pts_bb_area)
    print(f'Areas: b-box = {pts_bb_area:8.4f}, points = {pts_pts_area:8.4f}')

    if show_plot:
        # Draw on a fresh figure so repeated calls never paint over an earlier grid.
        # Empty cells are set to NaN so they stay blank, as with hist2d(cmin=1).
        occupied_counts = np.where(counts > 0, counts, np.nan)
        _, ax = plt.subplots()
        ax.pcolormesh(x_edges, y_edges, occupied_counts.T)
        ax.set_xlim(x_edges[0], x_edges[-1])
        ax.set_ylim(y_edges[0], y_edges[-1])
        plt.show()

    return pts_pts_area  # area of colored points in the box (in pixel2)

In [2]:
def calculate_Tissue_area(data, grid_size):
    """Return the area covered by the cell coordinates stored in `data`.

    The coordinates are extracted with XYarray_from_df, converted to a plain
    list of [x, y] pairs and handed to area_of_points together with `grid_size`.
    """
    xy_pairs = XYarray_from_df(data).tolist()
    return area_of_points(xy_pairs, grid_size)

In [None]:
def df_Tissue_area(df, grid_size, um_per_pixel=0.65):
    """Add the tissue area of `df` as two constant columns and return `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Cell table passed on to calculate_Tissue_area. It is modified in place:
        the two area columns are added (or overwritten).
    grid_size : [X, Y]
        Number of grid cells along x and y used for the area estimate.
    um_per_pixel : float, default 0.65
        Pixel size in micrometers per pixel. The default 0.650 is the value
        reported by QiTissue; pass another value for a different scale.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, with "Tissue_area(pixel^2)" and
        "Tissue_area(mm^2)" set to the computed area on every row.
    """
    area_pxl = calculate_Tissue_area(df, grid_size)
    # pixel^2 -> um^2 via the squared pixel size, then um^2 -> mm^2 (1 mm^2 = 1,000,000 um^2)
    area_mm = area_pxl * ((um_per_pixel * um_per_pixel) / 1000000)
    df["Tissue_area(pixel^2)"] = area_pxl
    df["Tissue_area(mm^2)"] = area_mm

    return df

In [6]:
def main_Tissue_area(df_name, grid_size, sample_col="Sample_scene"):
    """Read '<df_name>.csv' and add per-sample tissue-area columns.

    Parameters
    ----------
    df_name : str
        Path of the CSV file without the '.csv' extension.
    grid_size : [X, Y]
        Number of grid cells along x and y used for the area estimate.
    sample_col : str, default "Sample_scene"
        Column whose distinct values define the samples; the tissue area is
        calculated separately for each of them (e.g. pass "Sample_ID" when the
        data has no scenes).

    Returns
    -------
    pandas.DataFrame
        All rows of the CSV, grouped by sample in order of first appearance,
        with the area columns added by df_Tissue_area and a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If `sample_col` is not a column of the CSV.
    """
    data = pd.read_csv('{}.csv'.format(df_name))

    # Calculate tissue area separately for each sample.
    # One groupby pass replaces a boolean mask per sample; sort=False keeps the
    # samples in order of first appearance, dropna=False keeps rows whose sample
    # label is missing (they form their own group) instead of dropping them.
    df_list = []  # where each updated df (with the tissue area) will be stored
    for _, df_s in data.groupby(sample_col, sort=False, dropna=False):
        # Work on an explicit copy so adding columns never writes into a slice of `data`.
        df_list.append(df_Tissue_area(df_s.copy(), grid_size))

    final_df = pd.concat(df_list, ignore_index=True)  # reassemble all updated dataframes

    return final_df

Prepare your data before running the next cell. The CSV file must contain at least the following columns:

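* "Sample_scene": identifier of the sample (or of the scene within a sample, since samples with big data usually contain scenes); the tissue area is calculated separately for each distinct value. If your data has no scenes, replace "Sample_scene" with "Sample_ID" in the code above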

* "Nuc_X": X cell coordinates

* "Nuc_Y_Inv": Y cell coordinates

In [None]:
# 'grid_size' can be changed as the user wishes; [150, 150] is the default binning size
df_tissue_area = main_Tissue_area(df_name="name_of_CSV_df_file", grid_size=[150, 150])