# Preprocessing and Counting PML Bodies

In [43]:
import json
import numpy as np
from scipy.spatial import distance
from skimage import io  # Importing image loading library
import os
import cv2
import pandas as pd

In [39]:
json_path = '/Users/pallavisingh/Library/CloudStorage/OneDrive-SharedLibraries-DalhousieUniversity/Priyadharshini Sridharan - Images from Dellaire Lab/input/combined_annotations.json'

In [40]:
# Load JSON file
with open(json_path) as f:
    data = json.load(f)

# Initialize lists to store results for each image
inside_counts = []
outside_counts = []
mean_distances = []
variance_distances = []
inside_densities = []
outside_densities = []

In [46]:
df = pd.DataFrame(data)
df

Unnamed: 0,image_name,label,nucleus_mask,dots
0,flattened_position_10_C1,not arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 469, 'y': 511..."
1,flattened_position_6_C1,not arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 223, 'y': 511..."
2,flattened_position_4_C1,not arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 193, 'y': 472..."
3,flattened_position_6-1_C1,not arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 504, 'y': 510..."
4,flattened_position_7-1_C1,not arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 64, 'y': 504}..."
...,...,...,...,...
75,flattened_position_19_C1,arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 441, 'y': 416..."
76,flattened_position_15_C1,arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 394, 'y': 441..."
77,flattened_position_40_C1,arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 301, 'y': 457..."
78,flattened_position_23_C1,arsenic,/Users/pallavisingh/Documents/test/binary_mask...,"[{'dot_id': 1, 'position': {'x': 312, 'y': 507..."


In [48]:
# Process each image entry in the JSON data
for entry in data:
    # Load the nucleus mask image as a numpy array using OpenCV
    x = str(entry['nucleus_mask'].strip())  # Ensure no leading/trailing spaces
    nucleus_mask = cv2.imread(entry['nucleus_mask'], cv2.IMREAD_GRAYSCALE)  # Read the image in grayscale

    if nucleus_mask is None:
        print(f"Failed to load image: {x}")
        continue

    # Ensure the mask is binary (0s and 1s)
    _, nucleus_mask = cv2.threshold(nucleus_mask, 127, 1, cv2.THRESH_BINARY)  # Convert to binary mask

    pml_bodies = entry['dots']  # Access the dots (PML body coordinates) from JSON
    image_height, image_width = nucleus_mask.shape

    # Count PML bodies inside and outside the nucleus
    inside_count = 0
    outside_count = 0

    for dot in pml_bodies:
        x, y = dot['position']['x'], dot['position']['y']  # Get coordinates from dot entry
        if dot['location'].strip() == 'inside_nucleus':  # Inside nucleus
            inside_count += 1
        else:  # Outside nucleus
            outside_count += 1

    # Store counts
    inside_counts.append(inside_count)
    outside_counts.append(outside_count)

    # Calculate distances if needed
    # Assuming you want to calculate distances here (commented out in your original code)
    distances = []  # You might want to calculate distances based on your requirement

    # Calculate mean and variance of distances (if applicable)
    if distances:
        mean_distances.append(np.mean(distances))
        variance_distances.append(np.var(distances))
    else:
        mean_distances.append(0)
        variance_distances.append(0)

    # Calculate densities (dots per unit area)
    nucleus_area = np.sum(nucleus_mask)  # This will count the number of pixels in the nucleus
    non_nucleus_area = image_height * image_width - nucleus_area

    inside_density = inside_count / nucleus_area if nucleus_area > 0 else 0
    outside_density = outside_count / non_nucleus_area if non_nucleus_area > 0 else 0

    inside_densities.append(inside_density)
    outside_densities.append(outside_density)

# Print summary statistics
for i, entry in enumerate(data):
    print(f"Image {i + 1} - {entry['image_name']}:")
    print("PML bodies inside nucleus:", inside_counts[i])
    print("PML bodies outside nucleus:", outside_counts[i])
    print("Average distance to nucleus edge:", mean_distances[i])
    print("Variance of distances to nucleus edge:", variance_distances[i])
    print("Density of PML bodies inside nucleus:", inside_densities[i])
    print("Density of PML bodies outside nucleus:", outside_densities[i])
    print("\n")

Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_10_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_6_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_4_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_6-1_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_7-1_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_8_C1_mask.tiff
Failed to load image: /Users/pallavisingh/Documents/test/bi

[ WARN:0@458.812] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_10_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.813] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_6_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.813] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_4_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.813] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_6-1_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WAR

IndexError: list index out of range

@458.814] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_17_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.814] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_1_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.814] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_3_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458.814] global loadsave.cpp:248 findDecoder imread_('/Users/pallavisingh/Documents/test/binary_mask_nucleus_high_arsenic/binary_mask_nucleus_non_arsenic/flattened_position_10-1_C1_mask.tiff'): can't open/read file: check file path/integrity
[ WARN:0@458