In [None]:
# Notebook bootstrap: make the project-local modules importable, then import them.
import os
# If constants.py is not in the current working directory, move up one level
# before importing. Presumably the notebook lives in a sub-folder of the
# project root -- confirm. Note this changes the working directory for every
# later cell, so relative paths resolve from the parent directory afterwards.
if not os.path.exists("constants.py"):
    os.chdir("..")
# Star imports from project modules. Presumably these supply the names used by
# later cells without an explicit import (e.g. INPUT_TILES_PATH,
# OUTPUT_TILES_PATH, OUTPUT_STATS_FILENAME) -- verify against those modules.
from constants import *
from utils_segment_image import *
from utils_segment_whole_slide import *
from segmentation_service import process_ndpi_segmentation_request
# Redundant: os is already imported at the top of this cell (harmless).
import os

In [None]:
import openslide
import json
import logging  # imported explicitly so this cell does not rely on a star import leaking it


# Run the segmentation service on every .ndpi slide in INPUT_TILES_PATH that
# does not yet have an entry of the same name in OUTPUT_TILES_PATH, and write
# each slide's stats to {OUTPUT_TILES_PATH}/{ndpi_file}/{OUTPUT_STATS_FILENAME}.

# List the output folder once (not once per candidate file). A missing output
# folder simply means nothing has been processed yet.
# NOTE(review): a slide is skipped as soon as an entry with its name exists in
# the output folder, whether or not its stats file was written -- confirm that
# a partially processed slide should really be skipped.
if os.path.isdir(OUTPUT_TILES_PATH):
    already_processed = set(os.listdir(OUTPUT_TILES_PATH))
else:
    already_processed = set()

ndpi_files = [
    s for s in os.listdir(INPUT_TILES_PATH)
    if s.endswith(".ndpi") and s not in already_processed
]
print("Found NDPI files:", ndpi_files)

# One single-key dict per processed slide: {ndpi_file: stats}.
results = []
for ndpi_file in ndpi_files:
    slide_output_dir = os.path.join(OUTPUT_TILES_PATH, ndpi_file)
    slide = openslide.OpenSlide(os.path.join(INPUT_TILES_PATH, ndpi_file))
    try:
        result = process_ndpi_segmentation_request(slide=slide, output_tiles_path=slide_output_dir)
    finally:
        # Always release the slide handle, even if segmentation raises.
        slide.close()

    # Save the result to {OUTPUT_TILES_PATH}/{ndpi_file}/output_stats.json
    output_stats_path = os.path.join(slide_output_dir, OUTPUT_STATS_FILENAME)
    os.makedirs(slide_output_dir, exist_ok=True)
    with open(output_stats_path, 'w') as f:
        json.dump(result, f)
    logging.info(f"Output stats saved to: {output_stats_path}")

    results.append({ndpi_file: result})


In [None]:
# Display the raw results collected by the processing loop: a list with one
# single-key dict ({ndpi_file: stats}) per slide processed in this session.
results

In [None]:
# Print a short human-readable summary (count, area, density) for every slide
# processed in this session.
for i, result in enumerate(results):
    # Each entry is a single-key dict: {ndpi_file: stats}.
    ndpi_file, slide_stats = next(iter(result.items()))
    nuclei_count = slide_stats['total_nuclei_count']
    area_mm2 = slide_stats['total_non_background_area_mm2']

    print(f"Slide {i+1}: {ndpi_file}")
    print(f"  Total Nuclei Count: {nuclei_count}")
    print(f"  Total Non-background Area (mm²): {area_mm2:.2f}")
    if area_mm2:
        print(f"  Density (Nuclei/mm²): {nuclei_count / area_mm2:.2f}")
    else:
        # A slide with no non-background area has no defined density; report
        # that instead of failing with ZeroDivisionError.
        print("  Density (Nuclei/mm²): n/a (no non-background area)")
    print()

In [None]:
import pandas as pd
import json
import os
import csv
import glob
import numpy as np

# Build one CSV from the per-slide stats files found under OUTPUT_TILES_PATH.
# Each sub-directory is named after its ndpi file and is expected to contain a
# JSON stats file (OUTPUT_STATS_FILENAME) of the form:
# {
#     "total_nuclei_count": 123,
#     "total_non_background_area_mm2": 456.78
# }
# CSV columns: the three values below, the derived density, and two labels
# parsed from the directory name.
STATS_COLUMNS = [
    'ndpi_file',
    'total_nuclei_count',
    'total_non_background_area_mm2',
    'density',
    'mucosa/skin',
    'banff score 0-5',
]

# Range markers are checked before single grades; a range maps to its midpoint.
_BANFF_RANGE_SCORES = (('0-1', '0.5'), ('2-3', '2.5'), ('4-5', '4.5'))
# Single grades, in the order they are tested.
_BANFF_SINGLE_GRADES = ('1', '2', '3', '4', '5', '0')


def _tissue_type(name):
    """Return 'mucosa' or 'skin' if that word occurs in `name` (case-insensitive), else None."""
    lowered = name.lower()
    if 'mucosa' in lowered:
        return 'mucosa'
    if 'skin' in lowered:
        return 'skin'
    return None


def _banff_score(name):
    """Parse a Banff score (as a string) out of a directory name, or return None.

    Matching is a case-insensitive substring search: first the range markers
    ('0-1', '2-3', '4-5'), then 'banff N' / 'grade N' for a single grade.
    NOTE(review): the range markers are plain substrings, so unrelated text
    such as a date containing '0-1' would also match -- confirm the slide
    naming scheme rules this out.
    """
    lowered = name.lower()
    for marker, score in _BANFF_RANGE_SCORES:
        if marker in lowered:
            return score
    for grade in _BANFF_SINGLE_GRADES:
        if f'banff {grade}' in lowered or f'grade {grade}' in lowered:
            return grade
    return None


directories = os.listdir(OUTPUT_TILES_PATH)

# Collect one record per directory that actually contains a stats file.
data = []
for directory in directories:
    json_file_path = os.path.join(OUTPUT_TILES_PATH, directory, OUTPUT_STATS_FILENAME)
    if not os.path.exists(json_file_path):
        continue
    with open(json_file_path, 'r') as f:
        stats = json.load(f)
    nuclei_count = stats['total_nuclei_count']
    area_mm2 = stats['total_non_background_area_mm2']
    data.append({
        'ndpi_file': directory,
        'total_nuclei_count': nuclei_count,
        'total_non_background_area_mm2': area_mm2,
        # Density is undefined for a zero area; store NaN instead of raising
        # ZeroDivisionError and losing the whole table.
        'density': nuclei_count / area_mm2 if area_mm2 else float('nan'),
        'mucosa/skin': _tissue_type(directory),
        'banff score 0-5': _banff_score(directory),
    })

# Explicit columns keep the frame well-formed (and sortable) even when no
# stats files were found.
df = pd.DataFrame(data, columns=STATS_COLUMNS)
# Save the DataFrame to a CSV file
output_csv_path = os.path.join(OUTPUT_TILES_PATH, "output_stats.csv")
df = df.sort_values(by='ndpi_file')
df.to_csv(output_csv_path, index=False)
print(f"Output stats saved to: {output_csv_path}")
# Re-read the CSV so `df` holds exactly what was written; this round trip also
# lets pandas parse the Banff score strings back into numbers.
df = pd.read_csv(output_csv_path)
print(df)
# Plot the density of nuclei per mm² against Banff score, one figure per tissue type
import matplotlib.pyplot as plt


def _plot_density_by_banff(tissue_df, tissue_label):
    """Show a density-weighted histogram of Banff scores for one tissue type.

    tissue_df must provide the 'banff score 0-5' and 'density' columns;
    tissue_label is used for the legend entry and the figure title.
    NOTE(review): because density is passed as `weights`, each bar is the SUM
    of the densities of all slides falling in that score bin, not their mean --
    confirm that this is the intended quantity.
    """
    plt.figure(figsize=(10, 6))
    plt.hist(
        tissue_df['banff score 0-5'],
        weights=tissue_df['density'],
        bins=range(7),  # unit-wide bins with edges 0..6, so x.5 scores share the bin of x
        alpha=0.7,
        label=tissue_label,
        edgecolor='black',
    )
    plt.xlabel('Banff Score')
    plt.ylabel('Density (Nuclei/mm²)')
    plt.title(f'Histogram of Banff Score vs Density for {tissue_label}')
    plt.xticks(range(6))
    plt.legend()
    plt.tight_layout()
    plt.show()


# Keep only rows that have a Banff score, a tissue type and a density; a
# missing density used as a weight would turn the whole bar into NaN.
filtered_df = df.dropna(subset=['banff score 0-5', 'mucosa/skin', 'density'])

# Separate data for skin and mucosa
skin_df = filtered_df[filtered_df['mucosa/skin'] == 'skin']
mucosa_df = filtered_df[filtered_df['mucosa/skin'] == 'mucosa']

_plot_density_by_banff(skin_df, 'Skin')
_plot_density_by_banff(mucosa_df, 'Mucosa')