## Imports

In [1]:
import cv2
import pandas as pd
import numpy as np
import glob
from pathlib import Path
import re
import shutil
import os
import subprocess

## Extract info from CSV and image filenames

In [2]:
# Define base paths
base_dir = Path.cwd().parent  # current working directory

# Define CSV and image folders relative to script
csv_dir = base_dir / "data" / "CSV"
image_dir = base_dir / "data" / "Images_RGB"

# Extract info from CSV filenames
a = []  # image IDs
b = []  # CSV file paths

for file in glob.glob(f"{csv_dir}/*"):
    b.append(file)

    bit0 = file.split("_1_HP_IM3_0_Core")[0].split(" ")[-1] # e.g.: A2
    bit1 = file.split("_1_HP_IM3_0_Core")[1].split("_")[0].split(",") # e.g. ['[1', '1', '1', '1]']
    
    p3 = f"{int(bit1[1]):02d}" # e.g. 01
    p4 = f"{int(bit1[2]):02d}" # e.g. 01
    
    a.append([bit0, p3, p4])

# Extract info from image filenames
c = []  # image IDs
d = []  # image file paths (tif)

for file in glob.glob(f"{image_dir}/*"):
    d.append(file)

    bit2 = file.split("_1_HP_IM3_0_Core")[0].split(" ")[-1]
    bit3 = file.split("_1_HP_IM3_0_Core")[1].split("_")[0].split(",")
    
    p1 = f"{int(bit3[1]):02d}"
    p2 = f"{int(bit3[2]):02d}"
    
    c.append([bit2, p1, p2])

# Sanity check: print the first parsed image result
if c and d:
    print("Example parsed image:", c[0])
    print("Example image path:", d[0])
else:
    print("No image files found.")

Example parsed image: ['B2', '03', '08']
Example image path: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/Images_RGB/NSB__NUH B2_1_HP_IM3_0_Core[1,3,8,1]_[41694,8530]_component_data.tif - resolution #1.jpg


## Create masks by drawing circles at each detected cell location

In [3]:
# Already have: a, b, c, d (from previous steps) and the base directory
# Define the output path for masks
mask_dir = base_dir /  "data" / "Mask"
mask_dir.mkdir(exist_ok=True) # create the folder if it doesn't exist

# Loop through CSV/image matches
for j, item in enumerate(a):  # a[j] contains ID info from CSV
    try:
        i = c.index(item)  # Match with image ID
    except ValueError:
        print(f"No match for: {item}")
        continue

    image3 = cv2.imread(d[i])
    image4 = image3.copy()

    df = pd.read_csv(b[j], sep='\t')

    print("CSV:", b[j])
    print("Image:", d[i])

    # Overlay red dots at centroids
    for _, row in df.iterrows():
        x = int(row["Centroid X µm"]) * 2
        y = int(row["Centroid Y µm"]) * 2
        cv2.circle(image3, (x, y), radius=0, color=[0, 0, 255], thickness=15)
        cv2.circle(image4, (x, y), radius=5, color=[0, 0, 200], thickness=15)

    # Blend overlays (creates composite image)
    dst_0 = cv2.addWeighted(image3, 0.5, image4, 0.5, 0)

    # Save output
    outname = os.path.splitext(os.path.basename(b[j]))[0]  + ".tiff"
    out_path = mask_dir / outname
    cv2.imwrite(str(out_path), dst_0)


CSV: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/CSV/NSB__NUH B2_1_HP_IM3_0_Core[1,7,14,1]_component_data.tif - resolution #1.csv
Image: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/Images_RGB/NSB__NUH B2_1_HP_IM3_0_Core[1,7,14,1]_component_data.tif - resolution #1.jpg
CSV: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/CSV/NSB__NUH B2_1_HP_IM3_0_Core[1,4,8,1]_[41807,10408.5]_component_data.tif - resolution #1.csv
Image: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/Images_RGB/NSB__NUH B2_1_HP_IM3_0_Core[1,4,8,1]_[41807,10408.5]_component_data.tif - resolution #1.jpg
CSV: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/CSV/NSB__NUH B2_1_HP_IM3_0_Core[1,2,13,1]_component_data.tif - resolution #1.csv
Image: /Users/victoireringler/Documents/Singapore/Thesis/PatternExtract/data/Images_RGB/NSB__NUH B2_1_HP_IM3_0_Core[1,2,13,1]_component_data.tif - resolution #1.jpg
CSV: /U

## Run for Windows OS

In [None]:
## Windows OS
import os
import subprocess

# 1. Directories and files definition
# Already have: base directory and mask_dir
os.chdir("C:/Program Files/QuPath-0.6.0")  # Update this to your actual QuPath install path
create_project_script = base_dir / "scripts" / "createproject_ki67.groovy" # Path to Groovy script that creates the QuPath project and adds images
annotate_script = base_dir / "scripts" / "annotate_ki67_cells.groovy" # Path to Groovy script that annotates the images
project_dir = base_dir / "Project" / "ki67"
project_file = project_dir / "project.qpproj"
qupath_bin = "QuPath-0.6.0 (console).exe" # Use actual QuPath  Windows console executable
geojson_dir = base_dir / "data" / "geoJSON"

# 2. Run Groovy script to create and populate the QuPath project
subprocess.run([
    qupath_bin,  
    "script",
    "--args", str(mask_dir),
    create_project_script
])

# 3. Run the annotation script, save the project (generates GeoJSON, etc.)
subprocess.run([
    qupath_bin,
    "script",
    "--save",
    "--project", project_file,
    "--args", str(geojson_dir),   # export directory here
    annotate_script
])

# 4. Optionally open the project in QuPath GUI for inspection
subprocess.Popen([
    qupath_bin,
    "--project", str(project_file)
])

In [None]:
## Windows OS
import subprocess

# Path to the R script that performs downstream analysis (e.g., spatial stats, plotting)
Rfile = base_dir / "scripts" / "R_script_ki67.R"

# Full path to Rscript executable (update if your R version or install path differs)
rscript_exe = "C:/Program Files/R/R-4.3.1/bin/Rscript.exe"

# Run the R script with '--vanilla' to avoid loading user environment or site files
subprocess.call([rscript_exe, "--vanilla", Rfile])


## Run for MacOS

In [None]:
## Mac OS
import os
import subprocess

# 1. Directories and files definition
# Already have: base directory and mask_dir
os.chdir("/Applications/QuPath-0.6.0-arm64.app/Contents/MacOS/")  # Adjust if QuPath installation directory (macOS .app path) is elsewhere
create_project_script = base_dir / "scripts" / "createproject_ki67.groovy" # Path to Groovy script that creates the QuPath project and adds images
annotate_script = base_dir / "scripts" / "annotate_ki67_cells.groovy" # Path to Groovy script that annotates the images
project_dir = base_dir / "Project" / "ki67"
project_file = project_dir / "project.qpproj"
qupath_bin = "./QuPath-0.6.0-arm64" # Use actual QuPath binary on macOS
geojson_dir = base_dir / "data" / "geoJSON"

# 2. Run Groovy script to create and populate the QuPath project
subprocess.run([
    qupath_bin,  
    "script",
    "--args", str(mask_dir),
    create_project_script
])

# 3. Run the annotation script, save the project (generates GeoJSON, etc.)
subprocess.run([
    qupath_bin,
    "script",
    "--save",
    "--project", project_file,
    "--args", str(geojson_dir),   # export directory here
    annotate_script
])

# 4. Optionally open the project in QuPath GUI for inspection
subprocess.Popen([
    qupath_bin,
    "--project", str(project_file)
])



15:13:17.499 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting default Locale to en_US
15:13:17.500 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for FORMAT to en_US
15:13:17.500 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for DISPLAY to en_US
15:13:17.512 [main] [INFO ] qupath.lib.common.ThreadTools - Setting parallelism to 10
15:13:17.521 [main] [INFO ] qupath.ScriptCommand - Setting tile cache size to 2304.00 MB (25.0% max memory)
15:13:17.563 [main] [INFO ] qupath.lib.scripting.QP - Initializing type adapters
No project directory found — creating one!
15:13:18.488 [main] [INFO ] q.l.g.i.s.ImageRegionStoreFactory - Setting tile cache size to 2304.00 MB (25.0% max memory)
15:13:18.627 [main] [INFO ] q.l.i.s.b.BioFormatsServerOptions - Setting max Bio-Formats readers to 11
15:13:19.828 [main] [INFO ] qupath.lib.io.PathIO - Writing object hierarchy with 0 object(s)...
15:13:19.830 [main] [INFO ] qupath.lib.io.PathIO - Image data written in 0.01



15:13:22.985 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting default Locale to en_US
15:13:22.986 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for FORMAT to en_US
15:13:22.986 [main] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for DISPLAY to en_US
15:13:23.001 [main] [INFO ] qupath.lib.common.ThreadTools - Setting parallelism to 10
15:13:23.009 [main] [INFO ] qupath.ScriptCommand - Setting tile cache size to 2304.00 MB (25.0% max memory)
15:13:23.044 [main] [INFO ] qupath.lib.scripting.QP - Initializing type adapters
15:13:23.427 [main] [INFO ] qupath.ScriptCommand - Running script for NSB__NUH A2_1_HP_IM3_0_Core[1,5,11,1]_[44614,12789]_component_data.tif - resolution #1.tiff (0/20)
15:13:23.537 [main] [INFO ] q.l.i.s.b.BioFormatsServerOptions - Setting max Bio-Formats readers to 11
15:13:24.786 [main] [INFO ] qupath.lib.io.PathIO - Writing object hierarchy with 1 object(s)...
15:13:24.790 [main] [INFO ] qupath.lib.io.PathIO - Image data written in

<Popen: returncode: None args: ['./QuPath-0.6.0-arm64', '--project', '/Users...>

Oct 27, 2025 3:13:27 PM com.sun.javafx.application.PlatformImpl startup


15:13:27.941 [JavaFX Application Thread] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting default Locale to en_US
15:13:27.942 [JavaFX Application Thread] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for FORMAT to en_US
15:13:27.942 [JavaFX Application Thread] [INFO ] qupath.lib.gui.prefs.PathPrefs - Setting Locale for DISPLAY to en_US
15:13:27.951 [JavaFX Application Thread] [INFO ] qupath.lib.common.ThreadTools - Setting parallelism to 10
15:13:27.989 [JavaFX Application Thread] [INFO ] qupath.lib.gui.QuPathGUI - Initializing: 1761549207989
15:13:28.204 [JavaFX Application Thread] [INFO ] q.l.g.i.s.ImageRegionStoreFactory - Setting tile cache size to 2304.00 MB (25.0% max memory)
15:13:28.394 [JavaFX Application Thread] [INFO ] qupath.lib.gui.QuPathGUI - QuPath build: Version: 0.6.0
Build time: 2025-06-26 15:19
Latest commit tag: "cc7a0a9"
15:13:29.476 [JavaFX Application Thread] [INFO ] qupath.lib.scripting.QP - Initializing type adapters
15:13:29.660 [JavaFX Application 

In [5]:
## MacOS
import subprocess

# Path to the R script you want to execute
Rfile = base_dir / "scripts" / "R_script_ki67.R"

# Path to Rscript binary (standard for R installations on macOS via CRAN)
rscript_exe = "/usr/local/bin/Rscript"  # or run `which Rscript` in terminal to confirm

# Run the R script with '--vanilla' for a clean R session
subprocess.call([rscript_exe, "--vanilla", Rfile])

Loading required package: spatstat.data
Loading required package: spatstat.univar
spatstat.univar 3.1-4
Loading required package: spatstat.geom
spatstat.geom 3.6-0
Loading required package: spatstat.random
spatstat.random 3.4-2
Loading required package: spatstat.explore
Loading required package: nlme
spatstat.explore 3.5-3
Loading required package: spatstat.model
Loading required package: rpart
spatstat.model 3.4-2
Loading required package: spatstat.linnet
spatstat.linnet 3.3-2

spatstat 3.4-1 
For an introduction to spatstat, type ‘beginner’ 

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.2
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::collapse() masks nlme::collapse()
✖ dplyr::filter()   masks stats::filter()
✖ dplyr::lag()     

[1] ">1% outside: 3 samples"
[1] ">5% outside: 2 samples"


NSB__NUH B2_1_HP_IM3_0_Core[1,7,14,1]_component_data.tif - resolution #1.geojson Total points: 2021 Points outside: 103 Percentage: 5.1
Annotating: NSB__NUH C2_1_HP_IM3_0_Core[1,8,14,1]_[48994,18978]_component_data.tif - resolution #1.geojson
NSB__NUH C2_1_HP_IM3_0_Core[1,8,14,1]_[48994,18978]_component_data.tif - resolution #1.geojson Total points: 3348 Points outside: 48 Percentage: 1.43


0