In [1]:
import spatialdata as sd
import spatialdata_plot
from spatialdata_io import xenium, codex
from pathlib import Path
import shutil
import sopa



## Convert raw data into .zarr format

### Xenium data

In [None]:
# Source folder holds one sub-directory per Xenium dataset; each one is
# converted into "<sub-directory name>.zarr" inside the destination folder.
xenium_path_read = Path('/scratch/lyarab/Xenium/Run3')
xenium_path_write = Path('../data/SpatialData/Xenium')

# Create the destination (and missing parents); no-op if it already exists
xenium_path_write.mkdir(parents=True, exist_ok=True)

# Options forwarded unchanged to the spatialdata_io Xenium reader for every dataset
reader_options = dict(
    n_jobs=1,
    cells_boundaries=True,
    nucleus_boundaries=True,
    morphology_focus=True,
    cells_as_circles=True,
)

for run_dir in xenium_path_read.iterdir():
    # Only sub-directories are treated as datasets; plain files are ignored
    if not run_dir.is_dir():
        continue

    zarr_target = xenium_path_write / f"{run_dir.name}.zarr"

    print(f"Processing: {run_dir.name}")

    # Parse the dataset folder with the Xenium reader
    print("Parsing the data... ", end="")
    sdata = xenium(path=str(run_dir), **reader_options)
    print("done")

    # Write to the destination; overwrite=True replaces a store left by an earlier run
    print("Writing the data... ", end="")
    sdata.write(str(zarr_target), overwrite=True)
    print("done")

print("All files processed successfully!")

### CODEX data

#### Slide by slide

In [27]:
# Convert every CODEX slide scan found in `codex_path_read` into a .zarr store.
# Conversion is best-effort: a file that fails is reported and skipped, and the
# final message states how many files were converted and which ones were not.

# Define source and destination directories
codex_path_read = Path('/scratch/lyarab/CODEX/')  # Folder containing CODEX datasets
codex_path_write = Path('../data/SpatialData/CODEX/')  # Destination folder for .zarr files

# Ensure the destination folder exists
codex_path_write.mkdir(parents=True, exist_ok=True)

n_converted = 0
failed = {}  # file name -> exception raised while parsing or writing it

# Iterate over each file in codex_path_read (sorted so runs are reproducible)
for file in sorted(codex_path_read.iterdir()):
    # Skip directories and files whose name starts with "._"
    if file.is_file() and not file.name.startswith("._"):
        input_path = str(file)
        # Path.stem drops only the last suffix, e.g.
        # "ID_0022111_Scan1.er.qptiff" -> "ID_0022111_Scan1.er.zarr"
        output_path = str(codex_path_write / f"{file.stem}.zarr")

        print(f"Processing: {file.name}")

        try:
            # Parse the data
            print("Parsing the data... ", end="")
            sdata = sopa.io.phenocycler(path=input_path)
            print("done")

            # Write the data. overwrite=True (as in the Xenium cell) lets the cell
            # be re-run; without it an existing store makes the write fail and the
            # file is reported as skipped after the parse has already been done.
            print("Writing the data... ", end="")
            sdata.write(output_path, overwrite=True)
            print("done")
            n_converted += 1

        except Exception as e:
            # Close the pending "Parsing/Writing the data... " line before reporting
            print("failed")
            print(f"Skipping {file.name} due to error: {e}")
            failed[file.name] = e

# Only claim success when nothing was skipped
if failed:
    print(
        f"Finished with errors: {n_converted} file(s) converted, "
        f"{len(failed)} skipped ({', '.join(failed)})"
    )
else:
    print("All files processed successfully!")

[36;20m[INFO] (sopa.io.reader.phenocycler)[0m Found channel names ['DAPI' 'FoxP3' 'aSMA' 'CD4' 'CD8' 'CD31' 'CD11c' 'IFNG' 'Pan-Cytokeratin'
 'CD68' 'CD20' 'CD66b' 'TNFa' 'CD45RO' 'CD14' 'CD11b' 'Vimentin' 'CD163'
 'IL10' 'CD45' 'CCR7' 'CD38' 'CD69' 'Podoplanin' 'PNAd' 'CD16' 'CXCL13']


Processing: ID_0022111_Scan1.er.qptiff
Parsing the data... done
Writing the data... [34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         ..[35m/data/SpatialData/CODEX/[0m[95mID_0022111_Scan1.er.zarr[0m                                                        


[36;20m[INFO] (sopa.io.reader.phenocycler)[0m Found channel names ['DAPI' 'FoxP3' 'aSMA' 'CD4' 'CD8' 'CD31' 'CD11c' 'IFNG' 'Pan-Cytokeratin'
 'CD68' 'CD20' 'CD66b' 'TNFa' 'CD45RO' 'CD14' 'CD11b' 'Vimentin' 'CD163'
 'IL10' 'CD45' 'CCR7' 'CD38' 'CD69' 'Podoplanin' 'PNAd' 'CD16' 'CXCL13']


done
Processing: ID_0022110_Scan1.er.qptiff
Parsing the data... done
Writing the data... [34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         ..[35m/data/SpatialData/CODEX/[0m[95mID_0022110_Scan1.er.zarr[0m                                                        
done
All files processed successfully!


#### Column by column: From ome.tif to zarr

In [None]:
# Convert every cropped CODEX image found in `codex_path_read` into a .zarr store.
# Conversion is best-effort: a file that fails is reported and skipped, and the
# final message states how many files were converted and which ones were not.

# Define source and destination directories
codex_path_read = Path('/scratch/lyarab/CODEX_cropped/')  # Folder containing CODEX datasets
codex_path_write = Path('../data/SpatialData/CODEX_cropped/')  # Destination folder for .zarr files

# Ensure the destination folder exists
codex_path_write.mkdir(parents=True, exist_ok=True)

n_converted = 0
failed = {}  # file name -> exception raised while parsing or writing it

# Iterate over each file in codex_path_read (sorted so runs are reproducible)
for file in sorted(codex_path_read.iterdir()):
    # Skip directories and files whose name starts with "._"
    if file.is_file() and not file.name.startswith("._"):
        input_path = str(file)
        # Path.stem drops only the last suffix, so a name such as
        # "sample.ome.tif" becomes "sample.ome.zarr"
        output_path = str(codex_path_write / f"{file.stem}.zarr")

        print(f"Processing: {file.name}")

        try:
            # Parse the data
            print("Parsing the data... ", end="")
            sdata = sopa.io.ome_tif(path=input_path)
            print("done")

            # Write the data. overwrite=True (as in the Xenium cell) lets the cell
            # be re-run; without it an existing store makes the write fail and the
            # file is reported as skipped after the parse has already been done.
            print("Writing the data... ", end="")
            sdata.write(output_path, overwrite=True)
            print("done")
            n_converted += 1

        except Exception as e:
            # Close the pending "Parsing/Writing the data... " line before reporting
            print("failed")
            print(f"Skipping {file.name} due to error: {e}")
            failed[file.name] = e

# Only claim success when nothing was skipped
if failed:
    print(
        f"Finished with errors: {n_converted} file(s) converted, "
        f"{len(failed)} skipped ({', '.join(failed)})"
    )
else:
    print("All files processed successfully!")