In [2]:
import os
import glob
import numpy as np
#from nd2reader import ND2Reader
import napari
import bigfish
import bigfish.stack as stack
import bigfish.detection as detection
import bigfish.multistack as multistack
import bigfish.plot as plot
import time
import pandas as pd
import tifffile as tiff
from skimage import io
import plotly.express as px

In [6]:
# --- Directory layout ---------------------------------------------------------
dir1 = "./"                                    # Input directory with the ND2 files
dir2 = os.path.join(dir1, 'TiffFiles')         # Converted TIFF files
dir3 = os.path.join(dir2, 'Max_Projections')   # Maximum intensity projections
dir4 = os.path.join(dir3, 'FOVs')              # One sub-folder per field of view (FOV)
dir6 = os.path.join(dir3, "Combined_Files")    # Per-FOV / per-channel stacks across cycles
dir7 = os.path.join(dir3, 'Spots')             # Spot and cluster CSV output

# Create every output directory up front. dir7 is included so that cells
# reading or writing spot files never hit a missing folder, and makedirs with
# exist_ok=True keeps this cell safe to re-run.
dirs_to_create = [dir2, dir3, dir4, dir6, dir7]
for directory in dirs_to_create:
    os.makedirs(directory, exist_ok=True)

# --- Detection parameters -----------------------------------------------------
# Pixel size, passed to big-fish as the voxel size in nanometres.
voxelval = 110.3752759382
# Spot radius in nanometres, defined as two pixels (a fixed 250.0 was used before).
radiusval = 2 * voxelval

### Saving Files as OME Tiffs

- All images are converted to .tif files and saved in the TiffFiles folder
- Unclear yet how to change the LUTs

In [None]:
from nis2pyr.convertor import convert_nd2_to_pyramidal_ome_tiff

# Collect the ND2 files in dir1 (sorted by name), leaving out acquisitions that
# were only used for testing bleaching. Filtering inside the comprehension
# avoids indexing with an empty non-boolean mask when no ND2 file is present.
string = 'Bleach'
files = np.array(sorted(
    each for each in os.listdir(dir1)
    if each.endswith('.nd2') and string not in each
))

# Convert every remaining ND2 file to an OME-TIFF stored in dir2
for fil in files:
    nd2_path = os.path.join(dir1, fil)
    # Output keeps the part of the filename before the first "."
    tif_path = os.path.join(dir2, fil.split(".")[0] + '.tif')

    # Print the file being processed
    print(nd2_path)

    # max_levels=1 limits the pyramid to a single level
    convert_nd2_to_pyramidal_ome_tiff(nd2_path, tif_path, max_levels=1)


### Maximum Intensity Projection and Image Distribution according to FOV 
So all images corresponding to the same FOV but across different cycles are saved in one folder 
- each image for one round is split by its channels and one channel image is saved
- still need to figure out how to manage when there are multiple channels

In [None]:
# Gather the names of all .tif files found in dir2, ordered alphabetically
files = np.array([name for name in os.listdir(dir2) if name.endswith('.tif')])
files.sort()

# Helper that collapses an image stack into its maximum intensity projection
def maximum_intensity_projection(image_stack):
    """Return the element-wise maximum of ``image_stack`` taken over axis 0.

    The first axis is removed; all remaining axes are kept in their original
    order. Callers in this notebook treat axis 0 as the Z axis.
    """
    return np.max(image_stack, axis=0)

# Project every TIFF in dir2 along axis 0, save the projection in dir3 and file
# each plane of the projection (one per channel) under its FOV folder in dir4.
# Filenames are split on "_": token 0 prefixes the per-channel files and
# token 1 (up to the first ".") names the FOV folder.
# NOTE(review): presumably "<cycle>_<FOV>.tif" - confirm the naming scheme.
for fil in files:
    # Read the OME-TIFF file
    ome_tiff_path = os.path.join(dir2, fil)
    image_stack = tiff.imread(ome_tiff_path)

    # Compute the maximum intensity projection
    mip = maximum_intensity_projection(image_stack)

    # Save the max projection inside dir3. os.path.join supplies the separator
    # that plain concatenation omitted (files used to land in dir2 as
    # "Max_ProjectionsMAX_*.tif" and were picked up as inputs on the next run).
    # imwrite is the current tifffile name for the deprecated imsave alias.
    mip_path = os.path.join(dir3, "MAX_" + fil.split(".")[0] + ".tif")
    tiff.imwrite(mip_path, mip)

    # Extract FOV information from the filename
    FOV = fil.split("_")

    # Create a directory for the FOV if it does not exist
    dir5 = os.path.join(dir4, FOV[1].split(".")[0])
    os.makedirs(dir5, exist_ok=True)

    # Save each plane along the first axis of the projection (one per channel)
    # as a separate TIFF file; channel numbers in the filename are 1-based
    for chan in range(mip.shape[0]):
        image = mip[chan, :, :]
        savename = os.path.join(dir5, FOV[0] + "_channel_" + str(chan + 1) + ".tif")
        tiff.imwrite(savename, image)


### Creating Files combining images from single channel and single FOV across all cycles
- This helps in easy visualization of the data and can also be used for data analysis? 
- The files are saved in the Max_Projections/Combined_Files/ folder

In [None]:
# Each sub-folder of dir4 holds the single-channel projections of one FOV.
# (dir3 itself only contains the "FOVs", "Combined_Files", ... folders, so the
# FOV folders have to be listed from dir4.)
directories = sorted(
    name for name in os.listdir(dir4) if os.path.isdir(os.path.join(dir4, name))
)

# Iterate over each FOV folder
for fov_name in directories:
    fov_dir = os.path.join(dir4, fov_name)  # Full path to the current FOV folder
    fils = np.array(sorted(each for each in os.listdir(fov_dir) if each.endswith('.tif')))
    if fils.size == 0:
        # Nothing to combine for this FOV
        continue

    # The third "_"-separated token of each filename identifies the channel;
    # it still carries the ".tif" extension (e.g. "1.tif").
    chan = pd.Series([name.split("_")[2] for name in fils])
    chans = pd.unique(chan)  # Unique channel tokens, in order of appearance

    # Build one stack per channel containing that channel's image from every file
    for c in chans:
        multifils = fils[(chan == c).to_numpy()]  # Files belonging to this channel
        # Stack the images in sorted filename order. The shape now comes from
        # the files themselves instead of a variable left over from another
        # cell; float64 matches what the former np.zeros buffer produced.
        multichannel_image = np.stack(
            [tiff.imread(os.path.join(fov_dir, m)) for m in multifils], axis=0
        ).astype(np.float64)
        # c already ends in ".tif", so the result is "<FOV>_channel_<n>.tif" in dir6
        savename = os.path.join(dir6, fov_name + "_channel_" + c)
        tiff.imwrite(savename, multichannel_image)


### FISH Spot Detection

##### Detecting threshold within multiple FOVs for all rounds and channels
- This could potentially be used to get a global threshold value
- Alternatively can be used to get individual thresholds for each channel and round

In [None]:
# Record start time
start_time = time.time()

# Manual per-channel values kept from the original cell. They are not used by
# the automated estimation below.
thresh1 = 18  # Threshold for Channel 1 (generally Cy7)
thresh2 = 18  # Threshold for Channel 2 (generally Cy5)
thresh3 = 18  # Threshold for Channel 3 (generally Cy3B)

# Number of cycles for each channel (no longer needed to size the result
# arrays; one value is collected per plane actually present in each stack)
chan1round = 8
chan2round = 8
chan3round = 8

# Maximum number of FOVs to use to detect thresholds
nfovs = 15

# Get list of TIFF files in the combined files directory and sort them
files = np.array(sorted(each for each in os.listdir(dir6) if each.endswith('.tif')))

# The third "_"-separated token of each filename identifies the channel
chan = pd.Series([name.split("_")[2] for name in files])
chans = pd.unique(chan)

# Never index past the FOVs that actually exist on disk
nfovs = min(nfovs, len(files) // len(chans)) if len(chans) else 0

# Compute spot radius in pixels
spot_radius_px = detection.get_object_radius_pixel(
                    voxel_size_nm=(voxelval, voxelval),
                    object_radius_nm=(radiusval, radiusval),
                    ndim=2)


def _estimate_channel_thresholds(channel_offset, label):
    """Collect one automated threshold per plane for a single channel.

    Uses the first ``nfovs`` FOVs. For each FOV the file at position
    ``fov * len(chans) + channel_offset`` of the sorted ``files`` array is
    read, i.e. the files of one FOV are assumed to be consecutive and ordered
    by channel. Every plane along axis 0 is LoG-filtered, local maxima are
    detected and big-fish's automated threshold is recorded.

    Parameters
    ----------
    channel_offset : int
        Position of the channel within each FOV's group of files (0-based).
    label : int or str
        Channel label used in the progress message.

    Returns
    -------
    numpy.ndarray
        Column vector of shape (n_values, 1) holding only measured thresholds,
        so no unused zero entries can bias a later median.
    """
    values = []
    for fov in range(nfovs):
        print("--- Start %s seconds ---" % (time.time() - start_time))
        filename = files[fov * len(chans) + channel_offset]
        img = tiff.imread(os.path.join(dir6, filename))
        print(filename)
        for t in range(img.shape[0]):
            rna = img[t, :, :]
            # LoG filter
            rna_log = stack.log_filter(rna, sigma=spot_radius_px)
            # Local maximum detection
            mask = detection.local_maximum_detection(rna_log, min_distance=spot_radius_px)
            # Thresholding
            values.append(detection.automated_threshold_setting(rna_log, mask))
    print("Finished thresholding for channel %s after %s seconds ---" % (label, time.time() - start_time))
    return np.array(values, dtype=float).reshape(-1, 1)


ths1 = _estimate_channel_thresholds(0, 1)  # Channel 1 - Cy7
ths2 = _estimate_channel_thresholds(1, 2)  # Channel 2 - Cy5
ths3 = _estimate_channel_thresholds(2, 3)  # Channel 3 - Cy3B

# Number of thresholds collected for the last channel; the original loops left
# this counter behind and a later progress message reads it.
e = len(ths3)


#### Applying threshold to detect spots and clusters
- spots are saved in the Spots folder as FOV##\_Channel##\_#.csv
- spot clusters are saved in the Spots folder as FOV##\_Channel##\_spotclusters_#.csv
- cluster information is saved in the Spots folder as FOV##\_Channel##\_clusters\_#.csv


In [None]:
# Record start time
start_time = time.time()

# Per-channel detection thresholds derived from the automated estimates
thresh1 = 2 * np.median(ths1) + 40  # Threshold for Channel 1
thresh2 = 2 * np.median(ths2) + 40  # Threshold for Channel 2
thresh3 = 2 * np.median(ths3) + 40  # Threshold for Channel 3
threshs = [thresh1, thresh2, thresh3]

# Number of rounds for each channel (kept for reference; not used in this cell)
chan1round = 7
chan2round = 7
chan3round = 6

# List the combined stacks and derive the channel tokens from THIS listing
# (the original assigned `chan` here but then read a stale `chans`).
files = sorted(each for each in os.listdir(dir6) if each.endswith('.tif'))
chans = pd.unique(pd.Series([name.split("_")[2] for name in files]))
nfovs = len(files) // len(chans) if len(chans) else 0

# Spot tables go to the Spots folder (dir7), which is where the viewer cell
# reads them from; create it here in case it does not exist yet.
os.makedirs(dir7, exist_ok=True)

# Column names of the three tables written per file
sp_columns = ['Y', 'X', 'round']
spcl_columns = ['Y', 'X', 'clusterindex', 'round']
cl_columns = ['Y', 'X', 'nspots', 'index', 'round']


def _collect(frames, columns):
    """Concatenate per-round tables and name their columns (empty table if none)."""
    if not frames:
        return pd.DataFrame(columns=columns)
    table = pd.concat(frames)
    table.columns = columns
    return table


# Iterate over each FOV
for fil in range(nfovs):
    print("--- Start %s seconds ---" % (time.time() - start_time))
    # Iterate over every channel except the last one, which is not analysed for spots
    for cc in range(len(chans) - 1):
        print("Analysing Channel %s" % (cc))
        filename = files[fil * len(chans) + cc]
        img = tiff.imread(os.path.join(dir6, filename))
        stem = filename.split(".")[0]
        savenamespots = stem + ".csv"
        savenamespotscl = stem + "_spotclusters.csv"
        savenameclusters = stem + "_clusters.csv"
        print(filename)

        # Per-round tables are gathered in lists and concatenated once
        sp_frames, spcl_frames, cl_frames = [], [], []

        # Plane 0 of the stack is skipped: round index t refers to plane t + 1.
        # NOTE(review): presumably plane 0 is a reference cycle - confirm.
        for t in range(img.shape[0] - 1):
            print("Analysing Round %s" % (t + 1))
            rna = img[t + 1, :, :]
            spots = detection.detect_spots(
                images=rna,
                return_threshold=False,
                threshold=threshs[cc],
                voxel_size=(voxelval, voxelval),
                spot_radius=(radiusval, radiusval)
            )
            spots_post_decomposition, dense_regions, reference_spot = detection.decompose_dense(
                image=np.uint16(rna),
                spots=spots,
                voxel_size=(voxelval, voxelval),
                spot_radius=(radiusval, radiusval),
                alpha=0.75,
                beta=0.9,
                gamma=15
            )
            spots_post_clustering, clusters = detection.detect_clusters(
                spots=spots_post_decomposition,
                voxel_size=(int(voxelval), int(voxelval)),
                radius=int(radiusval),
                nb_min_spots=4
            )
            # Tag each table with the 0-based round index before collecting it
            for frames, array in ((sp_frames, spots),
                                  (spcl_frames, spots_post_clustering),
                                  (cl_frames, clusters)):
                table = pd.DataFrame(array)
                table['round'] = t
                frames.append(table)

        sp = _collect(sp_frames, sp_columns)
        spcl = _collect(spcl_frames, spcl_columns)
        cl = _collect(cl_frames, cl_columns)
        sp.to_csv(os.path.join(dir7, savenamespots), index=False)
        spcl.to_csv(os.path.join(dir7, savenamespotscl), index=False)
        cl.to_csv(os.path.join(dir7, savenameclusters), index=False)

print("Finished spot detection for %s FOVs after %s seconds ---" % (nfovs, time.time() - start_time))


#### Napari viewer
- View all FOVs tiled into a single image, with the detected spots overlaid

In [None]:
# Side length of the smallest square grid that can hold `limit` tiles
def nearest_square(limit):
    """Return the smallest non-negative integer whose square is >= ``limit``.

    For a positive ``limit`` this is ceil(sqrt(limit)), computed with integer
    steps only. ``limit == 0`` gives 0; a negative ``limit`` gives 1.
    """
    side = 0
    # Grow the side while the next larger square is still short of the limit
    while (side + 1) * (side + 1) < limit:
        side += 1
    return side if side * side == limit else side + 1

# NOTE(review): genescy7, genescy5, genescy3 (gene name per round index for each
# channel) and color (one colour per gene layer) are read below but are not
# defined in any cell shown here - they must exist before this cell is run.

# Initialize Napari viewer
viewer = napari.Viewer()

# Get the combined TIFF files first so that the grid size and all indices are
# derived from the current directory listing
files = sorted(each for each in os.listdir(dir6) if each.endswith('.tif'))
chans = pd.unique(pd.Series([name.split("_")[2] for name in files]))
nfovs = len(files) // len(chans)

# Define parameters
gap = 2800  # Pitch of the tile grid in pixels; each FOV must fit inside it
totalarea = nearest_square(nfovs)  # Number of tiles along each side of the mosaic
# uint16 instead of int16, which cannot hold values above 32767
image = np.zeros((gap * totalarea, gap * totalarea), dtype=np.uint16)
xx = 0  # Tile index along the first (row / Y) axis
yy = 0  # Tile index along the second (column / X) axis

# Spot channels sit at offsets 0, 1, 2 within each FOV's group of files; the
# last file of the group provides the image shown in the mosaic.
channel_genes = [genescy7, genescy5, genescy3]
collected = [[] for _ in channel_genes]  # Per-channel list of per-FOV spot tables

# Iterate over each FOV
for fil in range(nfovs):
    imagename = files[len(chans) * fil + len(chans) - 1]
    print(imagename)
    imageloc = os.path.join(dir6, imagename)
    im2 = tiff.imread(imageloc)
    # Rows span im2.shape[1] and columns im2.shape[2] (the original used
    # shape[1] for both, which only works for square images)
    image[xx * gap:xx * gap + im2.shape[1], yy * gap:yy * gap + im2.shape[2]] = im2[0, :, :]

    for offset, genes in enumerate(channel_genes):
        # Read the "<stem>_spotclusters.csv" table of this channel
        stem = files[len(chans) * fil + offset].split(".")[0]
        spots_fov = pd.read_csv(os.path.join(dir7, stem + "_spotclusters.csv"))

        # Shift the coordinates to the tile position inside the mosaic
        spots_fov['Y'] = spots_fov['Y'] + xx * gap
        spots_fov['X'] = spots_fov['X'] + yy * gap

        # Translate the round index of every spot into its gene name
        spots_fov['gene'] = [genes[j] for j in spots_fov['round'].to_numpy()]
        collected[offset].append(spots_fov)

    # Advance to the next tile; wrap to the next column after `totalarea` rows
    xx = xx + 1
    if xx > totalarea - 1:
        xx = 0
        yy = yy + 1

# Add image to Napari viewer
imagelayer = viewer.add_image(image)
imagelayer.contrast_limits = (0, 65000)

# One table per channel. The accumulators are built here, so the cell no
# longer depends on variables left over in the kernel and can be re-run
# without duplicating rows.
spot_columns = ['Y', 'X', 'clusterindex', 'round', 'gene']
spotscy7, spotscy5, spotscy3 = (
    pd.concat(frames) if frames else pd.DataFrame(columns=spot_columns)
    for frames in collected
)

# Concatenate spot data for all channels
allspots = pd.concat([spotscy7, spotscy5, spotscy3], axis=0)

# Add one points layer per round and channel, named after the gene
for rnd in range(len(genescy7)):
    for offset, (table, genes) in enumerate(zip((spotscy7, spotscy5, spotscy3), channel_genes)):
        subset = table[table['round'] == rnd]
        # Select Y/X by name as floats; converting the whole mixed-type table
        # would yield an object array
        viewer.add_points(subset[['Y', 'X']].to_numpy(dtype=float),
                          face_color=color[(len(chans) - 1) * rnd + offset],
                          size=5,
                          blending='translucent_no_depth',
                          edge_width=0,
                          name=genes[rnd])

# Save spot data to CSV files
featuresall = allspots['gene']
featuresall.to_csv(os.path.join(dir7, 'features.csv'), index=False)
allspots.to_csv(os.path.join(dir7, "allspots.csv"), index=False)

# Add a fully transparent layer with every spot, carrying the gene name as a
# feature. Features are passed as a column-name -> values mapping.
pointlayer = viewer.add_points(allspots[['Y', 'X']].to_numpy(dtype=float),
                               face_color='white',
                               size=5,
                               blending='translucent_no_depth',
                               edge_width=0,
                               name='All_spots',
                               opacity=0,
                               features={'gene': featuresall.to_numpy()})

pointlayer.refresh()
