In [None]:
import pandas as pd

# Load the phenotype table from the CSV file into a DataFrame.
phenotype_path = 'phenotype.csv'
df = pd.read_csv(phenotype_path)

In [None]:
# Keep only the centroid coordinates and the annotation column; the other columns of df are not used.
selected_columns = ["X_centroid", "Y_centroid", "manual_leiden_edges_necrosis_muscle"]
useful_data = df[selected_columns]
useful_data.head()

In [None]:
# Drop every row whose annotation is one of the categories we do not want.
# A single `isin` filter replaces three copy-pasted `!=` cells and keeps exactly
# the same rows (rows with a missing annotation are kept, as before).
excluded_annotations = ["edges", "other immune cells", "excluded"]
useful_data = useful_data[~useful_data["manual_leiden_edges_necrosis_muscle"].isin(excluded_annotations)]

In [None]:
# List the distinct annotation values that remain in the table.
useful_data["manual_leiden_edges_necrosis_muscle"].unique()

In [None]:
# Build a {cell type: integer code} lookup; codes follow the order in which
# the cell types first appear in the table.
cell_types = useful_data["manual_leiden_edges_necrosis_muscle"].unique()
mapping = {cell_type: code for code, cell_type in enumerate(cell_types)}
mapping

In [None]:
# Preview the first five rows of the table.
useful_data.head(n=5)

In [None]:
# Replace each cell-type name with its integer code from `mapping`.
# Note: running this cell a second time looks the integer codes up in `mapping`
# (whose keys are the original names) and turns the whole column into NaN.
label_codes = useful_data["manual_leiden_edges_necrosis_muscle"].map(mapping)
useful_data["manual_leiden_edges_necrosis_muscle"] = label_codes
useful_data.head()

In [None]:
# Renumber the rows 0..n-1 and discard the old index left over from filtering.
useful_data = useful_data.reset_index(drop=True)

In [None]:
# Display the table as it stands after the index reset.
useful_data

In [None]:
# Count how many rows carry each encoded cell-type label.
encoded_labels = useful_data["manual_leiden_edges_necrosis_muscle"]
encoded_labels.value_counts()

In [None]:
import tifffile as tiff
import numpy as np
import matplotlib.pyplot as plt

# Read the TIFF from disk as a NumPy array, then show its dimensions.
tiff_path = 'necrosis_muscle.tif'
image = tiff.imread(tiff_path)
image.shape

In [None]:
# Move the first axis to the end: (channel, height, width) -> (height, width, channel).
# Note: running this cell again permutes the axes a second time.
image = np.transpose(image, axes=(1, 2, 0))
image.shape

In [None]:
# Display the image (disabled; uncomment the lines below to preview it)

# plt.imshow(image)
# plt.axis('off')  # Hide the axis
# plt.show()

In [None]:
def get_cell_boundaries(x_cent, y_cent, windowSize, image_shape=None):
    """Return the pixel window around a centroid, limited to the image extent.

    Parameters
    ----------
    x_cent, y_cent : number
        Centroid position; x indexes columns (axis 1), y indexes rows (axis 0).
    windowSize : int
        Side length of the square window, in pixels.
    image_shape : sequence of int, optional
        Shape whose first two entries are (height, width). When omitted, the
        shape of the notebook-level ``image`` array is used, which is what the
        function always did before this parameter existed.

    Returns
    -------
    tuple
        ``((x_start, x_end), (y_start, y_end))`` with half-open bounds suitable
        for slicing.

    Notes
    -----
    The two image edges are treated differently. A start that would be negative
    is moved to 0 and the end is measured from that moved start, so the window
    keeps its full size but is no longer centred on the centroid. An end that
    would pass the far edge is cut at the edge, so the window comes out smaller
    than ``windowSize``.
    """
    if image_shape is None:
        # Fall back to the global image so existing three-argument calls keep working.
        image_shape = image.shape
    height, width = image_shape[0], image_shape[1]
    half_window = windowSize // 2

    x_start = max(int(x_cent - half_window), 0)
    x_end = min(x_start + windowSize, width)

    y_start = max(int(y_cent - half_window), 0)
    y_end = min(y_start + windowSize, height)

    return (x_start, x_end), (y_start, y_end)

In [None]:
size = 64 # side length, in pixels, of the square patch cut out around each cell
import tensorflow as tf # no longer used by this cell; kept in case other cells rely on `tf`
list_of_images = [] # one (size, size, channels) patch per row of useful_data
converted_images = [] # replaced below by the stacked array of patches

n_channels = image.shape[2] # patches keep however many channels the image has

# Walk the two centroid columns directly instead of building a Series per row.
for x_cent, y_cent in zip(useful_data["X_centroid"], useful_data["Y_centroid"]):
    (x_start, x_end), (y_start, y_end) = get_cell_boundaries(x_cent, y_cent, size)
    crop = image[y_start:y_end, x_start:x_end] # rows are y, columns are x

    if crop.shape != (size, size, n_channels):
        # The window was cut at an image edge. Zero-pad it back to size x size,
        # keeping the crop centred (any odd leftover pixel goes to the bottom/right).
        # Plain padding never rescales the pixels, so every patch stays at the same
        # magnification, and an empty crop simply becomes an all-zero patch.
        pad_rows = size - crop.shape[0]
        pad_cols = size - crop.shape[1]
        crop = np.pad(
            crop,
            (
                (pad_rows // 2, pad_rows - pad_rows // 2),
                (pad_cols // 2, pad_cols - pad_cols // 2),
                (0, 0),
            ),
            mode="constant",
            constant_values=0,
        )
    list_of_images.append(crop)

converted_images = np.array(list_of_images) # stack the patches into one array
# NOTE(review): if the TIFF is not 8-bit, this cast does not rescale values - confirm the image dtype.
converted_images = converted_images.astype(np.uint8)


In [None]:
# Labels are the integer-encoded cell types, in the same row order as the patches.
# useful_data only contains X_centroid, Y_centroid and manual_leiden_edges_necrosis_muscle,
# so the previously referenced "simplified_leiden" column raised a KeyError.
labels = useful_data["manual_leiden_edges_necrosis_muscle"].to_numpy()

In [None]:
import h5py

# Write the patches and their labels as two datasets of a new HDF5 file
# (mode 'w' creates the file, replacing one that already exists).
with h5py.File('13.h5', mode='w') as h5_out:
    h5_out.create_dataset(name='images', data=converted_images)
    h5_out.create_dataset(name='labels', data=labels)