## Mattia Project 
### Cell Segmentation

In [9]:
import matplotlib.pyplot as plt
import numpy as  np
import pandas as pd
from PIL import Image
import cv2
from skimage import exposure
from skimage.transform import resize
from skimage.filters import sobel
import os
from datetime import datetime
from pathlib import Path
from sklearn.cluster import MiniBatchKMeans

#!uv pip install ultralytics
from ultralytics import SAM, YOLO
#ultralytics.checks()

In [4]:
# Folder locations used by the cells below (relative to the working directory).
temp_dir = "./temp"  # scratch folder
out_dir = "./out"  # destination for the generated figures and the CSV export
dir_path = "./pics"  # directory where the pictures are located

In [None]:
def get_filenames(dir_path:str=dir_path)->list:
  """Return the image files found directly inside ``dir_path``, sorted by path.

  Only regular files whose suffix (compared case-insensitively) is ``.jpg``,
  ``.jpeg`` or ``.png`` are returned. Sub-directories are skipped even when
  their name ends in one of those suffixes, because they cannot be loaded as
  images further down the pipeline.

  Args:
    dir_path: Folder to scan (not recursive). The default is the value the
      module-level ``dir_path`` had when this function was defined.

  Returns:
    A sorted list of ``pathlib.Path`` objects.

  Raises:
    FileNotFoundError: If ``dir_path`` does not exist (raised by ``iterdir``).
  """
  extensions = {".jpg", ".jpeg", ".png"}
  return sorted(
    p for p in Path(dir_path).iterdir()
    if p.is_file() and p.suffix.lower() in extensions
  )

def preprocess_images(image_paths:list)->list:
  """Load images as grayscale, resize them to 528x528 and divide by 255.

  Args:
    image_paths: Paths (``str`` or ``pathlib.Path``) of the images to load.

  Returns:
    One float array per input path, in input order, holding the resized
    grayscale pixel values divided by 255.0.

  Raises:
    OSError: If an image cannot be read.
  """
  normalized = []
  for path in image_paths:
    # str() so that pathlib.Path inputs also work on OpenCV builds whose
    # imread only accepts plain strings.
    pic = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if pic is None:
      # imread reports failure by returning None instead of raising; fail
      # here with the offending path rather than inside cv2.resize.
      raise OSError(f"Could not read image: {path}")
    normalized.append(cv2.resize(pic, (528, 528)) / 255.0)
  return normalized

def kmeans(pic):
  """Cluster the values of ``pic`` into three groups.

  Every element of ``pic`` is treated as a single-feature sample and passed
  to a ``MiniBatchKMeans`` configured with a fixed ``random_state``.

  Returns:
    The cluster label of every element, reshaped to ``pic.shape``.
  """
  samples = pic.reshape(-1, 1)
  clusterer = MiniBatchKMeans(
    n_clusters=3,
    batch_size=2048,
    max_iter=100,
    random_state=0,
  )
  flat_labels = clusterer.fit_predict(samples)
  return flat_labels.reshape(pic.shape)

def get_connected_component():
  """Placeholder, not implemented yet: takes no arguments and returns None."""
  return None

def is_bubble():
  """Placeholder, not implemented yet: takes no arguments and returns None."""
  return None

def is_mesh():
  """Placeholder, not implemented yet: takes no arguments and returns None."""
  return None

def insert_noise():
  """Placeholder, not implemented yet: takes no arguments and returns None."""
  return None

def filter_mesh_bubbles(pic):
  """Work-in-progress filter for bubbles and mesh artefacts in ``pic``.

  Planned approach (not implemented yet):
    * bubbles are extremely round and symmetric,
    * the mesh is a recurring grid,
    * both should be filtered out.

  For now the function only runs ``kmeans`` on ``pic``; the resulting labels
  are discarded and ``None`` is returned.
  """
  _cluster_labels = kmeans(pic)
  return None



In [None]:
# TODO: unfinished cell. The bare header `def preprocess_images` that stood here
# had no parameter list and no body, which is a SyntaxError. A complete
# preprocess_images is already defined earlier in this file.

In [None]:
# Build the SAM model from the "sam2.1_b.pt" weights once; identify_cell_mask
# below reads this module-level `model`.
model = SAM("sam2.1_b.pt")

def identify_cell_mask(file_path="./pics/Invasion.10.10.MM.untreated.1.jpg",threshold_low=130,threshold_high=0.65):
  """Pick the cell-body mask for one image from the segmentation output.

  The module-level ``model`` is run on ``file_path``. Among the returned masks
  that cover less than ``threshold_high`` of the frame, the largest one is
  selected; it is accepted only if its area is at least that of a circle with
  radius ``threshold_low``.

  Args:
    file_path: Image to segment.
    threshold_low: Minimum radius in pixels; the selected mask must cover at
      least ``pi * threshold_low**2`` pixels.
    threshold_high: Maximum fraction of the frame a candidate mask may cover.

  Returns:
    The selected 2-D mask, or ``None`` (after printing the reason) when no
    mask qualifies.
  """
  results = model(file_path)  # image
  found = results[0].masks
  # `masks` can be None when the model produced no mask at all; treat that
  # like the "too few masks" case instead of failing on `.data`.
  if found is None:
    print(f"Didn't found cellbody for image: {file_path}")
    return None

  masks = found.data.cpu().numpy()
  if masks.shape[0] <= 1:
    print(f"Didn't found cellbody for image: {file_path}")
    return None

  # Select the biggest mask that is not (almost) the whole image.
  mask_sums = masks.sum(axis=(1, 2))
  # Frame size is taken from the masks themselves: it is the pixel space the
  # sums are counted in, and it avoids re-reading the image from disk.
  height, width = masks.shape[1:]
  below_max = mask_sums < (height * width * threshold_high)
  if not np.any(below_max):
    print("only found one big mask no cell")
    return None

  # Masks that are too large are zeroed out so argmax ignores them.
  idx = np.argmax(mask_sums * below_max)
  if mask_sums[idx] < (threshold_low**2 * np.pi):
    print(f"found mask to small beneath {threshold_low} pixels")
    return None

  return masks[idx, :, :]


In [None]:
def detect_fibroblast_arms(file_path,img_save_path=None,threshold=0.02,show_fig=False):
  """Build a boolean edge mask of an image by thresholding its Sobel response.

  Args:
    file_path: Image file; it is loaded as grayscale.
    img_save_path: If given, the diagnostic figure (histogram of the Sobel
      values plus the mask overlaid on the image) is saved to this path.
    threshold: Sobel values strictly above this count as edge pixels.
    show_fig: If true, the diagnostic figure is displayed with ``plt.show()``.

  Returns:
    Boolean array with the shape of the image, True where the Sobel value
    exceeds ``threshold``.

  Raises:
    OSError: If the image cannot be read.
  """
  img = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE)
  if img is None:
    # imread reports failure by returning None instead of raising.
    raise OSError(f"Could not read image: {file_path}")
  sobel_img = sobel(img)
  mask = sobel_img > threshold

  if img_save_path is not None or show_fig:
    # The histogram is only needed for the figure, so compute it here.
    counts, bins = np.histogram(sobel_img.ravel(), bins=300)
    fig, (ax1, ax2) = plt.subplots(1, 2)
    ax1.set_title("Value distribution of sobel edges")
    ax1.bar(bins[:-1], counts, width=np.diff(bins), align='edge', color='gray', edgecolor='black')
    ax1.vlines(threshold, ymin=0, ymax=counts.max(), color="r", label="threshold")
    ax1.legend()
    ax2.imshow(img, cmap="gray")
    ax2.imshow(mask, cmap='Reds', alpha=0.5)
    ax2.set_title("Mask of Edges")

    if img_save_path is not None:
      # Save to the path the caller asked for.
      fig.savefig(img_save_path)
    if show_fig:
      plt.show()
    # Release the figure; otherwise every call keeps one more figure alive.
    plt.close(fig)
  return mask

In [None]:
def fuse_cell_with_fibroblasts(cell_mask,edge_mask):
  """Merge the cell-body mask with every edge component that touches it.

  Both inputs are binarised with ``> 0``. Edge pixels are grouped into
  8-connected components; a component is kept when at least one of its pixels
  lies inside the cell body dilated once with a 3x3 elliptical kernel. The
  kept edges are dilated once with the same kernel and united with the body.

  Returns:
    A ``uint8`` array of 0/1 values.
  """
  body = (cell_mask > 0).astype(np.uint8)
  edge_pixels = (edge_mask > 0).astype(np.uint8)

  ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

  # Grow the body slightly so edges that merely touch it overlap it.
  grown_body = cv2.dilate(body, ellipse).astype(bool)

  # Label the edge components; label 0 is the background.
  _, component_map = cv2.connectedComponents(edge_pixels, connectivity=8)

  # Component ids found under the grown body, background excluded.
  touching = np.unique(component_map[grown_body])
  touching = touching[touching != 0]
  attached = np.isin(component_map, touching).astype(np.uint8)

  # Optional thickening of the kept edges.
  attached = cv2.dilate(attached, ellipse)

  return np.clip(body + attached, 0, 1).astype(np.uint8)

In [None]:
def show_masks(img_path,cell_mask,arms_mask,fuse_mask,threshold=0.02):
  """Save a four-panel overview of the masks for one image.

  Each panel shows the grayscale image; panels 2-4 overlay the cell-body
  mask, the edge mask and the fused mask. The figure is written to
  ``out_dir`` as ``<name>_MASKS<ext>``, where name and extension come from
  ``img_path``.

  Args:
    img_path: Image file the masks belong to.
    cell_mask: Mask overlaid in the "Cell Body" panel.
    arms_mask: Mask overlaid in the "Edges" panel.
    fuse_mask: Mask overlaid in the "Final Cell" panel.
    threshold: Only used in the title of the "Edges" panel.

  Raises:
    OSError: If the image cannot be read.
  """
  img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
  if img is None:
    # imread reports failure by returning None instead of raising.
    raise OSError(f"Could not read image: {img_path}")

  fig, ax = plt.subplots(1, 4, figsize=(12, 4))
  for axis in ax:
    # "gray" is accepted by every Matplotlib version; the "grey" alias is not.
    axis.imshow(img, cmap="gray")
    axis.axis("off")

  ax[0].set_title("Original Image")

  overlays = (
    (ax[1], cell_mask, "Cell Body"),
    (ax[2], arms_mask, f"Edges (t:{threshold:.2f})"),
    (ax[3], fuse_mask, "Final Cell"),
  )
  for axis, mask, title in overlays:
    axis.imshow(mask, cmap='jet', alpha=0.5)
    axis.set_title(title)

  filename = os.path.basename(img_path)
  name, ext = os.path.splitext(filename)
  # savefig does not create missing folders.
  os.makedirs(out_dir, exist_ok=True)
  fig.savefig(os.path.join(out_dir, f"{name}_MASKS{ext}"))
  # This runs once per image in a batch; close the figure so they do not
  # pile up in memory.
  plt.close(fig)

In [None]:
def arm_to_body_pixel_ratio(cell_mask,fuse_mask):
  """Return pixel counts for body and arms and the arms' share of the total.

  Args:
    cell_mask: Mask of the cell body; its sum is the body pixel count.
    fuse_mask: Mask of body plus arms; its sum is the total pixel count.

  Returns:
    ``(body_pixels, arm_pixels, ratio)`` where ``arm_pixels`` is the total
    minus the body and ``ratio`` is ``arm_pixels / total`` (the share of the
    fused mask that is arms, not arms divided by body).
  """
  body_count = cell_mask.sum()
  total_count = fuse_mask.sum()
  arm_count = total_count - body_count
  return body_count, arm_count, arm_count / total_count

In [None]:
def get_masks(files=None,create_pictures=True):
  """Run the segmentation pipeline on every file and collect the results.

  For each file the cell-body mask is looked up; files without one are
  skipped. For the others the edge mask, the fused mask and the pixel
  statistics are computed.

  Args:
    files: Image paths to process. When omitted, ``get_filenames()`` is
      called to list the images of the default picture folder.
    create_pictures: If true, ``show_masks`` is called for every processed
      file.

  Returns:
    A list of ``(file, cell_mask, arm_mask, fuse_mask, stats)`` tuples, one
    per file for which a cell mask was found.
  """
  # The default is resolved at call time: no `files` variable is defined in
  # this file, so binding it in the signature fails when the def is executed.
  if files is None:
    files = get_filenames()

  masks = []
  for file in files:
    cell_mask = identify_cell_mask(file_path=file)
    if cell_mask is None:
      # No usable cell body: skip before doing the edge detection.
      continue
    arm_mask = detect_fibroblast_arms(file_path=file)
    fuse_mask = fuse_cell_with_fibroblasts(cell_mask, edge_mask=arm_mask)
    stats = arm_to_body_pixel_ratio(cell_mask=cell_mask, fuse_mask=fuse_mask)
    masks.append((file, cell_mask, arm_mask, fuse_mask, stats))
    if create_pictures:
      show_masks(file, cell_mask, arm_mask, fuse_mask)
  return masks

# Run the pipeline with its default arguments; `masks` is the input of
# get_ratio_table further down.
masks = get_masks()

In [None]:
def get_ratio_table(masks):
  """Turn the pipeline results into a table of pixel statistics.

  Args:
    masks: Sequence of result tuples; the first element of each tuple is used
      as the row label and the last element as the row values.

  Returns:
    A ``DataFrame`` indexed by the first tuple elements, with the columns
    "Pixels in Cell", "Pixels in Arms" and "Ratio".
  """
  column_names = ["Pixels in Cell", "Pixels in Arms", "Ratio"]
  row_labels = []
  rows = []
  for entry in masks:
    row_labels.append(entry[0])
    rows.append(entry[-1])
  return pd.DataFrame(data=rows, index=row_labels, columns=column_names)

# Build the ratio table and write it to a time-stamped CSV file in out_dir.
df = get_ratio_table(masks)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# to_csv does not create missing folders, so make sure out_dir exists first.
os.makedirs(out_dir, exist_ok=True)
new_filename = os.path.join(out_dir, f"ratio_images_{timestamp}.csv")
df.to_csv(new_filename)