### Pre-imports.

In [None]:
# Imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import copy
import tqdm
from typing import List
import pandas as pd
import copy


In [None]:
# Constants
# Palette of 16 three-component colour tuples (each component 0-255).
# NOTE(review): channel order (RGB vs. BGR) is not established by the cells
# shown here -- confirm against the drawing code that consumes this palette.
colors = [
    # fully saturated primaries and secondaries
    (255, 0, 0), (0, 255, 0), (0, 0, 255),
    (255, 255, 0), (255, 0, 255), (0, 255, 255),
    # softer / mixed tones
    (255, 192, 203), (0, 125, 125), (255, 125, 125),
    (125, 255, 255), (255, 255, 125), (125, 255, 125),
    (255, 125, 255), (125, 125, 125), (125, 125, 0),
    (0, 125, 0),
]

## Part 4.2

Shape and texture of the cells may vary over time and over the different conditions.
For shape you can start by computing the area and perimeter. 
However, as cells may stretch, in addition compute the roundness as a shape feature. As the segmentation yields a mask for each cell in the tracking, now compute texture features using the cell mask; i.e., mean, standard deviation, smoothness and uniformity.



### 4.2.5

5) From the cells obtained from the tracking, compute the aforementioned features for  shape  and texture. 
This is for  both  conditions  and  presented  in  two  different tables.

We first load some of the functions from the first part, slightly adapted so that they also calculate features such as area, perimeter, roundness, smoothness and uniformity. 

After that we store the results in the results_4.2 folder as Q5_A.csv and Q5_B.csv, for group A (the control group, loaded from the MTLn3-Ctrl images) and group B (the EGF-stimulated group, loaded from the MTLn3+EGF images). 
Here we decided to look only at the starting image rather than at all the tracked images; other frames can be inspected by adjusting the first line of the trace_cells_over_image function, which currently selects the first of the 30 images. 


In [None]:
      
def segment_cells(image, plot=False):
    """
    Segment the cells in one image and return their outer contours.

    Many pipelines were tried; plain normalising + thresholding worked best.
    The steps are: min-max normalisation to the 0-255 range, morphological
    opening with a 9x9 rectangular kernel, Otsu thresholding, and extraction
    of the external contours.

    Parameters
    ----------
    image : array
        Image to segment (presumably single-channel grayscale -- confirm
        against the loader).
    plot : bool
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    The contours exactly as returned by ``cv2.findContours`` -- ALL of them,
    unsorted and unfiltered. Callers that want only the largest cells sort
    by ``cv2.contourArea`` and slice themselves.
    """
    # Normalize the intensities to the 0-255 range
    normalized = cv2.normalize(image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)

    # Open the image to remove small shapes
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    opened = cv2.morphologyEx(normalized, cv2.MORPH_OPEN, kernel)

    # Threshold the image (Otsu picks the threshold, so the 0 is ignored)
    # NOTE(review): the threshold runs before the uint8 conversion below; if
    # the input is not 8-bit, OpenCV builds that only accept 8-bit input for
    # THRESH_OTSU would reject it here -- confirm with the installed version.
    _, binary = cv2.threshold(opened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours needs an unsigned 8-bit image
    binary = binary.astype(np.uint8)

    # Only the outer contour of each connected blob is kept
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return contours

class Cell:
    """
    One segmented cell: its contour plus a few cached geometric features.

    Attributes
    ----------
    idx : identifier given by the caller (used to follow a cell across frames)
    contour : the contour array, indexed as ``contour[:, :, 0]`` (x) and
        ``contour[:, :, 1]`` (y)
    mean_x, mean_y : mean of the contour points' x / y coordinates
    area : ``cv2.contourArea`` of the contour
    width, height : scalar extent (max - min) of the contour along x / y
    """

    def __init__(self, contour, idx):
        self.idx = idx
        self.contour = contour
        self.mean_x = np.mean(contour[:, :, 0])
        self.mean_y = np.mean(contour[:, :, 1])
        self.area = cv2.contourArea(contour)
        # Scalars, not 1-element arrays: the builtin max()/min() over the
        # (N, 1) slice used previously returned arrays of shape (1,), which
        # then leaked into the feature tables.
        self.width = self._axis_extent(contour, 0)
        self.height = self._axis_extent(contour, 1)

    @staticmethod
    def _axis_extent(contour, axis):
        """Return max - min of the contour coordinates along `axis` as a scalar."""
        coordinates = np.asarray(contour)[:, :, axis]
        return coordinates.max() - coordinates.min()

    def features(self):
        """Return [contour, mean_x, mean_y, area, width, height] as an object array."""
        return np.array([
            np.array(self.contour),
            self.mean_x,
            self.mean_y,
            self.area,
            self.width,
            self.height
        ], dtype=object)

    def to_contour(self):
        """Return the contour this cell was built from."""
        return self.contour

    def distance(self, cell):
        """Distance measure used for matching; currently the squared Euclidean distance."""
        return self.euclidean_distance(cell)

    def euclidean_distance(self, cell):
        """
        Return the Squared Euclidean Distance between this and another cell
        Since it is a relative distance, no square root is applied
        """
        delta_x = self.mean_x - cell.mean_x
        delta_y = self.mean_y - cell.mean_y
        return delta_x ** 2 + delta_y ** 2

    def copy(self, idx):
        """Return a deep copy of this cell carrying the identifier `idx`."""
        new = copy.deepcopy(self)
        new.idx = idx
        return new

    def __str__(self):
        return f"Cell {self.idx}, mean {self.mean_x, self.mean_y}\n"

def trace_cells_over_image(images, plot=False):
    """
    Compute shape and texture features for the 15 largest cells of one image.

    Parameters
    ----------
    images : sequence of images; only ``images[0]`` is analysed.
    plot : bool
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    A list of at most 15 ``Cell`` objects, ordered by decreasing contour
    area, each with an ``additional_features`` dict holding 'Area',
    'Perimeter', 'Roundness', 'Mean', 'Standard Deviation', 'Smoothness'
    and 'Uniformity'.
    """
    image = images[0]  # Change this to change the image
    contours = sorted(segment_cells(image, plot=False), key=cv2.contourArea, reverse=True)

    # Only the 15 largest contours are ever returned, so only those become cells
    largest_cells = [Cell(contour, i) for i, contour in enumerate(contours[:15])]

    for cell in largest_cells:
        contour = cell.to_contour()

        # Filled mask of this cell, so the intensity statistics only cover its pixels
        mask = np.zeros_like(image, dtype=np.uint8)
        cv2.drawContours(mask, [contour], -1, (255), thickness=cv2.FILLED)
        mean, std = cv2.meanStdDev(image, mask=mask)
        std_value = std[0][0]

        perimeter = cv2.arcLength(contour, True)
        # 4*pi*A / P^2 is 1 for a perfect circle; a degenerate contour with
        # zero perimeter gets 0.0 instead of raising ZeroDivisionError.
        roundness = (4 * np.pi * cell.area) / (perimeter ** 2) if perimeter > 0 else 0.0

        # NOTE(review): 'Smoothness' is stored as the raw standard deviation
        # and 'Uniformity' as 1 - 1/(1 + std). The usual statistical texture
        # descriptors are R = 1 - 1/(1 + variance) for smoothness and the sum
        # of squared histogram probabilities for uniformity -- confirm which
        # definitions the report intends before relying on these two columns.
        smoothness = std_value
        uniformity = 1.0 - (1.0 / (1.0 + smoothness))

        cell.additional_features = {
            'Area': cell.area,
            'Perimeter': perimeter,
            'Roundness': roundness,
            'Mean': mean[0][0],
            # scalar, like 'Mean' (previously the whole (1, 1) array was stored)
            'Standard Deviation': std_value,
            'Smoothness': smoothness,
            'Uniformity': uniformity
        }

    return largest_cells

In [None]:
# Load the images: frames 0000-0029 of each condition, colour channels averaged
images_A = [np.mean(plt.imread(f"images/MTLn3-Ctrl00{i:02d}.tif"), -1).astype(np.uint16) for i in range(30)]
images_B = [np.mean(plt.imread(f"images/MTLn3+EGF00{i:02d}.tif"), -1).astype(np.uint16) for i in range(30)]

# Apply the algorithm (features of the largest cells in the first frame)
cells_a = trace_cells_over_image(images_A, plot=False)
cells_b = trace_cells_over_image(images_B, plot=False)

# Column order of the report tables
Q5_COLUMNS = ['Cell ID', 'Mean X', 'Mean Y', 'Area', 'Width', 'Height', 'Perimeter',
              'Roundness', 'Mean', 'Standard Deviation', 'Smoothness', 'Uniformity']


def _as_scalar(value):
    """Unwrap a numpy scalar or a one-element array into a plain Python number."""
    return np.asarray(value).item()


def _cells_to_feature_table(cells):
    """
    Build the Q5 feature table for one condition: one row per cell.

    Each condition is converted on its own, so the two conditions may hold a
    different number of cells. 'Cell ID' is the cell's rank by area (1 is the
    largest) instead of the constant 1 every row used to receive. Because the
    frame is built from plain numbers, round(3) really rounds every column.
    """
    records = []
    for cell in cells:
        record = {
            'Cell ID': cell.idx + 1,
            'Mean X': _as_scalar(cell.mean_x),
            'Mean Y': _as_scalar(cell.mean_y),
            'Area': _as_scalar(cell.area),
            'Width': _as_scalar(cell.width),
            'Height': _as_scalar(cell.height),
        }
        # Shape / texture features attached by trace_cells_over_image
        for feature, value in cell.additional_features.items():
            record[feature] = _as_scalar(value)
        records.append(record)
    return pd.DataFrame(records, columns=Q5_COLUMNS).round(3)


# One table per condition for the report, rounded to 3 decimal places
df_a = _cells_to_feature_table(cells_a)
df_b = _cells_to_feature_table(cells_b)

# Save the DataFrames to CSV files
df_a.to_csv('results_4.2/Q5_A.csv', index=False)
df_b.to_csv('results_4.2/Q5_B.csv', index=False)



### 4.2.6

Next, given that the tracking is completed, features over the timeline can be computed.
First compute these features; then, in order to establish possible differences, make a graph of distance over time. 
Again, we assume a time interval of 2 minutes between the images. 

In [None]:
   
def trace_cells_over_images(images, plot=False):
    """
    Track the 15 largest cells of the first image through all images.

    Matching is greedy: in every frame each tracked cell, in order, takes the
    nearest not-yet-taken detection (by ``Cell.distance``) of the next frame,
    so the result depends on the order of the tracked cells.

    Parameters
    ----------
    images : sequence of images, in time order.
    plot : bool
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    A list with one entry per image; each entry is a list of ``Cell`` objects
    whose ``idx`` is that of the first-frame cell they are matched to.
    """
    # Segment the first image and keep its 15 largest contours as the tracked cells
    first_contours = sorted(segment_cells(images[0], plot=False), key=cv2.contourArea, reverse=True)
    frames = [[Cell(contour, i) for i, contour in enumerate(first_contours[:15])]]

    for image in images[1:]:
        candidates = [Cell(contour, i) for i, contour in enumerate(segment_cells(image, plot=False))]
        matched = []

        # Find closest match for each cell
        for cell in frames[-1]:
            if not candidates:
                # Fewer detections than tracked cells: keep the cell at its
                # last known position so every frame keeps the same cells
                # (min() on an empty list used to raise ValueError here).
                matched.append(cell.copy(cell.idx))
                continue
            match = min(candidates, key=cell.distance)
            matched.append(match.copy(cell.idx))
            # A detection can be claimed by one tracked cell only
            candidates.remove(match)

        frames.append(matched)

    return frames

def calculate_cell_distances(cells):
    """
    Return, for every pair of consecutive frames, the distance each cell moved.

    `cells` is a list of frames, each a list of objects with `idx`, `mean_x`
    and `mean_y`. Entry k of the result lists the Euclidean distances between
    frame k and frame k + 1, in the order of the cells of frame k + 1; a cell
    whose `idx` does not occur in the earlier frame contributes no entry.
    """
    per_step = []
    for earlier_frame, later_frame in zip(cells, cells[1:]):
        moved = []
        for current in later_frame:
            # First cell of the earlier frame carrying the same identifier
            previous = next((c for c in earlier_frame if c.idx == current.idx), None)
            if previous is None:
                continue
            delta_x = current.mean_x - previous.mean_x
            delta_y = current.mean_y - previous.mean_y
            moved.append(np.sqrt(delta_x ** 2 + delta_y ** 2))
        per_step.append(moved)
    return per_step

def plot_cell_distances_over_time(cells):
    """
    Plot, for every tracked cell, the distance moved at each time step.

    `cells` is the list of frames accepted by `calculate_cell_distances`.
    One line is drawn per cell of the first frame; the n-th line collects the
    n-th distance of every step and is labelled "Cell n" (counting from 1).
    """
    # Distances per step, and the matching 1-based step numbers for the x-axis
    step_distances = calculate_cell_distances(cells)
    step_numbers = np.arange(1, len(step_distances) + 1)

    plt.figure(figsize=(10, 6))
    for position, _ in enumerate(cells[0]):
        series = [step[position] for step in step_distances]
        plt.plot(step_numbers, series, label=f"Cell {position+1}")

    plt.xlabel("Time Step")
    plt.ylabel("Distance Traveled")
    plt.title("Distance Traveled by Cells over Time")
    plt.legend()
    plt.show()





In [None]:
# Track the cells through every frame of both conditions
results_a = trace_cells_over_images(images_A, plot=False)
results_b = trace_cells_over_images(images_B, plot=False)

# One distance-per-step figure per condition: results_a first, then results_b
for tracked_frames in (results_a, results_b):
    plot_cell_distances_over_time(tracked_frames)


Compute, for both conditions, the cell velocity and distance trajectory, and present this in a table.

Because we wanted to show all time-steps in a table, we decided to save it to a CSV file. The file holds one row per tracked cell, with a list of values, one per time-step. This way we can track the velocity or distance traveled per 2 minutes. The tables are also printed, but they are hard to read in this form. 

In [None]:
import pandas as pd
import numpy as np

def calculate_cell_distances(cells):
    """
    Distance moved by each cell between every pair of consecutive frames.

    `cells` is a list of frames; every frame is a list of objects exposing
    `idx`, `mean_x` and `mean_y`. The result has one list per step (frame k
    to frame k + 1) holding the Euclidean distances in the order of the
    later frame's cells. Cells without a same-`idx` predecessor are skipped.
    """
    cell_distances = []
    for step in range(1, len(cells)):
        # Earliest cell per identifier in the previous frame
        predecessors = {}
        for candidate in cells[step - 1]:
            predecessors.setdefault(candidate.idx, candidate)

        step_distances = []
        for cell in cells[step]:
            before = predecessors.get(cell.idx)
            if before is not None:
                squared = (cell.mean_x - before.mean_x) ** 2 + (cell.mean_y - before.mean_y) ** 2
                step_distances.append(np.sqrt(squared))
        cell_distances.append(step_distances)
    return cell_distances

def calculate_cell_velocity(cells, time_interval):
    """
    Return the velocity of each cell for every pair of consecutive frames.

    The velocities are the per-step distances from `calculate_cell_distances`
    divided by `time_interval`, so the result has exactly the same layout:
    one list per step, one value per matched cell.

    Parameters
    ----------
    cells : list of frames, as accepted by `calculate_cell_distances`.
    time_interval : time between two consecutive frames; must be positive.

    Raises
    ------
    ValueError
        If `time_interval` is zero or negative (a zero interval would
        otherwise silently produce inf/nan velocities).
    """
    if time_interval <= 0:
        raise ValueError("time_interval must be positive")

    # Reuse the distance computation instead of duplicating it
    return [
        [distance / time_interval for distance in step_distances]
        for step_distances in calculate_cell_distances(cells)
    ]

def calculate_cell_trajectory(cells):
    """
    Return the (mean_x, mean_y) position of every cell in every frame.

    `cells` is a list of frames, each a list of objects with `mean_x` and
    `mean_y`; the result mirrors that nesting with one tuple per cell.
    """
    return [
        [(cell.mean_x, cell.mean_y) for cell in frame]
        for frame in cells
    ]

def create_cell_metrics_table(cells, time_interval):
    """
    Build a DataFrame with one row per cell of the first frame.

    Columns: 'Cell Index' (position in the first frame, counting from 1),
    'Distance Traveled' and 'Velocity' (one value per step) and 'Trajectory'
    (one (x, y) tuple per frame). Values are picked by position, so the n-th
    entry of every step / frame is taken to belong to the n-th cell.
    """
    step_distances = calculate_cell_distances(cells)
    step_velocities = calculate_cell_velocity(cells, time_interval)
    frame_positions = calculate_cell_trajectory(cells)

    def history_of(position, per_frame_values):
        # Collect the entry at `position` from every step / frame
        return [values[position] for values in per_frame_values]

    positions = range(len(cells[0]))
    return pd.DataFrame({
        'Cell Index': [position + 1 for position in positions],
        'Distance Traveled': [history_of(position, step_distances) for position in positions],
        'Velocity': [history_of(position, step_velocities) for position in positions],
        'Trajectory': [history_of(position, frame_positions) for position in positions],
    })

# Apply the algorithm
results_a = trace_cells_over_images(images_A, plot=False)
results_b = trace_cells_over_images(images_B, plot=False)

# Set the time interval (in minutes)
time_interval = 2


def _rounded_for_report(table, ndigits=3):
    """
    Return a copy of a metrics table with all numbers rounded for display.

    DataFrame.round() only touches numeric columns, and the distance,
    velocity and trajectory columns hold Python lists, so calling
    table.round(3) left every value unrounded. Here the numbers inside the
    lists are rounded explicitly (and turned into plain floats so they print
    cleanly). The input table is not modified.
    """
    report = table.copy()
    for column in ('Distance Traveled', 'Velocity'):
        report[column] = table[column].apply(
            lambda values: [round(float(value), ndigits) for value in values])
    report['Trajectory'] = table['Trajectory'].apply(
        lambda points: [(round(float(x), ndigits), round(float(y), ndigits)) for x, y in points])
    return report


# table_a / table_b keep full precision because later cells compute statistics
# from them; only the printed and saved copies are rounded.

# Create the metrics table for results_a
table_a = create_cell_metrics_table(results_a, time_interval)
report_a = _rounded_for_report(table_a)
print(report_a)
report_a.to_csv('results_4.2/Q6_A.csv')

# Create the metrics table for results_b
table_b = create_cell_metrics_table(results_b, time_interval)
report_b = _rounded_for_report(table_b)
print(report_b)
report_b.to_csv('results_4.2/Q6_B.csv')



### 4.2.7

Within the cells that you have followed, identify differences in the trajectories by assessing the trajectories separately for both conditions.

For this part we decided to track the movements of the cells on a plot. This way we can compare the trajectories and speeds of both groups against each other. First we show group A and then group B. 

It is visible that the cells in the first group (A) cover a smaller area over time. The cells in the second group (B) travel noticeably further and do not stay in one spot. Some cells in group A move a bit, but no more than half as far as group B's cells.

In [None]:
def plot_cell_metrics_over_time(metrics_table, image_size=550):
    """
    Draw the trajectory of every cell in one figure.

    Parameters
    ----------
    metrics_table : DataFrame with a 'Cell Index' column and a 'Trajectory'
        column holding, per cell, a list of (x, y) points.
    image_size : upper limit of both axes; adjust it to the image size.

    Each trajectory is drawn as a dashed line with a green marker labelled
    "Start" on its first point and a red marker labelled "Stop" on its last.
    """
    fig, ax = plt.subplots(figsize=(10, 10))

    # Set the x and y limits based on the image size
    ax.set_xlim(0, image_size)
    ax.set_ylim(0, image_size)

    # Rows are paired by position, so tables with a non-default index work too
    for cell_index, trajectory in zip(metrics_table['Cell Index'], metrics_table['Trajectory']):
        if len(trajectory) == 0:
            # Nothing to draw, and no first / last point to mark
            continue

        x_values = [point[0] for point in trajectory]
        y_values = [point[1] for point in trajectory]

        # Plot the points as recorded. Padding shorter trajectories with
        # zeros, as was done before, would draw a bogus segment to (0, 0).
        ax.plot(x_values, y_values, '--', alpha=0.5, label=f"Cell {cell_index}")

        # Add start and end markers with labels
        start_point = (x_values[0], y_values[0])
        end_point = (x_values[-1], y_values[-1])
        ax.plot(*start_point, 'go', markersize=5)
        ax.plot(*end_point, 'ro', markersize=5)
        ax.text(start_point[0], start_point[1], "Start", fontsize=8, verticalalignment='bottom')
        ax.text(end_point[0], end_point[1], "Stop", fontsize=8, verticalalignment='bottom')

    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.set_title("Cell Trajectories")
    ax.legend()
    plt.show()

# Draw the trajectory figure of each condition in turn: table_a, then table_b
for condition_table in (table_a, table_b):
    plot_cell_metrics_over_time(condition_table)


### 4.2.8

If there are differences within the cells in one condition, these differences need to be formulated in terms of variation within one condition. This needs to be done for all the measurements in a condition.

In [None]:
import statistics
import numpy as np

def _per_cell_summaries(table):
    """
    Return (mean velocity per cell, total distance per cell) for one table.

    Iterates over the rows that are actually present instead of assuming
    exactly 15 cells, so a condition with fewer tracked cells no longer fails.
    """
    mean_velocities = [statistics.mean(velocity) for velocity in table['Velocity']]
    total_distances = [sum(distance_traveled) for distance_traveled in table['Distance Traveled']]
    return mean_velocities, total_distances


# Calculate the variation and standard deviation for condition A
velocities_condition_a, distances_condition_a = _per_cell_summaries(table_a)

# Variance / standard deviation of the per-cell mean velocities
variation_condition_a = np.var(velocities_condition_a)
std_condition_a = np.std(velocities_condition_a)
total_distance_traveled_condition_a = np.sum(distances_condition_a)

print(f"Variation in velocity for condition A: {variation_condition_a}")
print(f"Standard deviation in velocity for condition A: {std_condition_a}")
print(f"Total distance traveled for all cells in condition A: {total_distance_traveled_condition_a}")


# Calculate the variation and standard deviation for condition B
velocities_condition_b, distances_condition_b = _per_cell_summaries(table_b)

variation_condition_b = np.var(velocities_condition_b)
std_condition_b = np.std(velocities_condition_b)
total_distance_traveled_condition_b = np.sum(distances_condition_b)

print(f"Variation in velocity for condition B: {variation_condition_b}")
print(f"Standard deviation in velocity for condition B: {std_condition_b}")
print(f"Total distance traveled for all cells in condition B: {total_distance_traveled_condition_b}")


### 4.2.9

Deduce from the data if a correlation between speed with shape and texture can be established.

- When comparing the velocity and distance traveled between the two conditions, we can see that set A (the MTLn3-Ctrl images) has roughly a half to a third of the average velocity and distance traveled of set B (the MTLn3+EGF images). These two go hand in hand of course, but it is interesting to see that set B's cells often have more than twice the distance traveled. \
- The average size of the cells in set A is also smaller than in set B, so a trend can be seen across the two conditions where speed increases with larger size. This can be seen in the tables from Q5. \
- Thus the size does appear to have an effect on the speed of the cells. However, the roundness in set A was also better, so it is possible that, due to the better identification of a cell, less variance in locating the cell was introduced. 

### 4.2.10

From  the  measurements  that  are  compiled, some  conclusions might  have  been  possible. However, given the result, ideas might have developed how to setup a next experiment to get even better measurements and conclusions.

10. Comment on image quality and image resolution in all dimensions studied. 

The quality of the images was not great. Sometimes a glare was visible in the middle of the image; this caused some cells to appear different from other cells and made the background less dark, which made it hard to set a definitive background level. 

There was also sometimes black noise in the white cells, not sure how this can be prevented, but it made identification of certain perimeter cells more difficult. 

Some cell bodies looked more like smears and had a ghost-like appearance. This caused them to blend in with the background, so that parts of them were not included in the segmented cell body. This may be due to movement, so a shorter interval between images might help. 
