# Notebook with code that uses the lineages from Guignard et al.

## Import of the necessary libraries

In [None]:
import os
from useful_functions import (get_symetric_volumes, plot_around_div,
                              get_life_span, read_property_file,
                              compute_neighbors_stability,
                              get_ratio_mother_sisters_volume)
from colormap import ColorMap
from scipy import stats
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.ticker import ScalarFormatter
%matplotlib inline

## Loading the data

The datasets can be retrieved [there](https://figshare.com/collections/Contact-area_dependent_cell_communications_and_the_morphological_invariance_of_ascidian_embryogenesis/4529990/2). The lineage and geometrical data are stored in the tar archives named `*_properties.tar.gz`. Within these archives there are a pickle (`.pkl`) and an xml (`.xml`) file which contain the information necessary for this notebook to work.

Once the data are downloaded, the function `read_property_file` loads such an xml or pickle file and returns a dictionary that maps each property name to the dictionary containing that property:
```python
properties = {property_name(string): property_dict(dict), ... }
```
- `"cell_lineage"` is the lineage tree as a dictionary that maps a cell id at time $t$ to the `list` of its corresponding cell id(s) at time $t+1$.
```python
{mother_cell_id(int): [daughter_cell_id_1(int), ...], ...}
```
- `"all_cells"` is the list of all the cell ids:
```python
[cell_id(int), ...]
```
- `"cell_history"` is a dictionary that maps a cell id to the list of all the cell ids that represent the same cell (ids in time):
```python
{cell_id(int): [cell_id1(int), cell_id2(int), ...], ...}
```
- `"cell_label_in_time"` maps a time $t$ to the list of cells that exist at that time:
```python
{time(int): [cell_id1(int), cell_id2(int), ...], ...}
```
- The other dictionaries map a cell id to its associated values:
 - `"cell_name"` maps the names of the cells as defined by Conklin (after manual correction).
 - `"cell_barycenter"` maps the cell barycenters
 
 - `"cell_fate"`, `"cell_fate_2"` and `"cell_fate_3"` represent different fate mappings (`"cell_fate_3"` is the one used for this paper).
 - `"cell_volume"` maps the volume of the cells in voxels (computed as the sum of voxels inside the segmented cell).
 - `"cell_contact_surface"` maps the cell-cell surface of contacts in voxels (as described in Supp 5.2). `"cell_contact_surface"` is a dictionary of dictionaries:
```python
{cell_id(int): {cell_neighbour_id(int): area_of_contact(float), ...}, ... }
```
In this dictionary if `cell_neighbour_id%10**4==1` (i.e. the label part of the id is 1) then this `cell_neighbour_id` represents the surface of contact to the outside.


It is worth noting that the cell ids are built so that both the time point a cell belongs to and its corresponding label in the image data can be extracted from them:
- `cell_id = t*10**4 + label`

hence:
- `label = cell_id%10**4` is the corresponding label in the image
- `t = cell_id//10**4` is the corresponding time (note that // is the floor division)

__SINCE THE AMOUNT OF DATA IS LARGE, THE LOADING TIME MIGHT TAKE SEVERAL MINUTES__

### Paths definition

In [None]:
# Directory holding the property files (xml/pkl) downloaded from the
# figshare collection linked above; './Data/' is the default location.
path_to_data = './Data/'

# Directory in which the generated figures are saved.
path_to_figures = './Outputs_figures/'

### Build a color map from the different tissues in our datasets

In [None]:
# Tissue names, in the order in which they are handed to the colour map.
tissue_order = [
    'Head Endoderm',
    '1st Endodermal Lineage',
    '2nd Endodermal Lineage',
    'Head Epidermis',
    'Tail Epidermis',
    'Germ Line',
    'Mesenchyme',
    '1st Lineage, Tail Muscle',
    '2nd Lineage, Tail Muscle',
    '1st Lineage, Notochord',
    '2nd Lineage, Notochord',
    'Trunk Lateral Cell',
    'Trunk Ventral Cell',
    'Anterior Dorsal Neural Plate',
    'Anterior Ventral Neural Plate',
    'Posterior Dorsal Neural Plate',
    'Posterior Lateral Neural Plate',
    'Posterior Ventral Neural Plate',
]

# The colour map is built from the tissue list extended with a trailing
# 'undeter' entry (presumably for cells without a determined fate -- TODO
# confirm) and the 'rainbow' scheme name.
CMapFates = ColorMap([*tissue_order, 'undeter'], 'rainbow')

### Loading and prepping the data

In [None]:
# Load the data; the output is the property dictionary described above.
# The directory and the file name are passed to os.path.join as separate
# arguments so that path_to_data works with or without a trailing separator.
properties = read_property_file(os.path.join(path_to_data, 'Astec-Pm10_properties.xml'))

lin_tree = properties['cell_lineage']
names = properties['cell_name']
fates = properties['cell_fate']
fates2 = properties['cell_fate_2']
fates3 = properties['cell_fate_3']
vol = properties['cell_volume']
surf_ex = properties['cell_contact_surface']
prob_cells = properties['problematic_cells']

# Removing the potential problematic cells. Only their own entry (as a
# mother) is dropped from the lineage tree; they can still appear in the
# daughter list of another cell.
for c in prob_cells:
    lin_tree.pop(c, None)

# Building the inverse lineage tree (daughter cell -> mother cell)
inv_lin_tree = {daught: mother for mother, daughters in lin_tree.items() for daught in daughters}

# Building the cell surfaces: total contact area of a cell, summed over all
# of its neighbours
surfaces = {cell: np.sum(list(neighbors.values())) for cell, neighbors in surf_ex.items()}


# Building the compactness metric
def compactness(V, S):
    """Return the compactness of a shape of volume V and surface S.

    Computed as pi**(1/3) * (6*V)**(2/3) / S, which equals 1 for a
    perfect sphere.
    """
    return (np.pi**(1/3)*(6*V)**(2/3))/S


# Compactness of every cell that has a volume; a cell missing from
# `surfaces` raises a KeyError here.
comp = {cell: compactness(vol[cell], surfaces[cell]) for cell in vol}

## This is the start of my code for getting several values from the lineages.

For each cell, the code below collects: the ID of the cell when it first appears, the ID of the same cell at the last time point before it divides (the mother cell), the two IDs of the cells that the mother cell divides into, and the time between the cell first appearing and it becoming a mother cell (counted in time points). If a cell never divides during the experiment, the row still starts with its first cell ID; the second position then holds the last cell ID of that cell, the child IDs are filled in as zero, and the last value is the total time the cell was present in the experiment.

After all the first cells and mother cells are collected, the code loops over the newly generated list to look up the name of each cell in the `cell_name` property and its fate in each of the three fate properties (`cell_fate`, `cell_fate_2` and `cell_fate_3`).

All code and text before this markdown cell was provided by Guignard et al. in [this repository](https://github.com/leoguignard/ASTEC-examples).

In [None]:
def _follow_lineage(start, lineage):
    """Follow a cell forward in time until it divides or its track ends.

    Parameters
    ----------
    start : cell id the walk starts from.
    lineage : mapping from a cell id to the list of its successor id(s).

    Returns
    -------
    track : list of the successive ids of the cell, ``start`` included.
    successors : successor ids of ``track[-1]``; empty when ``track[-1]``
        has no entry in ``lineage``.
    """
    track = [start]
    successors = lineage.get(start, [])
    # Exactly one successor means "the same cell at the next time point".
    while len(successors) == 1:
        track.append(successors[0])
        successors = lineage.get(successors[0], [])
    return track, successors


# Cells that are the single successor of another cell: they continue an
# existing track and therefore must not start a row of their own.
continued_cells = {daughters[0] for daughters in lin_tree.values() if len(daughters) == 1}

# Cells at which a track starts. Selecting them up front (instead of deleting
# visited cells while walking) makes the result independent of the iteration
# order of lin_tree.
remaining_cells = {cell: daughters for cell, daughters in lin_tree.items()
                   if cell not in continued_cells}

# One row per track:
# [first cell id, mother/last cell id, child 1 id, child 2 id, track length]
cell_division_info = []

for x in remaining_cells:
    c, successors = _follow_lineage(x, lin_tree)
    if len(successors) >= 2:
        # The track ends with a division: report the first two daughters.
        child_1, child_2 = successors[0], successors[1]
    else:
        # The track ends without a division (no entry or no daughter):
        # the children are filled in with 0's.
        child_1, child_2 = 0, 0
    cell_division_info.append([x, c[-1], child_1, child_2, len(c)])
        
# Second pass over the collected rows: each row is extended with the name and
# the three fate values of its starting cell (first element of the row).
# A 0 is stored whenever a property has no entry for that cell.
for row in cell_division_info:
    start_cell = row[0]
    for prop in (names, fates, fates2, fates3):
        row.append(prop[start_cell] if start_cell in prop else 0)

This next cell imports the CSV module and writes the cell_division_info to a CSV file.

In [None]:
import csv
import os

# Column titles, in the same order as the values of each row of
# cell_division_info
header = [
    'Beginning CellID',
    'Mother_CellID/Last_CellID',
    'Child1_CellID',
    'Child2_CellID',
    'Life of mother in t',
    'the name of the Beginning cell',
    'fate 1 of the Beginning cell',
    'fate 2 of the Beginning cell',
    'fate 3 of the Begining cell',
]

csv_path = './CSVOutput/ASTECPM10Data.csv'

# open() does not create missing folders, so make sure the output folder
# exists before writing (no error if it is already there).
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# newline='' lets the csv module control the line endings itself
with open(csv_path, 'w', newline='') as f:
    writer = csv.writer(f)

    # writes the header in the first row
    writer.writerow(header)

    # writes one row per entry of cell_division_info
    writer.writerows(cell_division_info)