In [4]:
import anndata
import copy
import datetime
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import sys
import pandas as pd
import pickle as pkl
import scipy.stats
import warnings


In [5]:
# Directory holding the input files. Set the LINNAEUS_DATA_DIR environment
# variable to point somewhere else; the default is the original location, so
# the notebook behaves as before when the variable is unset.
data_dir = os.environ.get(
    "LINNAEUS_DATA_DIR",
    "/Users/DominikKlein/Documents/PhD/data/Hu_zebrafish_linnaeus",
)
# Despite the `_dir` suffix, both names hold *file* paths.
adata_dir = os.path.join(data_dir, "adata.h5ad")
trees_dir = os.path.join(data_dir, "trees.pkl")

In [6]:
# Load the annotated data matrix from the .h5ad file.
# `anndata.read` is a deprecated alias of `read_h5ad` and is removed in newer
# anndata releases, so call the explicit reader.
adata = anndata.read_h5ad(adata_dir)

In [8]:
# Load the pickled tree collection.
# NOTE(review): unpickling can execute arbitrary code stored in the file, so
# only load `trees.pkl` from a trusted source.
with open(trees_dir, mode='rb') as file:
    trees = pkl.load(file)

In [9]:
# Sample identifiers to keep; matched against the 'orig.ident' column of adata.obs.
dpis_orig_ident = [
    'H5',
    'Hr26',
    'Hr2a',
    'Hr2b',
]
# Time-point value for each sample above, in the same order
# (presumably days post injury -- TODO confirm).
dpis = [
    0,
    3,
    7,
    7,
]
# Sorted distinct time points.
dpis_unq = np.unique(dpis)
# Human-readable labels, one per distinct time point.
dpis_str = [
    'ctrl',
    'dpi3',
    'dpi7',
]
# Key into `trees` for each distinct time point (positionally aligned with dpis_unq).
dpis_trees = [
    'H5',
    'Hr26',
    'Hr2',
]
# For each distinct time point, node ids '0' .. str(n - 1) form the core subgraph.
core_nodes = [
    100,
    100,
    100,
]



In [11]:
# Keep only the cells that belong to one of the selected samples.
in_selected_samples = adata.obs['orig.ident'].isin(dpis_orig_ident)
adata_dpis = adata[in_selected_samples].copy()

# Tag every cell with the time point of its sample, then store it as a categorical.
adata_dpis.obs['dpi'] = 0
for dpi_orig, dpi_value in zip(dpis_orig_ident, dpis):
    from_this_sample = (adata_dpis.obs['orig.ident'] == dpi_orig)
    adata_dpis.obs.loc[from_this_sample, 'dpi'] = dpi_value

adata_dpis.obs['dpi'] = adata_dpis.obs['dpi'].astype("category")


# For each distinct time point: the full tree, and the subgraph induced by the
# first `n_core` node ids ('0', '1', ...).
trees_dpis = {}
trees_dpis_core = {}
for dpi, tree_key, n_core in zip(dpis_unq, dpis_trees, core_nodes):
    full_tree = trees[tree_key]
    core_node_ids = [str(n) for n in range(n_core)]
    trees_dpis[dpi] = full_tree
    trees_dpis_core[dpi] = full_tree.subgraph(core_node_ids)

In [12]:
def intersection(lst1, lst2):
    """Return the elements present in both inputs as a list.

    Duplicates are dropped and the order of the result is unspecified,
    because the computation goes through sets.
    """
    first = set(lst1)
    second = set(lst2)
    common = first & second
    return list(common)
     
def scar_distance(lst1, lst2):
    """Return a normalised dissimilarity in [0, 1] between two collections.

    The value is ``(m - k) / m`` where ``k`` is the number of distinct
    elements shared by both inputs and ``m`` is the larger of the two
    distinct-element counts.

    Both counts are taken on the de-duplicated inputs. Measuring the raw list
    lengths while intersecting sets would give identical inputs that contain
    repeats (e.g. ``[1, 1]`` vs ``[1, 1]``) a non-zero distance.

    Parameters
    ----------
    lst1, lst2 : iterables of hashable items.

    Returns
    -------
    float
        0.0 when the distinct elements coincide, 1.0 when nothing is shared.
        Two empty inputs are treated as identical and yield 0.0 rather than
        dividing zero by zero.
    """
    set1, set2 = set(lst1), set(lst2)
    max_len = max(len(set1), len(set2))
    if max_len == 0:
        # Both inputs are empty: nothing differs, and the ratio is undefined.
        return 0.0
    inter_len = len(set1 & set2)
    return (max_len - inter_len) / max_len

In [13]:
# Per-cell bookkeeping columns: 'core' receives the integer id of the cell's
# predecessor node in the tree, 'annot' flags cells that occur in a tree.
adata_dpis.obs['core'] = 0
adata_dpis.obs['annot'] = False

Ci_core = {}
Ci = {}
for dpi in trees_dpis_core:
    # Pairwise distance between core nodes: path length from their lowest
    # common ancestor to the first node plus path length to the second.
    core_tree = trees_dpis_core[dpi]
    n_core_nodes = len(core_tree.nodes)
    C_core = np.zeros((n_core_nodes, n_core_nodes))
    lca_pairs = nx.algorithms.lowest_common_ancestors.all_pairs_lowest_common_ancestor(core_tree)
    for nodes, an in lca_pairs:
        row, col = int(nodes[0]), int(nodes[1])
        to_first = nx.dijkstra_path_length(core_tree, an, nodes[0])
        to_second = nx.dijkstra_path_length(core_tree, an, nodes[1])
        C_core[row, col] = to_first + to_second
        # Mirror the entry so the matrix is symmetric.
        C_core[col, row] = C_core[row, col]
    Ci_core[dpi] = C_core

    # Collect the cells of this time point that appear as nodes in the full tree.
    cells = (adata_dpis.obs['dpi'] == dpi)
    cells_idx = np.where(cells)[0]
    cells_annot = []
    cells_annot_idx = []
    for i, obsi in enumerate(adata_dpis[cells].obs_names):
        if obsi not in trees_dpis[dpi].nodes:
            continue
        cells_annot.append(obsi)
        cells_annot_idx.append(cells_idx[i])

    adata_dpis.obs.loc[cells_annot, 'annot'] = True
    for i, obsi in enumerate(cells_annot):
        # Store the first predecessor of the cell's node, converted to int.
        predecessor_ids = [int(n) for n in trees_dpis[dpi].pred[obsi]]
        adata_dpis.obs.loc[[obsi], 'core'] = predecessor_ids[0]


In [47]:
# Inspect the predecessor mapping of the first annotated cell.
# Relies on `cells_annot` and `dpi` still holding the values left behind by
# earlier loops, so the result depends on which cells were run before this one.
obsi = cells_annot[0]
trees_dpis[dpi].pred[obsi]

AtlasView({'1': {}})

In [23]:
# Distinct values present in the 'core' column.
adata_dpis.obs["core"].unique()

array([16,  0,  3,  7, 10,  8,  1,  4, 11,  2, 14,  9, 15, 13,  5, 12,  6,
       21, 19, 18, 23, 20, 29, 17, 28, 25, 24, 22, 27, 26])

In [24]:
# Keep only the cells flagged in the 'annot' column.
is_annotated = adata_dpis.obs['annot']
adata_dpis_annot = adata_dpis[is_annotated].copy()

# Of those, keep the cells whose 'dpi' is 0, 3 or 7.
at_selected_dpi = adata_dpis_annot.obs['dpi'].isin([0, 3, 7])
adata_ctrl_dpi3_dpi7 = adata_dpis_annot[at_selected_dpi].copy()

In [25]:
# Expand each core-node distance matrix into a cell-by-cell DataFrame: the
# distance between two cells is the distance between their 'core' nodes.
Ci = {}
for j, dpi in enumerate(dpis_unq):
    print(dpi)
    cells = (adata_dpis_annot.obs['dpi'] == dpi)
    cells_idx = np.where(cells)[0]
    # Loop-invariant work, done once per time point. Previously every row
    # iteration rebuilt an AnnData view and repeated a label-based lookup of
    # the whole 'core' column.
    cell_names = adata_dpis_annot.obs_names[cells]
    cell_cores = adata_dpis_annot.obs.loc[cell_names, 'core'].values
    C = np.zeros((len(cells_idx), len(cells_idx)))
    for i, ci in enumerate(cell_names):
        # Row i: distances from this cell's core node to every cell's core node.
        C[i, :] = Ci_core[dpi][cell_cores[i], cell_cores]
    Ci[dpi] = pd.DataFrame(C,
                           index=cell_names,
                           columns=cell_names)


0
3
7


In [27]:
# Shapes of the three per-time-point distance frames.
Ci[0].shape, Ci[3].shape, Ci[7].shape

((1036, 1036), (4216, 4216), (9665, 9665))

In [46]:
# Display the cell-by-cell distance frame stored under key 0.
Ci[0]

Unnamed: 0,H5_AACCGCGAGTCCCACG,H5_AAATGCCGTACCGCTG,H5_AACTCAGCAATGACCT,H5_AACGTTGTCGTAGGAG,H5_AACCATGTCAGCAACT,H5_AAAGATGTCGCGTAGC,H5_AAAGTAGAGTTCGATC,H5_AAACCTGGTCTCATCC,H5_AAACCTGAGTCCATAC,H5_AAAGTAGGTCATATCG,...,H5_TTCCCAGGTCATGCAT,H5_TTTCCTCTCCTGCTTG,H5_TTTGTCATCAACACCA,H5_TTTACTGGTCCAGTAT,H5_TTTGGTTAGTGTTTGC,H5_TTGTAGGCACTTGGAT,H5_TTTACTGGTCCGACGT,H5_TTGTAGGCAGACTCGC,H5_TTGGCAAGTGCACCAC,H5_TTGTAGGGTTAAGGGC
H5_AACCGCGAGTCCCACG,0.0,5.0,4.0,7.0,6.0,5.0,5.0,6.0,5.0,7.0,...,4.0,3.0,3.0,1.0,1.0,4.0,6.0,6.0,7.0,4.0
H5_AAATGCCGTACCGCTG,5.0,0.0,3.0,4.0,1.0,0.0,0.0,1.0,2.0,4.0,...,3.0,2.0,2.0,4.0,4.0,5.0,1.0,3.0,4.0,5.0
H5_AACTCAGCAATGACCT,4.0,3.0,0.0,5.0,4.0,3.0,3.0,4.0,3.0,5.0,...,0.0,1.0,1.0,3.0,3.0,4.0,4.0,4.0,5.0,4.0
H5_AACGTTGTCGTAGGAG,7.0,4.0,5.0,0.0,5.0,4.0,4.0,5.0,2.0,0.0,...,5.0,4.0,4.0,6.0,6.0,7.0,5.0,3.0,0.0,7.0
H5_AACCATGTCAGCAACT,6.0,1.0,4.0,5.0,0.0,1.0,1.0,0.0,3.0,5.0,...,4.0,3.0,3.0,5.0,5.0,6.0,0.0,4.0,5.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
H5_TTGTAGGCACTTGGAT,4.0,5.0,4.0,7.0,6.0,5.0,5.0,6.0,5.0,7.0,...,4.0,3.0,3.0,3.0,3.0,0.0,6.0,6.0,7.0,0.0
H5_TTTACTGGTCCGACGT,6.0,1.0,4.0,5.0,0.0,1.0,1.0,0.0,3.0,5.0,...,4.0,3.0,3.0,5.0,5.0,6.0,0.0,4.0,5.0,6.0
H5_TTGTAGGCAGACTCGC,6.0,3.0,4.0,3.0,4.0,3.0,3.0,4.0,1.0,3.0,...,4.0,3.0,3.0,5.0,5.0,6.0,4.0,0.0,3.0,6.0
H5_TTGGCAAGTGCACCAC,7.0,4.0,5.0,0.0,5.0,4.0,4.0,5.0,2.0,0.0,...,5.0,4.0,4.0,6.0,6.0,7.0,5.0,3.0,0.0,7.0


In [45]:
# Distinct 'core' values among the annotated cells whose 'dpi' equals 7.
adata_dpis_annot[adata_dpis_annot.obs["dpi"] == 7].obs.core.unique()

array([ 1, 10, 21, 19,  5,  3, 14, 18, 23,  0, 13, 20, 11, 29, 17,  4, 28,
       16, 25,  6,  9, 24,  7,  2, 22, 12, 27,  8, 15, 26])

In [38]:
# Distinct values of the 'dpi' column among the annotated cells.
adata_dpis_annot.obs["dpi"].unique()

[0, 3, 7]
Categories (3, int64): [0, 3, 7]

In [None]:
# Debugging copy of the row lookup used when filling the cell-by-cell matrix.
# It reads `dpi`, `ci` and `cells`, which must already exist from a previously
# run loop; this cell has no execution count, so it was never run as saved.
Ci_core[dpi][adata_dpis_annot.obs.loc[[ci], 'core'].values, 
                               adata_dpis_annot.obs.loc[adata_dpis_annot[cells].obs_names,'core'].values]

In [1]:
import pandas as pd
import numpy as np

In [29]:
# Scratch: small array of one-character strings, deliberately unsorted.
a = np.array(["2", "3", "1"])

In [30]:
# np.sort returns a sorted copy; `a` itself is not modified.
np.sort(a)

array(['1', '2', '3'], dtype='<U1')

In [31]:
# Display `a` to confirm it is unchanged after np.sort.
a

array(['2', '3', '1'], dtype='<U1')

In [32]:
# Scratch: one-column frame ("c_1") holding `a`, indexed 0 .. len(a) - 1.
row_labels = range(len(a))
df = pd.DataFrame(a, index=row_labels, columns=["c_1"])

In [38]:
# Scratch: a set of three integers, written as a set literal.
s = {3, 1, 4}

In [39]:
# Display the set.
s

{1, 3, 4}

In [40]:
# sorted() returns a new ascending list built from the set's elements.
sorted(s)

[1, 3, 4]