# Reformat trajectory data for Dictys pipeline input
## Load data


In [1]:
import pandas as pd
import dictys
# Tab-separated trajectory tables read from ../tmp (presumably written by an
# earlier pipeline step -- nothing in this notebook creates them).
# dist: first row is the header and first column is the row index; the row
# labels are the cell names used for lookups further down.
dist = pd.read_csv('../tmp/dist.tsv.gz', sep='\t', header=0, index_col=0)
# edge and branch: headerless tables that keep pandas' default integer row and
# column labels.
edge = pd.read_csv('../tmp/edge.tsv.gz', sep='\t', header=None, index_col=None)
branch = pd.read_csv('../tmp/branch.tsv.gz', sep='\t', header=None, index_col=None)


## Map ATAC cells onto trajectory
This step is needed only when single-cell transcriptome and chromatin accessibility were profiled separately, i.e. in different cells. Here ArchR (https://www.archrproject.com/) is used to map each ATAC cell to one RNA cell, following its tutorial. The mapping itself is not performed in this notebook; only its results are provided in `../data/atac_map.tsv.gz`, and they are used below to place ATAC cells onto the trajectory.

Other integration software is also compatible, as long as it can provide a location for each ATAC cell on the trajectory inferred for RNA cells. This can be done by mapping each ATAC cell either to an RNA cell or to a new location not occupied by any RNA cell.

In [2]:
# Load the ATAC-to-RNA cell map: a headerless tab-separated table whose first
# column (ATAC cell name) becomes the index, leaving a single column labelled 1
# that holds the RNA cell each ATAC cell was mapped to.
dmap = pd.read_csv(
    '../data/atac_map.tsv.gz',
    sep='\t',
    header=None,
    index_col=0,
)
dmap.head()

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
scATAC_BMMC_D6T1#TGGTCCTGTATCTGCA-1,CD34_32_R5:GCAGCCATCGTTGCCT-1
scATAC_BMMC_D6T1#GCATTGAAGATTCCGT-1,BMMC_10x_GREENLEAF_REP1:CTCAATTTCAGGCGAA-1
scATAC_BMMC_D6T1#AGTTACGAGAACGTCG-1,BMMC_10x_GREENLEAF_REP2:CTCATGCTCCATGATG-1
scATAC_BMMC_D6T1#GCACCTTAGACTAGCG-1,BMMC_10x_GREENLEAF_REP1:CTCAATTTCAGGCGAA-1
scATAC_BMMC_D6T1#TGTAGCAGTCACTCTC-1,BMMC_10x_GREENLEAF_REP1:TCGGGTGCACTACGGC-1


In [3]:
# Find ATAC cell distance to all nodes
# Row position of every cell name in `dist` (for a repeated name the last
# position wins).
namedict = {name: pos for pos, name in enumerate(dist.index)}
# Drop ATAC cells whose mapped RNA cell (column 1) has no row in `dist`.
dmap = dmap[dmap[1].isin(namedict)]
# Each remaining ATAC cell takes the distance row of the RNA cell it maps to;
# the copied rows are then relabelled with the ATAC cell names.
dist_atac = dist.iloc[[namedict[rna_cell] for rna_cell in dmap[1].tolist()]]
dist_atac.index = dmap.index
dist_atac.head()


Unnamed: 0_level_0,S0,S1,S2,S3
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
scATAC_BMMC_D6T1#TGGTCCTGTATCTGCA-1,6.999982,13.287467,16.496317,21.329984
scATAC_BMMC_D6T1#GCATTGAAGATTCCGT-1,8.62536,28.912809,18.121695,5.704642
scATAC_BMMC_D6T1#AGTTACGAGAACGTCG-1,9.961382,10.326067,19.457717,24.291384
scATAC_BMMC_D6T1#GCACCTTAGACTAGCG-1,8.62536,28.912809,18.121695,5.704642
scATAC_BMMC_D6T1#TGTAGCAGTCACTCTC-1,8.591259,11.69619,18.087594,22.921261


In [4]:
# Find ATAC cell edge
# `branch` carries no cell labels, so rows are picked by the position that the
# mapped RNA cell occupies in `dist` (via `namedict`). The row labels are left
# as those integer positions rather than renamed to ATAC cell names.
branch_atac = branch.iloc[
    [namedict[rna_cell] for rna_cell in dmap[1].tolist()]
]
branch_atac.head()


Unnamed: 0,0,1
7774,0,1
1365,0,3
4522,0,1
1365,0,3
2622,0,1


In [5]:
# Find ATAC cell coordinates in low dimensions
coord = pd.read_csv('../data/coord_rna.tsv.gz', header=0, index_col=0, sep='\t')
# Select rows by RNA cell name instead of reusing `namedict`: those positions
# were computed from the row order of `dist`, and nothing guarantees that
# `coord` (a different file) lists cells in the same order. A label lookup
# returns the same rows when the two files are aligned, and raises KeyError
# for a missing cell instead of silently assigning another cell's coordinates.
# NOTE(review): assumes the index of `coord` holds the same RNA cell names as
# column 1 of `dmap` -- confirm against coord_rna.tsv.gz.
coord_atac = coord.loc[dmap[1].values]
# Relabel the copied rows with the ATAC cell names.
coord_atac.index = dmap.index
coord_atac.head()


Unnamed: 0_level_0,x,y
0,Unnamed: 1_level_1,Unnamed: 2_level_1
scATAC_BMMC_D6T1#TGGTCCTGTATCTGCA-1,-9.59114,-1.699753
scATAC_BMMC_D6T1#GCATTGAAGATTCCGT-1,-0.043029,10.9408
scATAC_BMMC_D6T1#AGTTACGAGAACGTCG-1,-12.335616,-5.914646
scATAC_BMMC_D6T1#GCACCTTAGACTAGCG-1,-0.043029,10.9408
scATAC_BMMC_D6T1#TGTAGCAGTCACTCTC-1,-5.372774,-3.239048


## Constructing objects

In [6]:
# Trajectory object built from the edge table and the RNA cell distance table.
traj = dictys.traj.trajectory.fromdist(
    edge.values,
    dist.values,
)
# Cell locations on that trajectory, built from each cell's branch row and its
# distance row: first for the RNA cells, then for the mapped ATAC cells.
# NOTE(review): exact array shapes expected by `fromdist` are defined by dictys
# and are not visible here -- confirm against its API documentation.
point = dictys.traj.point.fromdist(
    traj,
    branch.values,
    dist.values,
)
point_atac = dictys.traj.point.fromdist(
    traj,
    branch_atac.values,
    dist_atac.values,
)

## Save data

In [7]:
# Write the trajectory itself.
traj.to_file('../output/traj_node.h5')
# Write the RNA and ATAC cell locations with traj=False.
# NOTE(review): presumably traj=False omits the trajectory from these files
# because it is already stored in traj_node.h5 -- confirm in the dictys docs.
point.to_file('../output/traj_cell_rna.h5', traj=False)
point_atac.to_file('../output/traj_cell_atac.h5', traj=False)
# Write the ATAC cell coordinates as a gzipped TSV, keeping the column header
# and the ATAC cell names in the index.
coord_atac.to_csv(
    '../output/coord_atac.tsv.gz',
    sep='\t',
    index=True,
    header=True,
)
