In [1]:
# Lift the CPU usage limit imposed by some Jupyter versions
import os
os.environ.update(KMP_AFFINITY='')
# Raise matplotlib's animation embed limit so that large animations are allowed
import matplotlib
matplotlib.rcParams.update({'animation.embed_limit': 2**128})
import matplotlib.pyplot as plt
import pandas as pd
import dictys

# Folder that the cells below read their inputs from and write their outputs to
dictys_data_path = "/ocean/projects/cis240075p/asachan/datasets/B_Cell/multiome_1st_donor_UPMC_aggr/tut_files/skin/data"

In [3]:
# Read the three tab-separated tables stored under tmp/ in the data folder.
# dist: the first row is used as the header and the first column as the row index.
dist = pd.read_table(os.path.join(dictys_data_path, 'tmp/dist.tsv.gz'), header=0, index_col=0)
# edge and branch: no header row, so rows and columns get default integer labels.
edge = pd.read_table(os.path.join(dictys_data_path, 'tmp/edge.tsv.gz'), header=None)
branch = pd.read_table(os.path.join(dictys_data_path, 'tmp/branch.tsv.gz'), header=None)

# Preview the head of every table (and the shape of dist and branch)
# to confirm that loading worked.
print("dist DataFrame:")
display(dist.head())
print("dist DataFrame shape:", dist.shape)

print("edge DataFrame:")
display(edge.head())

print("branch DataFrame:")
display(branch.head())
print("branch DataFrame shape:", branch.shape)

dist DataFrame:


Unnamed: 0,S0,S1,S2,S3
Trial60.skin.R1.01.R2.02.R3.21.P1.56,1.796973,2.377683,10.384121,4.781205
Trial60.skin.R1.01.R2.03.R3.61.P1.54,8.058921,12.233577,0.528227,11.043153
Trial60.skin.R1.01.R2.06.R3.64.P1.56,3.439011,0.735645,12.026159,6.423243
Trial60.skin.R1.01.R2.06.R3.83.P1.55,1.028344,3.146313,9.615491,4.012575
Trial60.skin.R1.01.R2.11.R3.86.P1.56,1.677695,2.496962,10.264842,4.661926


dist DataFrame shape: (6436, 4)
edge DataFrame:


Unnamed: 0,0,1
0,0,1
1,0,2
2,0,3


branch DataFrame:


Unnamed: 0,0,1
0,0,1
1,0,2
2,0,1
3,0,1
4,0,1


branch DataFrame shape: (6436, 2)


In [3]:
import scanpy as sc
import os

# Load adata from tmp/adata_with_traj.h5ad inside the data folder.
# The folder comes from dictys_data_path (defined in the setup cell) instead of a
# second copy of the absolute path, so both cells always point at the same place.
adata = sc.read_h5ad(os.path.join(dictys_data_path, 'tmp/adata_with_traj.h5ad'))



In [4]:
# Largest pseudotime value recorded in adata.obs for each of the four
# <state>_pseudotime columns, taken in the order S1, S2, S3, S0.
S1_max_pseudotime, S2_max_pseudotime, S3_max_pseudotime, S0_max_pseudotime = (
    adata.obs[column].max()
    for column in ('S1_pseudotime', 'S2_pseudotime', 'S3_pseudotime', 'S0_pseudotime')
)
# One value per line, in the same S1, S2, S3, S0 order
print(
    S1_max_pseudotime,
    S2_max_pseudotime,
    S3_max_pseudotime,
    S0_max_pseudotime,
    sep='\n',
)


12.761804034895938
12.761804034895938
11.571379671158407
8.587147894050306


In [4]:
# Output files, both written directly into the data folder
traj_file_path = os.path.join(dictys_data_path, 'traj_node.h5')
point_file_path = os.path.join(dictys_data_path, 'traj_cell_rna.h5')

# Build the trajectory from the edge and dist arrays, then save it
traj = dictys.traj.trajectory.fromdist(edge.to_numpy(), dist.to_numpy())
traj.to_file(traj_file_path)

# Build the points on that trajectory from the branch and dist arrays,
# then save them with traj=False
point = dictys.traj.point.fromdist(traj, branch.to_numpy(), dist.to_numpy())
point.to_file(point_file_path, traj=False)

### Prepare genome-related files

In [1]:
%%bash
# Stop at the first failing command, unset variable, or failed pipeline stage.
# Without this, a failed cd or download would go unnoticed and the remaining steps
# would run in the wrong folder or leave an empty/partial output file behind.
set -euo pipefail
cd /ocean/projects/cis240075p/asachan/datasets/B_Cell/multiome_1st_donor_UPMC_aggr/tut_files/skin/data

# Motifs (file motifs.motif)
# Option 1: from HOCOMOCO (https://hocomoco11.autosome.org/)
# The awk step rewrites field 2 of every header line (lines starting with ">") as
# its first character followed by the lower-cased remainder; other lines pass through.
wget -q -o /dev/null -O - 'https://hocomoco11.autosome.org/final_bundle/hocomoco11/full/MOUSE/mono/HOCOMOCOv11_full_MOUSE_mono_homer_format_0.0001.motif' | awk -F "\t" 'BEGIN { OFS = "\t"} {if (substr($1,1,1) == ">") $2=substr($2,1,1)tolower(substr($2,2)); print}' > motifs.motif
# Option 2: from HOMER
# dictys_helper motif_homer.sh -c 3 > motifs.motif
# Option 3: provide your custom motifs

# Reference genome (folder genome)
# Note: You need the same reference genome version as the chromatin accessibility reads
# Option 1: download genome from HOMER
dictys_helper genome_homer.sh mm10 genome
# Option 2: provide your custom genome

# Bed file for TSS (file gene.bed)
# Download gtf file from ensembl
wget -q -o /dev/null -O gene.gtf.gz http://ftp.ensembl.org/pub/release-102/gtf/mus_musculus/Mus_musculus.GRCm38.102.gtf.gz
# -f overwrites a gene.gtf left over from an earlier interrupted run instead of failing
gunzip -f gene.gtf.gz
# Convert to bed
dictys_helper gene_gtf.sh gene.gtf gene.bed
rm gene.gtf


Downloading reference genome mm10 in homer
