In [1]:
import pandas as pd
import numpy as np
import anndata
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import warnings
import pathlib

warnings.filterwarnings('ignore')

In [2]:
from matplotlib import rc

# Global matplotlib styling: small label text and thin strokes for plotted
# lines, axes and tick marks.
labelsize = 6
linewidth = 0.6

rc('lines', linewidth=linewidth)
rc('axes', labelsize=labelsize, linewidth=linewidth)
for _tick_group in ('xtick', 'ytick'):
    rc(_tick_group, labelsize=labelsize)
    rc(f'{_tick_group}.major', width=linewidth)
    # minor ticks are drawn slightly thinner than major ticks
    rc(f'{_tick_group}.minor', width=linewidth - 0.2)

In [3]:
# Default parameters. The "# Parameters" cell that follows reassigns every one
# of these names, so the values here only apply when that cell is absent.

# chromosome used to subset the gene, DMR and ATAC tables
chrom = 'chr2'
# genes to process; each entry is looked up in gene_meta, which is indexed by gene_id
# NOTE(review): 'Lhx6' looks like a gene symbol rather than a gene_id - confirm it resolves
genes = ['Lhx6']
# flank added on each side of the gene span when selecting DMRs
slop = 250000
# number of principal components handed to the neighbor graph
n_pc = 10
# resolution passed to the Leiden clustering
resolution = 1

In [4]:
# Parameters
chrom = "chr1"
genes = ["ENSMUSG00000055067.15", "ENSMUSG00000038331.15", "ENSMUSG00000036155.13", "ENSMUSG00000037138.17", "ENSMUSG00000036766.12", "ENSMUSG00000055013.15", "ENSMUSG00000026090.16", "ENSMUSG00000026235.14", "ENSMUSG00000033740.17", "ENSMUSG00000040265.16", "ENSMUSG00000101214.1", "ENSMUSG00000038305.15", "ENSMUSG00000050967.5", "ENSMUSG00000026504.17", "ENSMUSG00000100851.1", "ENSMUSG00000049690.15", "ENSMUSG00000026604.17", "ENSMUSG00000025969.15", "ENSMUSG00000036815.16", "ENSMUSG00000056536.14", "ENSMUSG00000100827.1", "ENSMUSG00000028033.16", "ENSMUSG00000052062.14", "ENSMUSG00000042429.8", "ENSMUSG00000039372.5", "ENSMUSG00000025959.13", "ENSMUSG00000062209.15", "ENSMUSG00000004110.17", "ENSMUSG00000026469.14", "ENSMUSG00000026335.16", "ENSMUSG00000070695.4", "ENSMUSG00000053963.7", "ENSMUSG00000041757.16", "ENSMUSG00000026603.13", "ENSMUSG00000015829.13", "ENSMUSG00000038473.14", "ENSMUSG00000008136.14", "ENSMUSG00000026482.13", "ENSMUSG00000026313.16", "ENSMUSG00000026058.11", "ENSMUSG00000042581.14", "ENSMUSG00000026077.15", "ENSMUSG00000066842.18", "ENSMUSG00000089872.10", "ENSMUSG00000019699.16", "ENSMUSG00000063659.11", "ENSMUSG00000026609.15", "ENSMUSG00000052534.15", "ENSMUSG00000030768.12", "ENSMUSG00000025935.10", "ENSMUSG00000057329.7", "ENSMUSG00000036206.12", "ENSMUSG00000102789.1", "ENSMUSG00000052726.15", "ENSMUSG00000026509.15", "ENSMUSG00000037568.12", "ENSMUSG00000018417.14", "ENSMUSG00000066877.11", "ENSMUSG00000025915.14", "ENSMUSG00000040710.10", "ENSMUSG00000004031.13", "ENSMUSG00000102432.1", "ENSMUSG00000026187.9", "ENSMUSG00000026447.16", "ENSMUSG00000026163.17", "ENSMUSG00000026463.17", "ENSMUSG00000039239.14", "ENSMUSG00000092083.4", "ENSMUSG00000032666.16", "ENSMUSG00000040612.13", "ENSMUSG00000042671.12", "ENSMUSG00000038349.10", "ENSMUSG00000016179.11", "ENSMUSG00000035131.14", "ENSMUSG00000038608.15", "ENSMUSG00000042807.15", "ENSMUSG00000073528.7", "ENSMUSG00000052331.14", "ENSMUSG00000026344.9", "ENSMUSG00000003134.10", 
"ENSMUSG00000026393.10", "ENSMUSG00000060424.14", "ENSMUSG00000010175.13", "ENSMUSG00000009418.16", "ENSMUSG00000073678.4", "ENSMUSG00000026430.16", "ENSMUSG00000026383.14", "ENSMUSG00000056268.15", "ENSMUSG00000033569.17", "ENSMUSG00000036086.16", "ENSMUSG00000022995.16", "ENSMUSG00000045658.16", "ENSMUSG00000026674.9", "ENSMUSG00000055214.15", "ENSMUSG00000055980.2", "ENSMUSG00000026458.13", "ENSMUSG00000045648.15", "ENSMUSG00000051951.5", "ENSMUSG00000026527.13", "ENSMUSG00000050840.8", "ENSMUSG00000026288.14", "ENSMUSG00000057715.13", "ENSMUSG00000026259.14", "ENSMUSG00000087082.1", "ENSMUSG00000098243.3", "ENSMUSG00000053153.14", "ENSMUSG00000013275.9", "ENSMUSG00000034353.14", "ENSMUSG00000038949.8", "ENSMUSG00000103393.1", "ENSMUSG00000067879.3", "ENSMUSG00000032649.14", "ENSMUSG00000002881.14", "ENSMUSG00000042066.15", "ENSMUSG00000044340.7", "ENSMUSG00000079465.8", "ENSMUSG00000038576.15", "ENSMUSG00000037624.15", "ENSMUSG00000025938.16", "ENSMUSG00000100811.1", "ENSMUSG00000026610.13", "ENSMUSG00000037375.16", "ENSMUSG00000026031.15", "ENSMUSG00000043019.12", "ENSMUSG00000026620.11", "ENSMUSG00000026343.6", "ENSMUSG00000054976.14", "ENSMUSG00000070565.11", "ENSMUSG00000102009.6", "ENSMUSG00000097454.1", "ENSMUSG00000073557.12", "ENSMUSG00000039354.16", "ENSMUSG00000025964.15", "ENSMUSG00000100832.1", "ENSMUSG00000026514.14", "ENSMUSG00000016918.15", "ENSMUSG00000026110.15", "ENSMUSG00000026347.13", "ENSMUSG00000026594.14", "ENSMUSG00000042686.5", "ENSMUSG00000104021.1", "ENSMUSG00000049866.12", "ENSMUSG00000087514.1", "ENSMUSG00000026478.14", "ENSMUSG00000054006.14", "ENSMUSG00000026134.11", "ENSMUSG00000026686.14", "ENSMUSG00000025991.8", "ENSMUSG00000047496.6", "ENSMUSG00000038463.8", "ENSMUSG00000059149.17", "ENSMUSG00000060985.15", "ENSMUSG00000026131.19", "ENSMUSG00000042751.14", "ENSMUSG00000041642.18", "ENSMUSG00000026142.15", "ENSMUSG00000015843.10", "ENSMUSG00000085391.1", "ENSMUSG00000026074.15", "ENSMUSG00000101102.1", "ENSMUSG00000093650.2", 
"ENSMUSG00000079330.9", "ENSMUSG00000042451.12", "ENSMUSG00000101693.1", "ENSMUSG00000039384.8", "ENSMUSG00000026556.15", "ENSMUSG00000026312.17", "ENSMUSG00000118219.1", "ENSMUSG00000099706.1", "ENSMUSG00000100457.6", "ENSMUSG00000026065.8", "ENSMUSG00000044768.16", "ENSMUSG00000100120.1", "ENSMUSG00000097448.1", "ENSMUSG00000042182.16", "ENSMUSG00000026640.12", "ENSMUSG00000026301.16", "ENSMUSG00000037509.21", "ENSMUSG00000098145.1", "ENSMUSG00000048960.13", "ENSMUSG00000101640.1", "ENSMUSG00000041439.15", "ENSMUSG00000082373.2", "ENSMUSG00000100534.1", "ENSMUSG00000079045.3", "ENSMUSG00000089706.7", "ENSMUSG00000097892.2", "ENSMUSG00000026470.14", "ENSMUSG00000039210.16", "ENSMUSG00000016200.14", "ENSMUSG00000099950.1", "ENSMUSG00000005886.14", "ENSMUSG00000042046.15", "ENSMUSG00000026062.12", "ENSMUSG00000032908.9", "ENSMUSG00000050122.19", "ENSMUSG00000048402.14", "ENSMUSG00000093436.6", "ENSMUSG00000034066.13", "ENSMUSG00000063681.14", "ENSMUSG00000100426.1", "ENSMUSG00000086836.1", "ENSMUSG00000016528.10", "ENSMUSG00000039318.12", "ENSMUSG00000070738.10", "ENSMUSG00000067028.11", "ENSMUSG00000026109.14", "ENSMUSG00000089683.2", "ENSMUSG00000026510.10", "ENSMUSG00000025946.13", "ENSMUSG00000070871.10", "ENSMUSG00000007805.4", "ENSMUSG00000026305.15", "ENSMUSG00000101895.1", "ENSMUSG00000089713.1", "ENSMUSG00000026688.5", "ENSMUSG00000053024.14", "ENSMUSG00000047361.16", "ENSMUSG00000051185.9", "ENSMUSG00000102481.1", "ENSMUSG00000026567.16", "ENSMUSG00000015484.3", "ENSMUSG00000055322.15", "ENSMUSG00000056870.9", "ENSMUSG00000026073.13", "ENSMUSG00000100553.6", "ENSMUSG00000103032.1", "ENSMUSG00000087131.7", "ENSMUSG00000026018.12", "ENSMUSG00000038855.10", "ENSMUSG00000032883.15", "ENSMUSG00000101628.1", "ENSMUSG00000026443.3", "ENSMUSG00000026407.17", "ENSMUSG00000026141.13", "ENSMUSG00000093538.2", "ENSMUSG00000060568.14", "ENSMUSG00000103291.1", "ENSMUSG00000086727.8", "ENSMUSG00000026525.9", "ENSMUSG00000025997.13", "ENSMUSG00000042115.4", 
"ENSMUSG00000026162.7", "ENSMUSG00000050069.3", "ENSMUSG00000079671.8", "ENSMUSG00000026523.14", "ENSMUSG00000015222.17", "ENSMUSG00000057173.8", "ENSMUSG00000040836.15", "ENSMUSG00000103467.1", "ENSMUSG00000043760.16", "ENSMUSG00000101799.1", "ENSMUSG00000101674.6", "ENSMUSG00000067158.9", "ENSMUSG00000026623.16", "ENSMUSG00000099413.1", "ENSMUSG00000026307.12", "ENSMUSG00000089699.1", "ENSMUSG00000052760.16", "ENSMUSG00000026123.11", "ENSMUSG00000048126.16", "ENSMUSG00000072295.6", "ENSMUSG00000045216.7", "ENSMUSG00000026611.17", "ENSMUSG00000041907.9", "ENSMUSG00000026014.15", "ENSMUSG00000026258.5", "ENSMUSG00000104435.1", "ENSMUSG00000097648.1", "ENSMUSG00000097540.1", "ENSMUSG00000086204.1", "ENSMUSG00000026011.13", "ENSMUSG00000043015.15", "ENSMUSG00000026295.12", "ENSMUSG00000097063.3", "ENSMUSG00000101890.1", "ENSMUSG00000026615.14", "ENSMUSG00000026425.15", "ENSMUSG00000103971.1", "ENSMUSG00000026098.13", "ENSMUSG00000090322.1", "ENSMUSG00000026102.9", "ENSMUSG00000016494.9", "ENSMUSG00000033488.11", "ENSMUSG00000089767.1", "ENSMUSG00000101768.1", "ENSMUSG00000099868.1", "ENSMUSG00000026437.11", "ENSMUSG00000054702.14", "ENSMUSG00000026384.13", "ENSMUSG00000103101.1", "ENSMUSG00000056220.14", "ENSMUSG00000026587.15", "ENSMUSG00000026207.16", "ENSMUSG00000006576.16", "ENSMUSG00000062310.7", "ENSMUSG00000046337.17", "ENSMUSG00000004451.14", "ENSMUSG00000002459.17", "ENSMUSG00000085526.1", "ENSMUSG00000033722.9", "ENSMUSG00000043629.12", "ENSMUSG00000026339.18", "ENSMUSG00000004880.11", "ENSMUSG00000026452.15", "ENSMUSG00000026605.14", "ENSMUSG00000026361.9", "ENSMUSG00000026193.15", "ENSMUSG00000026566.15", "ENSMUSG00000038936.13", "ENSMUSG00000044689.5", "ENSMUSG00000034220.7", "ENSMUSG00000038034.15", "ENSMUSG00000026104.14", "ENSMUSG00000026483.13", "ENSMUSG00000014980.14", "ENSMUSG00000026072.12", "ENSMUSG00000101586.1", "ENSMUSG00000034212.14", "ENSMUSG00000026024.14", "ENSMUSG00000026632.17", "ENSMUSG00000090260.1", "ENSMUSG00000041426.12", 
"ENSMUSG00000026395.16", "ENSMUSG00000099960.1", "ENSMUSG00000026103.14", "ENSMUSG00000100798.1", "ENSMUSG00000025931.15", "ENSMUSG00000040865.15", "ENSMUSG00000101168.1", "ENSMUSG00000103305.1", "ENSMUSG00000038370.6", "ENSMUSG00000102650.1", "ENSMUSG00000067006.12", "ENSMUSG00000087022.4", "ENSMUSG00000096141.2", "ENSMUSG00000025932.14", "ENSMUSG00000026179.14", "ENSMUSG00000026479.13", "ENSMUSG00000097881.7", "ENSMUSG00000026147.16", "ENSMUSG00000004709.14", "ENSMUSG00000103613.1", "ENSMUSG00000026356.15", "ENSMUSG00000104396.1", "ENSMUSG00000056055.13", "ENSMUSG00000055567.18", "ENSMUSG00000025776.13", "ENSMUSG00000085842.7", "ENSMUSG00000103251.1", "ENSMUSG00000025937.6", "ENSMUSG00000026009.14", "ENSMUSG00000038026.12", "ENSMUSG00000090272.8", "ENSMUSG00000101299.2", "ENSMUSG00000100627.6", "ENSMUSG00000037461.10", "ENSMUSG00000102475.1", "ENSMUSG00000104210.1", "ENSMUSG00000102784.1", "ENSMUSG00000026278.14", "ENSMUSG00000015962.5", "ENSMUSG00000026404.12", "ENSMUSG00000026220.6", "ENSMUSG00000047216.8", "ENSMUSG00000026600.12", "ENSMUSG00000039748.11", "ENSMUSG00000016262.14", "ENSMUSG00000055866.9", "ENSMUSG00000041144.11", "ENSMUSG00000039783.15", "ENSMUSG00000099938.1", "ENSMUSG00000026558.13", "ENSMUSG00000091283.1", "ENSMUSG00000100580.6", "ENSMUSG00000099141.1", "ENSMUSG00000005338.13", "ENSMUSG00000096941.1", "ENSMUSG00000101583.1", "ENSMUSG00000102368.1", "ENSMUSG00000103948.1", "ENSMUSG00000026532.7", "ENSMUSG00000101514.1", "ENSMUSG00000009905.5", "ENSMUSG00000026249.10", "ENSMUSG00000051344.13", "ENSMUSG00000073608.4", "ENSMUSG00000026721.16", "ENSMUSG00000040693.7", "ENSMUSG00000055547.4", "ENSMUSG00000100221.6", "ENSMUSG00000079588.3", "ENSMUSG00000025983.11", "ENSMUSG00000044835.15", "ENSMUSG00000016181.9", "ENSMUSG00000026490.18", "ENSMUSG00000041498.13", "ENSMUSG00000090079.1", "ENSMUSG00000099760.1", "ENSMUSG00000040485.5", "ENSMUSG00000038776.13", "ENSMUSG00000087233.1", "ENSMUSG00000085125.7", "ENSMUSG00000073633.9", 
"ENSMUSG00000073481.9", "ENSMUSG00000026354.8", "ENSMUSG00000026398.14", "ENSMUSG00000102331.1", "ENSMUSG00000101880.1", "ENSMUSG00000042793.13", "ENSMUSG00000040596.15", "ENSMUSG00000034292.13", "ENSMUSG00000098509.2", "ENSMUSG00000026563.13", "ENSMUSG00000026080.13", "ENSMUSG00000026211.17", "ENSMUSG00000079550.9", "ENSMUSG00000026342.10", "ENSMUSG00000026311.16", "ENSMUSG00000042197.13", "ENSMUSG00000103329.2", "ENSMUSG00000026042.16", "ENSMUSG00000038866.15", "ENSMUSG00000089822.1", "ENSMUSG00000097113.7", "ENSMUSG00000085822.2", "ENSMUSG00000026380.10", "ENSMUSG00000048096.7", "ENSMUSG00000026663.6", "ENSMUSG00000044337.5", "ENSMUSG00000102608.1", "ENSMUSG00000101288.1", "ENSMUSG00000007097.14", "ENSMUSG00000102565.1", "ENSMUSG00000026279.14", "ENSMUSG00000101705.1", "ENSMUSG00000101429.1", "ENSMUSG00000078185.3", "ENSMUSG00000104181.1", "ENSMUSG00000042849.11", "ENSMUSG00000062510.12", "ENSMUSG00000103976.1", "ENSMUSG00000103288.1", "ENSMUSG00000079434.8", "ENSMUSG00000104302.1", "ENSMUSG00000079554.2", "ENSMUSG00000026113.17", "ENSMUSG00000026596.13", "ENSMUSG00000102950.1", "ENSMUSG00000104039.1", "ENSMUSG00000026589.14", "ENSMUSG00000026565.18", "ENSMUSG00000051285.17", "ENSMUSG00000026580.16", "ENSMUSG00000047793.13", "ENSMUSG00000064294.12", "ENSMUSG00000103614.1", "ENSMUSG00000004872.15", "ENSMUSG00000026389.16", "ENSMUSG00000025986.6", "ENSMUSG00000033544.6", "ENSMUSG00000099510.2", "ENSMUSG00000085280.1", "ENSMUSG00000102343.1", "ENSMUSG00000066595.9", "ENSMUSG00000040181.14", "ENSMUSG00000100955.1", "ENSMUSG00000036975.5", "ENSMUSG00000028354.13", "ENSMUSG00000099964.6", "ENSMUSG00000100367.1", "ENSMUSG00000026495.8", "ENSMUSG00000026192.13", "ENSMUSG00000003458.12", "ENSMUSG00000026489.13", "ENSMUSG00000101156.1", "ENSMUSG00000026336.13", "ENSMUSG00000041779.5", "ENSMUSG00000103308.5", "ENSMUSG00000045463.15", "ENSMUSG00000101265.1", "ENSMUSG00000037318.10", "ENSMUSG00000090394.8", "ENSMUSG00000104365.1", "ENSMUSG00000063558.4", 
"ENSMUSG00000100484.1", "ENSMUSG00000100945.1", "ENSMUSG00000101662.1", "ENSMUSG00000099895.1", "ENSMUSG00000025961.1", "ENSMUSG00000073494.5", "ENSMUSG00000033701.13", "ENSMUSG00000026156.8", "ENSMUSG00000026637.13", "ENSMUSG00000102761.1", "ENSMUSG00000109887.1", "ENSMUSG00000101968.6", "ENSMUSG00000009772.15", "ENSMUSG00000099752.1", "ENSMUSG00000026070.15", "ENSMUSG00000087247.3", "ENSMUSG00000100781.1", "ENSMUSG00000037860.15", "ENSMUSG00000099969.1", "ENSMUSG00000099933.1", "ENSMUSG00000026442.14", "ENSMUSG00000103659.1", "ENSMUSG00000025905.13", "ENSMUSG00000033007.15", "ENSMUSG00000025900.12", "ENSMUSG00000102856.1", "ENSMUSG00000033684.14", "ENSMUSG00000099553.1", "ENSMUSG00000064246.10", "ENSMUSG00000102752.1", "ENSMUSG00000051079.8", "ENSMUSG00000041559.7", "ENSMUSG00000037995.15", "ENSMUSG00000025920.19", "ENSMUSG00000062939.11", "ENSMUSG00000026348.7", "ENSMUSG00000042501.12", "ENSMUSG00000104378.1", "ENSMUSG00000026023.16", "ENSMUSG00000100589.1", "ENSMUSG00000034107.10", "ENSMUSG00000073643.11", "ENSMUSG00000026153.15", "ENSMUSG00000026417.13", "ENSMUSG00000026321.7", "ENSMUSG00000042268.10", "ENSMUSG00000102413.1", "ENSMUSG00000026388.15", "ENSMUSG00000026626.11", "ENSMUSG00000102234.1", "ENSMUSG00000026390.7", "ENSMUSG00000084989.3", "ENSMUSG00000103043.1", "ENSMUSG00000048865.16", "ENSMUSG00000026692.12", "ENSMUSG00000104436.1", "ENSMUSG00000101812.1", "ENSMUSG00000102449.1", "ENSMUSG00000042251.12", "ENSMUSG00000100711.1", "ENSMUSG00000086264.9", "ENSMUSG00000101995.1", "ENSMUSG00000103510.1", "ENSMUSG00000039246.8", "ENSMUSG00000097899.2", "ENSMUSG00000101348.6", "ENSMUSG00000037447.16", "ENSMUSG00000101574.1", "ENSMUSG00000039395.8", "ENSMUSG00000101227.1", "ENSMUSG00000100515.6", "ENSMUSG00000026586.16", "ENSMUSG00000104117.5", "ENSMUSG00000026021.15", "ENSMUSG00000042772.15", "ENSMUSG00000058076.12", "ENSMUSG00000006301.17", "ENSMUSG00000086064.1", "ENSMUSG00000103146.1", "ENSMUSG00000041406.14", "ENSMUSG00000079470.8", 
"ENSMUSG00000089933.1", "ENSMUSG00000100459.6", "ENSMUSG00000026526.14", "ENSMUSG00000025993.10", "ENSMUSG00000037432.15", "ENSMUSG00000026614.7", "ENSMUSG00000023150.14", "ENSMUSG00000102509.1", "ENSMUSG00000103644.1", "ENSMUSG00000052688.12", "ENSMUSG00000007107.6", "ENSMUSG00000101741.1", "ENSMUSG00000103895.1", "ENSMUSG00000103432.1", "ENSMUSG00000052748.14", "ENSMUSG00000104027.1", "ENSMUSG00000067081.13", "ENSMUSG00000093132.1", "ENSMUSG00000102838.1", "ENSMUSG00000099378.1", "ENSMUSG00000101011.6", "ENSMUSG00000026687.14", "ENSMUSG00000104417.1", "ENSMUSG00000096992.3", "ENSMUSG00000041577.5", "ENSMUSG00000096929.7", "ENSMUSG00000104114.1", "ENSMUSG00000025977.15", "ENSMUSG00000099401.1", "ENSMUSG00000054412.5", "ENSMUSG00000026019.15", "ENSMUSG00000102879.1", "ENSMUSG00000040648.14", "ENSMUSG00000026725.17", "ENSMUSG00000103079.1", "ENSMUSG00000102483.1", "ENSMUSG00000097272.1", "ENSMUSG00000101224.1", "ENSMUSG00000103670.1", "ENSMUSG00000102594.1", "ENSMUSG00000103719.1", "ENSMUSG00000048775.10", "ENSMUSG00000026678.10", "ENSMUSG00000033964.12", "ENSMUSG00000026575.15", "ENSMUSG00000066672.2", "ENSMUSG00000056211.13", "ENSMUSG00000099922.1", "ENSMUSG00000026601.14", "ENSMUSG00000026331.13", "ENSMUSG00000100429.1", "ENSMUSG00000094410.7", "ENSMUSG00000116275.1", "ENSMUSG00000102976.6", "ENSMUSG00000103780.1", "ENSMUSG00000053161.16", "ENSMUSG00000042596.7", "ENSMUSG00000060771.14", "ENSMUSG00000026705.16", "ENSMUSG00000026000.16", "ENSMUSG00000026413.12", "ENSMUSG00000104057.1", "ENSMUSG00000025934.15", "ENSMUSG00000064302.13", "ENSMUSG00000101764.1", "ENSMUSG00000089914.2", "ENSMUSG00000099957.3", "ENSMUSG00000050217.13", "ENSMUSG00000042215.8", "ENSMUSG00000099469.1", "ENSMUSG00000104026.1", "ENSMUSG00000026554.15", "ENSMUSG00000091393.1", "ENSMUSG00000026116.11", "ENSMUSG00000039342.5", "ENSMUSG00000085447.1", "ENSMUSG00000022591.5", "ENSMUSG00000102760.1", "ENSMUSG00000041879.13", "ENSMUSG00000103961.1", "ENSMUSG00000026355.11", "ENSMUSG00000073530.11", 
"ENSMUSG00000051985.12", "ENSMUSG00000097850.2", "ENSMUSG00000026670.15", "ENSMUSG00000102676.1", "ENSMUSG00000026502.13", "ENSMUSG00000041889.7", "ENSMUSG00000070942.8", "ENSMUSG00000033021.16", "ENSMUSG00000102123.1", "ENSMUSG00000100180.1", "ENSMUSG00000109809.1", "ENSMUSG00000102240.1"]
slop = 250000
n_pc = 10
resolution = 1


In [5]:
# All per-gene result files are written into a directory named after the
# selected chromosome.
output_dir = pathlib.Path(chrom)
# exist_ok=True keeps re-runs from failing when the directory is already there
output_dir.mkdir(exist_ok=True)

## Cell Meta

In [6]:
cell_tidy_data = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/ClusteringSummary/Summary/TotalClusteringResults.msg'
)

# Collect the distinct SubType labels of cells whose CellClass is 'Exc' or
# 'Inh', drop any label containing 'Outlier', and replace spaces with
# underscores in the remaining labels.
_is_exc_or_inh = cell_tidy_data['CellClass'].isin(['Exc', 'Inh'])
_subtype_labels = cell_tidy_data.loc[_is_exc_or_inh, 'SubType'].unique()
use_clusters = [
    subtype.replace(' ', '_')
    for subtype in _subtype_labels
    if 'Outlier' not in subtype
]
len(use_clusters)

145

## ATAC peaks

In [7]:
# Merged ATAC peak table, restricted to the rows whose index label starts with
# 'Sub<chrom>_' for the selected chromosome.
_atac_prefix = f'Sub{chrom}_'
atac_peak = pd.read_msgpack('/home/hanliu/project/mouse_rostral_brain/study/DMRCluster/SubType.ATAC_peak_merged.msg')
_atac_on_chrom = [row_id.startswith(_atac_prefix) for row_id in atac_peak.index]
atac_peak = atac_peak.loc[_atac_on_chrom].copy()

## Gene Info

In [8]:
# Gene annotation table keyed by gene_id; only the genes located on the
# selected chromosome are kept.
gene_meta = (
    pd.read_csv(
        '/home/hanliu/ref/mouse/gencode/vm22/gencode.vM22.annotation.gene.flat.tsv.gz',
        sep='\t',
        index_col='gene_id',
    )
    .loc[lambda table: table['chrom'] == chrom]
    .copy()
)

In [9]:
# Exon intervals. The file is read without a header row (header=None), so the
# column names are assigned right after loading; the assignment fails if the
# file does not have exactly these five columns.
exon_bed = pd.read_csv('/home/hanliu/ref/mouse/gencode/vm22/genome_anno/exon.all.bed',
                       header=None, sep='\t')
exon_bed.columns = ['chrom', 'start', 'end', 'gene_id', 'gene_name']

## DMR Info

In [10]:
# Index labels of the DMRs on the selected chromosome share this prefix.
_dmr_prefix = f'Sub{chrom}_'

# Rate table stored under the 'Rate' key, reduced to the selected chromosome.
with pd.HDFStore('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRInfo.h5', 'r') as hdf:
    dmr_rate = hdf['Rate']
_rate_on_chrom = [dmr_id.startswith(_dmr_prefix) for dmr_id in dmr_rate.index]
dmr_rate = dmr_rate.loc[_rate_on_chrom].copy()

# DMR/gene correlation records, indexed by (DMR, Gene) and limited to the
# DMRs that are present in dmr_rate.
dmr_corr = pd.read_msgpack(
    '/home/hanliu/project/mouse_rostral_brain/study/DMRGeneCorr/TotalGeneDMRCorrLoop.0.3.msg'
)
dmr_corr = dmr_corr.set_index(['DMR', 'Gene'])
_corr_dmr_ids = dmr_corr.index.get_level_values('DMR')
dmr_corr = dmr_corr.loc[_corr_dmr_ids.isin(dmr_rate.index)].copy()

# DMR coordinates; the fourth column of the header-less file becomes the index.
dmr_bed = pd.read_csv(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalDMR.nofilter.bed',
    sep='\t',
    header=None,
    index_col=3,
)
dmr_bed.columns = ['chrom', 'start', 'end']
dmr_bed = dmr_bed.loc[dmr_bed['chrom'] == chrom].copy()

# Hypo-DMR hit matrix: rows limited to the DMRs in dmr_rate, columns to use_clusters.
dmr_hits = anndata.read_h5ad('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalHits.HypoDMR.h5ad')
dmr_hits = dmr_hits[dmr_rate.index, :].copy()
dmr_hits = dmr_hits[:, use_clusters].copy()

# DMR annotation matrix for the same set of DMRs.
dmr_annot = anndata.read_h5ad('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRAnnotation.h5ad')
dmr_annot = dmr_annot[dmr_rate.index, :].copy()

In [11]:
# number of DMR rows kept for the selected chromosome
dmr_hits.n_obs

350976

In [12]:
def get_gene(gene_id):
    """Look up one gene in ``gene_meta``.

    Returns a 5-tuple ``(gene_id, chrom, start, end, strand)``: the first item
    is the index label of the matched row, the others are read from the
    columns of the same name.
    """
    record = gene_meta.loc[gene_id]
    fields = [record[column] for column in ('chrom', 'start', 'end', 'strand')]
    return (record.name, *fields)

## Gene's DMR clustering

In [13]:
def calculate_gene(gene_id):
    """Cluster the DMRs surrounding one gene by their rate profiles.

    DMRs from ``dmr_bed`` that lie entirely inside the gene span extended by
    ``slop`` on both sides are selected. Their rates over ``use_clusters`` are
    scaled, reduced with PCA and grouped by Leiden clustering on a neighbor
    graph.

    Parameters
    ----------
    gene_id
        Key of the gene in ``gene_meta``.

    Returns
    -------
    pandas.DataFrame
        ``adata.obs``: one row per selected DMR, carrying the ``leiden``
        cluster label.

    Raises
    ------
    ValueError
        If no DMR falls inside the window around the gene.
    """
    # get gene information
    gene_id, _, gene_start, gene_end, _ = get_gene(gene_id)

    # select related DMRs: both ends must fall inside the extended gene span
    in_window = (dmr_bed['start'] > gene_start - slop) & (dmr_bed['end'] < gene_end + slop)
    related_dmr = dmr_bed[in_window]
    if related_dmr.shape[0] == 0:
        # fail early with a readable message instead of an opaque numeric
        # error further down in the clustering calls
        raise ValueError(f'No DMR found for gene {gene_id} with slop={slop}')

    related_dmr_rate = dmr_rate.loc[related_dmr.index, use_clusters]
    # missing rates are replaced by the mean of the same column over the selected DMRs
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean())

    # construct Adata
    adata = anndata.AnnData(X=related_dmr_rate.values.copy(),
                            obs=pd.DataFrame([], related_dmr_rate.index),
                            var=pd.DataFrame([], related_dmr_rate.columns))
    sc.pp.scale(adata)
    sc.pp.pca(adata)

    # PCA returns fewer than n_pc components when the input is small; asking
    # the neighbor graph for more PCs than were computed makes it fail, so cap
    # the request at what is available.
    usable_pcs = min(n_pc, adata.obsm['X_pca'].shape[1])
    sc.pp.neighbors(adata, n_neighbors=int(round(np.log2(adata.shape[0]))), n_pcs=usable_pcs)
    sc.tl.leiden(adata, resolution=resolution)
    return adata.obs


def get_annotation(gene_id):
    """Assemble the per-DMR annotation table for one gene.

    The DMRs are clustered with ``calculate_gene`` and the resulting table is
    extended with:

    * ``Corr`` - correlation with this gene from ``dmr_corr`` (0 when absent);
    * ``reldist_tss`` / ``in_gene_body`` - strand-aware position of the DMR
      center relative to the gene span, in units of gene length;
    * ``is_*_te``, ``in_intron``, ``in_exon``, ``in_utr3``, ``in_utr5``,
      ``feDMR``, ``adultDMR`` - boolean flags derived from ``dmr_annot``;
    * ``HypoDMR.<cluster>``, ``DMRRate.<cluster>`` and ``ATACPeak.<cluster>``
      columns taken from ``dmr_hits``, ``dmr_rate`` and ``atac_peak``.

    Parameters
    ----------
    gene_id
        Key of the gene in ``gene_meta``.

    Returns
    -------
    pandas.DataFrame
        One row per DMR, with all the columns listed above joined on the DMR
        index.
    """
    gene_id, _, gene_start, gene_end, strand = get_gene(gene_id)
    gene_cluster = calculate_gene(gene_id)

    # correlation of every DMR with this gene; DMRs without a record get 0
    this_corr = dmr_corr[dmr_corr.index.get_level_values('Gene') == gene_id]['Corr']
    this_corr.index = this_corr.index.droplevel('Gene')
    gene_cluster['Corr'] = gene_cluster.index.map(this_corr).fillna(0)

    # position of the DMR center relative to the gene, measured from the
    # strand-specific gene start and scaled by gene length
    this_dmr_bed = dmr_bed.loc[gene_cluster.index]
    dmr_center = (this_dmr_bed['end'] + this_dmr_bed['start']) / 2
    gene_length = gene_end - gene_start
    if strand == '+':
        gene_cluster['reldist_tss'] = (dmr_center - gene_start) / gene_length
    else:
        gene_cluster['reldist_tss'] = (gene_end - dmr_center) / gene_length
    # values strictly between 0 and 1 fall inside the gene span
    gene_cluster['in_gene_body'] = (gene_cluster['reldist_tss'] > 0) & (gene_cluster['reldist_tss'] < 1)

    this_annot = dmr_annot[gene_cluster.index]
    annot_df = pd.DataFrame(this_annot.X.todense(),
                            index=this_annot.obs_names, columns=this_annot.var_names)

    # annotate TE cols: a DMR is flagged when any column of the group is non-zero
    # NOTE(review): the positional slices assume a fixed column order in dmr_annot - confirm
    te_column_slices = (
        ('is_dna_te', slice(20, 33)),
        ('is_line_te', slice(33, 39)),
        ('is_ltr_te', slice(39, 45)),
        ('is_sine_te', slice(45, 52)),
    )
    for flag_name, column_slice in te_column_slices:
        te_columns = annot_df.columns[column_slice]
        gene_cluster[flag_name] = annot_df[te_columns].sum(axis=1) != 0

    # this dmr within GOI's gene feature: the annotation only counts when the
    # DMR also lies inside this gene's body
    gene_feature_columns = (
        ('in_intron', 'intron'),
        ('in_exon', 'exon'),
        ('in_utr3', 'UTR3'),
        ('in_utr5', 'UTR5'),
    )
    for flag_name, annot_column in gene_feature_columns:
        gene_cluster[flag_name] = annot_df[annot_column].astype(bool) & gene_cluster['in_gene_body']

    # previous mC study
    for annot_column in ('feDMR', 'adultDMR'):
        gene_cluster[annot_column] = annot_df[annot_column].astype(bool)

    other_profiles = []

    # DMR hypo call in each cluster
    this_hypo_hits = dmr_hits[gene_cluster.index]
    hits_df = pd.DataFrame(this_hypo_hits.X.todense(),
                           index=this_hypo_hits.obs_names, columns=this_hypo_hits.var_names)
    hits_df.columns = hits_df.columns.map(lambda i: f'HypoDMR.{i}')
    other_profiles.append(hits_df)

    # DMR rate, with missing values replaced by the column mean over these DMRs
    related_dmr_rate = dmr_rate.loc[gene_cluster['leiden'].sort_values().index, use_clusters]
    related_dmr_rate = related_dmr_rate.fillna(related_dmr_rate.mean())
    related_dmr_rate.columns = related_dmr_rate.columns.map(lambda i: f'DMRRate.{i}')
    other_profiles.append(related_dmr_rate)

    # atac peak (columns are renamed before the frame is collected)
    atac_peak_df = atac_peak.loc[related_dmr_rate.index, use_clusters].copy()
    atac_peak_df.columns = atac_peak_df.columns.map(lambda i: f'ATACPeak.{i}')
    other_profiles.append(atac_peak_df)

    # join everything on the DMR index
    dmr_annotation = pd.concat([gene_cluster] + other_profiles, axis=1, sort=True)
    return dmr_annotation

In [15]:
for gene in genes:
    print(gene)
    # The per-cluster summary is the last file written for a gene, so its
    # presence means the gene was already processed and can be skipped.
    check_path = output_dir / f'{gene}.DMR_cluster.msg'
    if not check_path.exists():
        dmr_annotation = get_annotation(gene)
        dmr_annotation.to_msgpack(output_dir / f'{gene}.DMR_detail.msg', compress='zlib')

        # average every annotation column within each Leiden cluster
        cluster_annotation = dmr_annotation.groupby('leiden').mean()
        cluster_annotation.to_msgpack(check_path, compress='zlib')

ENSMUSG00000055067.15
ENSMUSG00000038331.15
ENSMUSG00000036155.13
ENSMUSG00000037138.17
ENSMUSG00000036766.12
ENSMUSG00000055013.15
ENSMUSG00000026090.16
ENSMUSG00000026235.14
ENSMUSG00000033740.17
ENSMUSG00000040265.16
ENSMUSG00000101214.1
ENSMUSG00000038305.15
ENSMUSG00000050967.5
ENSMUSG00000026504.17
ENSMUSG00000100851.1
ENSMUSG00000049690.15
ENSMUSG00000026604.17
ENSMUSG00000025969.15
ENSMUSG00000036815.16
ENSMUSG00000056536.14
ENSMUSG00000100827.1
ENSMUSG00000028033.16
ENSMUSG00000052062.14
ENSMUSG00000042429.8
ENSMUSG00000039372.5
ENSMUSG00000025959.13
ENSMUSG00000062209.15
ENSMUSG00000004110.17
ENSMUSG00000026469.14
ENSMUSG00000026335.16
ENSMUSG00000070695.4
ENSMUSG00000053963.7
ENSMUSG00000041757.16
ENSMUSG00000026603.13
ENSMUSG00000015829.13
ENSMUSG00000038473.14
ENSMUSG00000008136.14
ENSMUSG00000026482.13
ENSMUSG00000026313.16
ENSMUSG00000026058.11
ENSMUSG00000042581.14
ENSMUSG00000026077.15
ENSMUSG00000066842.18
ENSMUSG00000089872.10
ENSMUSG00000019699.16
ENSMUSG00000063659