In [1]:
from concurrent.futures import ProcessPoolExecutor, as_completed
import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pybedtools
import scipy.stats as stats
import seaborn as sns
import joblib
import pathlib
from statsmodels.stats.multitest import multipletests

## Parameters

In [2]:
# relevant score cutoff
# NOTE(review): rs_cutoff is not referenced by any cell visible in this review — confirm it is still needed.
rs_cutoff = 0.3
min_dmr_to_test = 1000 # minimum number of hypo-DMRs required on either side of a pair (checked in prepare_table)

# motif enrichment
# NOTE(review): or_cutoff and neg_lgp_cutoff are not referenced by any cell visible in this review — confirm.
or_cutoff = 1.6
neg_lgp_cutoff = 10
# Multiplied with each motif's maximum score to obtain the per-motif score cutoff,
# i.e. it is a fraction of the maximum rather than a quantile.
mask_quantile_to_max = 0.8

## Load Data

### Motif gene

In [4]:
# Motif -> gene annotation table, indexed by its first column (motif_uid in the preview below).
# NOTE(review): absolute, user-specific path; motif_gene_anno is not used by any other
# cell visible in this review — confirm it is needed downstream.
motif_gene_anno = pd.read_csv(
    '/home/hanliu/project/mouse_rostral_brain/study/MotifClustering/JASPAR2020_CORE_vertebrates_non-redundant.mouse_genes.with_motif_group.199.csv', 
    index_col=0
)
# Preview the first rows.
motif_gene_anno.head()

Unnamed: 0_level_0,motif_name,motif_genes,gene_ids,gene_names,motif_group
motif_uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MA0006.1,Ahr::Arnt,"Ahr,Arnt","ENSMUSG00000019256.17,ENSMUSG00000015522.18","Ahr,Arnt",MotifGroup178
MA0854.1,Alx1,Alx1,ENSMUSG00000036602.14,Alx1,MotifGroup3
MA0634.1,ALX3,ALX3,ENSMUSG00000014603.3,Alx3,MotifGroup3
MA0853.1,Alx4,Alx4,ENSMUSG00000040310.12,Alx4,MotifGroup3
MA0007.3,Ar,Ar,ENSMUSG00000046532.8,Ar,MotifGroup32


### Node Data

In [4]:
# Pairwise DMR matrix: later cells treat obs_names as pair ids, var_names as DMR ids,
# and unpack each row of adata.obs into the two cluster labels of the pair.
adata = anndata.read_h5ad('PairwiseDMR.h5ad')

In [5]:
# Keep only the DMRs (columns) that are non-zero in at least one pair (row).
n_pairs_per_dmr = (adata.X != 0).sum(axis=0)
use_dmr = adata.var_names[np.asarray(n_pairs_per_dmr != 0).ravel()]

### DMR Bed and Rate

In [6]:
# Load the BED coordinates and the rate table of the selected DMRs.
# The store is opened read-only (mode='r'): HDFStore defaults to append mode, which
# would create the file if the path were wrong and needs write access to shared data.
with pd.HDFStore('/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/DMRInfo.h5',
                 mode='r') as hdf:
    # .copy() detaches the subsets so they stay usable after the store is closed
    dmr_bed_df = hdf['bed'].loc[use_dmr].copy()
    dmr_rate = hdf['Rate'].loc[use_dmr].copy()

### DMR annot

In [7]:
# Motif scan scores; rows are DMRs (obs: chrom/start/end), columns are motifs.
dmr_annot = anndata.read_h5ad(
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/MotifScan.h5ad'
)
# Per-motif score cutoff = mask_quantile_to_max * (maximum score of that motif).
# It is computed on the full matrix, i.e. before the rows are restricted to use_dmr below.
motif_cutoff = pd.Series(dmr_annot.X.max(axis=0).todense().A1 * mask_quantile_to_max, index=dmr_annot.var_names)

# Restrict to the selected DMRs; .copy() turns the subset into an independent object.
dmr_annot = dmr_annot[use_dmr, :].copy()
dmr_annot

AnnData object with n_obs × n_vars = 2267114 × 719 
    obs: 'chrom', 'start', 'end'

## Refilter scores

In [8]:
# Only keep values larger than the cutoff of their motif; everything else becomes 0.
# The filter works directly on the stored (non-zero) entries of the CSR matrix, so the
# full DMR x motif matrix is never densified (comparing a sparse matrix with a dense
# array materialises a dense matrix of the full shape).
_scores = dmr_annot.X.tocsr(copy=True)
_cutoffs = motif_cutoff[dmr_annot.var_names].values
# In CSR layout, `indices` holds the column (motif) position of every stored value.
_scores.data[_scores.data <= _cutoffs[_scores.indices]] = 0
_scores.eliminate_zeros()
dmr_annot.X = _scores

## Prepare test input

In [9]:
def prepare_table(pair_id):
    """Build the per-motif 2x2 hit-count tables for one cluster pair.

    Relies on the notebook globals ``adata``, ``dmr_rate``, ``dmr_annot`` and
    ``min_dmr_to_test``.

    Parameters
    ----------
    pair_id
        A label from ``adata.obs_names``; the matching row of ``adata.obs`` is
        unpacked into the two cluster names ``a`` and ``b``.

    Returns
    -------
    dict or pandas.DataFrame
        ``{motif: [[left_hit, left_no_hit], [right_hit, right_no_hit]]}`` where
        "left" are the DMRs with a lower rate in ``a`` and "right" the DMRs with
        a lower rate in ``b``. If either side has fewer than ``min_dmr_to_test``
        DMRs, an empty DataFrame is returned instead; callers tell the two cases
        apart with ``isinstance(result, dict)``.
    """
    # get pair dmr
    a, b = adata.obs.loc[pair_id]
    this_dmrs = adata.var_names[adata.var_vector(pair_id).astype(bool)]
    this_dmr_rate = dmr_rate.loc[this_dmrs, [a, b]]
    # DMRs with identical rates in both clusters are assigned to neither side
    a_hypo = this_dmr_rate.index[this_dmr_rate[a] < this_dmr_rate[b]]
    b_hypo = this_dmr_rate.index[this_dmr_rate[a] > this_dmr_rate[b]]

    # if DMR is not enough, skip and return empty record
    # (checked before touching dmr_annot so skipped pairs do no slicing work)
    if (a_hypo.size < min_dmr_to_test) or (b_hypo.size < min_dmr_to_test):
        return pd.DataFrame([],
                            columns=[
                                'oddsratio', 'p_value', 'adj_p',
                                '-lgp', 'left_hit', 'left_no_hit',
                                'right_hit', 'right_no_hit',
                                'left_hit_rate', 'right_hit_rate',
                                'Node'
                            ])

    # get dmr motif hits annotation
    left_dmr_annot = dmr_annot[a_hypo, :]
    right_dmr_annot = dmr_annot[b_hypo, :]

    # calculate motif occurrence: a DMR counts once per motif if its score is > 0,
    # regardless of the score value
    motif_ids = dmr_annot.var_names
    left = (left_dmr_annot.X > 0).sum(axis=0)
    left_total = left_dmr_annot.shape[0]

    right = (right_dmr_annot.X > 0).sum(axis=0)
    right_total = right_dmr_annot.shape[0]

    # one 2x2 table per motif: [[hit, no hit] on the left, [hit, no hit] on the right]
    tables = {}
    for motif, _left, _right in zip(motif_ids, left.A1, right.A1):
        tables[motif] = [[_left, left_total - _left],
                         [_right, right_total - _right]]
    return tables

In [10]:
# Build the motif count tables of every pair, keyed by the (a, b) cluster labels.
# Pairs skipped by prepare_table store an empty DataFrame instead of a dict.
table_records = {}
for pair_id in adata.obs_names:
    # progress indicator: one line per pair
    print(pair_id)
    a, b = adata.obs.loc[pair_id]
    data = prepare_table(pair_id)
    table_records[(a, b)] = data

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060


In [11]:
# Persist the count tables so the Test section can reload them without redoing the preparation.
joblib.dump(table_records, 'PairMotifCountTables.lib')

['PairMotifCountTables.lib']

## Test

In [5]:
def test_one_pair(a, b, tables, min_neg_lgp=2):
    """Run a two-sided Fisher's exact test per motif for one cluster pair.

    Parameters
    ----------
    a, b
        Labels of the two clusters; written verbatim to the ``ClusterA`` and
        ``ClusterB`` columns of the result.
    tables : dict
        ``{motif: [[left_hit, left_no_hit], [right_hit, right_no_hit]]}``.
        An empty container returns an empty result instead of raising.
    min_neg_lgp : float, default 2
        Only motifs whose ``-lgp`` is strictly greater than this value are kept.

    Returns
    -------
    pandas.DataFrame
        One row per retained motif with the columns ``oddsratio``, ``p_value``,
        ``adj_p`` (Benjamini-Hochberg), ``-lgp`` (``-log10(adj_p)``, with
        ``adj_p == 0`` mapped to 300), the four counts, the two hit rates,
        ``ClusterA`` and ``ClusterB``.
    """
    if len(tables) == 0:
        # nothing to test: return an empty frame with the usual columns
        return pd.DataFrame([],
                            columns=[
                                'oddsratio', 'p_value', 'adj_p', '-lgp',
                                'left_hit', 'left_no_hit', 'right_hit',
                                'right_no_hit', 'left_hit_rate',
                                'right_hit_rate', 'ClusterA', 'ClusterB'
                            ])

    # do test and collect the counts in a single pass over the tables
    results = {}
    records = {}
    for motif, table in tables.items():
        (left_hit, left_no_hit), (right_hit, right_no_hit) = table
        odds, p = stats.fisher_exact(table, alternative='two-sided')
        results[motif] = {'oddsratio': odds, 'p_value': p}

        left_total = left_hit + left_no_hit
        right_total = right_hit + right_no_hit
        records[motif] = dict(
            left_hit=left_hit,
            left_no_hit=left_no_hit,
            right_hit=right_hit,
            right_no_hit=right_no_hit,
            # an empty side has no defined hit rate
            left_hit_rate=left_hit / left_total if left_total else np.nan,
            right_hit_rate=right_hit / right_total if right_total else np.nan)
    motif_enrich_df = pd.DataFrame(results).T

    # p value correction
    _, adj_p, _, _ = multipletests(motif_enrich_df['p_value'], method='fdr_bh')
    motif_enrich_df['adj_p'] = adj_p
    # adj_p == 0 gives log10 -> -inf; silence the divide warning and map it to 300
    with np.errstate(divide='ignore'):
        neg_lgp = -np.log10(motif_enrich_df['adj_p'])
    motif_enrich_df['-lgp'] = neg_lgp.replace(np.inf, 300)

    # assemble final results
    counts = pd.DataFrame(records).T
    motif_enrich_df = pd.concat([motif_enrich_df, counts], axis=1, sort=True)
    motif_enrich_df['ClusterA'] = a
    motif_enrich_df['ClusterB'] = b

    # apply a minimum filter
    motif_enrich_df = motif_enrich_df[motif_enrich_df['-lgp'] > min_neg_lgp]

    return motif_enrich_df

In [6]:
# Reload the count tables written by the joblib.dump cell above.
# NOTE(review): joblib.load unpickles the file — only load files produced by this notebook.
table_records = joblib.load('PairMotifCountTables.lib')

In [7]:
# Directory holding one result file per cluster pair; an existing directory is reused.
temp_dir = 'TEMP'
pathlib.Path(temp_dir).mkdir(exist_ok=True)

In [None]:
# Test every pair in a pool of 25 worker processes, writing one file per pair.
with ProcessPoolExecutor(25) as executor:
    futures = {}
    for (a, b), tables in table_records.items():
        output_path = f'{temp_dir}/{a}-{b}.msg'
        # Submit only pairs that have count tables (skipped pairs hold an empty
        # DataFrame, not a dict) and whose result file does not exist yet, so an
        # interrupted run can be resumed.
        if isinstance(tables, dict) and not pathlib.Path(output_path).exists():
            futures[executor.submit(test_one_pair, a, b, tables)] = (a, b, output_path)

    # Save each result as soon as its worker finishes and report the number of
    # motifs that passed the filter.
    for future in as_completed(futures):
        a, b, output_path = futures[future]
        data = future.result()
        data.to_msgpack(output_path)
        print(a, b, data.shape[0], sep='\t')

  result = getattr(ufunc, method)(*inputs, **kwargs)
It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
  app.launch_new_instance()


Foxp2_Trpc7	MSN-D1_Khdrbs3	371


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D2_Casz1	299
Foxp2_Trpc7	Foxp2_Dchs2	276
Foxp2_Trpc7	D1L-Fstl4_Sipa1l2	323


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D2_Col14a1	282


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-Fstl4_Trps1	277


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D1_Hrh1	308


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-Fstl4_Grm3	358


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Trpc4	404


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D2_Slc24a2	329


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Pag1	455


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-PAL_Flrt2	489


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-Fstl4_Cadm1	501


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D1_Ntn1	385


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Kcnd3	407


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D2_Nrp2	406


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Gabbr2	476


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Mapk10	419


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	MSN-D1_Plxnc1	436


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-PAL_Plcxd3	515


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	CGE-Vip_Ntng1	456


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	OLF_Xkr6	441


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	D1L-Fstl4_Crim1	517


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	Foxp2_Homer2	425


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	Foxp2_Inpp4b	343
Foxp2_Trpc7	CGE-Vip_Ptprm	425
Foxp2_Trpc7	CGE-Vip_Robo1	471
Foxp2_Trpc7	Unc5c_Unc5c	434
Foxp2_Trpc7	CGE-Lamp5_Nrxn3	375
Foxp2_Trpc7	Chd7_Kcnc2	404
Foxp2_Trpc7	Chd7_Trpc7	425


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	CGE-Lamp5_Dock5	461
Foxp2_Trpc7	CGE-Lamp5_Grid1	447


  result = getattr(ufunc, method)(*inputs, **kwargs)


Foxp2_Trpc7	CGE-Lamp5_Grk5	460
Foxp2_Trpc7	CGE-Lamp5_Sorcs1	434
Foxp2_Trpc7	CGE-Vip_Ccser1	395
Foxp2_Trpc7	CGE-Vip_Fstl4	406
Foxp2_Trpc7	Chd7_Megf11	412
Foxp2_Trpc7	CGE-Vip_Grm8	423
Foxp2_Trpc7	CGE-Vip_Galnt17	437
Foxp2_Trpc7	MGE-Pvalb_Gfra2	477
Foxp2_Trpc7	MGE-Pvalb_Ptprk	456
Foxp2_Trpc7	CGE-Vip_Clstn2	428
Foxp2_Trpc7	MGE-Sst_Unc5b	386
Foxp2_Trpc7	MGE-Pvalb_Cnih3	471
Foxp2_Trpc7	MGE-Pvalb_Entpd3	462
Foxp2_Trpc7	MGE-Pvalb_Cacna1i	482
Foxp2_Trpc7	MGE-Sst_Ptpre	437
Foxp2_Trpc7	MGE-Pvalb_Thsd7a	475
Foxp2_Trpc7	MGE-Pvalb_Sema5a	485
Foxp2_Trpc7	MGE-Sst_Rxra	431
Foxp2_Trpc7	PAL-Inh_Deptor	399
Foxp2_Trpc7	MGE-Sst_Dock4	443
Foxp2_Trpc7	PAL-Inh_Meis2	156
Foxp2_Trpc7	MGE-Sst_Bmper	441
Foxp2_Trpc7	MGE-Sst_Ubtd1	487
Foxp2_Trpc7	LSX-Inh_Cacna1i	434
Foxp2_Trpc7	LSX-Inh_Lats2	464
Foxp2_Trpc7	MGE-Sst_Rerg	443
Foxp2_Trpc7	PAL-Inh_Chat	412
Foxp2_Trpc7	MGE-Sst_Etv1	445
Foxp2_Trpc7	MGE-Sst_Kcnip4	465
Foxp2_Dchs2	MSN-D2_Nrp2	294
Foxp2_Dchs2	D1L-Fstl4_Cadm1	337
Foxp2_Trpc7	MGE-Sst_Frmd6	462
Foxp2_Trpc7	LSX-

MSN-D2_Casz1	PAL-Inh_Onecut2	458
D1L-Fstl4_Cadm1	D1L-Fstl4_Sipa1l2	466
MSN-D2_Casz1	PAL-Inh_Igdcc3	416
MSN-D2_Casz1	PAL-Inh_Ptprd	494
D1L-Fstl4_Cadm1	D1L-PAL_Plcxd3	305
MSN-D2_Casz1	PAL-Inh_Rarb	473
D1L-Fstl4_Cadm1	D1L-PAL_Flrt2	278
D1L-Fstl4_Cadm1	OLF_Trpc4	474
MSN-D2_Casz1	MGE-Sst_Chodl	424
D1L-Fstl4_Cadm1	OLF_Mapk10	424
D1L-Fstl4_Cadm1	OLF_Xkr6	447
D1L-Fstl4_Cadm1	OLF_Kcnd3	406
D1L-Fstl4_Cadm1	OLF_Pag1	433
D1L-Fstl4_Cadm1	OLF_Gabbr2	361
D1L-Fstl4_Cadm1	Foxp2_Inpp4b	383
D1L-Fstl4_Cadm1	CGE-Vip_Ptprm	428
D1L-Fstl4_Cadm1	CGE-Vip_Ntng1	392
D1L-Fstl4_Cadm1	Foxp2_Homer2	311
D1L-Fstl4_Cadm1	CGE-Vip_Robo1	425
D1L-Fstl4_Cadm1	CGE-Lamp5_Grid1	462
D1L-Fstl4_Cadm1	Unc5c_Unc5c	436
D1L-Fstl4_Cadm1	CGE-Lamp5_Dock5	477
D1L-Fstl4_Cadm1	CGE-Lamp5_Grk5	481
D1L-Fstl4_Cadm1	CGE-Lamp5_Nrxn3	430
D1L-Fstl4_Cadm1	Chd7_Kcnc2	421
D1L-Fstl4_Cadm1	CGE-Lamp5_Sorcs1	432
D1L-Fstl4_Cadm1	Chd7_Trpc7	463
D1L-Fstl4_Cadm1	Chd7_Megf11	497
D1L-Fstl4_Cadm1	MGE-Pvalb_Ptprk	515
D1L-Fstl4_Cadm1	CGE-Vip_Ccser1	443
D1L-Fstl4_C

MSN-D2_Col14a1	MGE-Pvalb_Ptprk	422
MSN-D2_Col14a1	MGE-Sst_Ptpre	406
MSN-D2_Col14a1	PAL-Inh_Deptor	411
MSN-D2_Col14a1	MGE-Sst_Rxra	428
MSN-D2_Col14a1	MGE-Pvalb_Sema5a	438
MSN-D2_Col14a1	MGE-Sst_Bmper	403
MSN-D2_Col14a1	MGE-Pvalb_Thsd7a	407
MSN-D2_Col14a1	MGE-Pvalb_Cacna1i	461
MSN-D2_Col14a1	LSX-Inh_Lats2	462
MSN-D1_Khdrbs3	MSN-D1_Hrh1	336
MSN-D2_Col14a1	MGE-Sst_Kcnip4	417
MSN-D2_Col14a1	MGE-Sst_Ubtd1	442
MSN-D2_Col14a1	MGE-Sst_Etv1	424
MSN-D2_Col14a1	MGE-Sst_Dock4	415
MSN-D2_Col14a1	LSX-Inh_Cacna1i	470
MSN-D2_Col14a1	PAL-Inh_Meis2	119
MSN-D2_Col14a1	LSX-Inh_Dock10	451
MSN-D2_Col14a1	PAL-Inh_Chat	434
MSN-D2_Col14a1	LSX-Inh_Foxp2	458
MSN-D1_Khdrbs3	D1L-Fstl4_Grm3	499
MSN-D2_Col14a1	MGE-Sst_Frmd6	449
MSN-D1_Khdrbs3	D1L-Fstl4_Trps1	470
MSN-D2_Col14a1	MGE-Sst_Rerg	446
MSN-D2_Col14a1	LSX-Inh_Zeb2	484
MSN-D2_Col14a1	LSX-Inh_Nxph1	454
MSN-D1_Khdrbs3	MSN-D1_Plxnc1	477
MSN-D2_Col14a1	LSX-Inh_Enox1	482
MSN-D2_Col14a1	PAL-Inh_Tcf7l2	431
MSN-D1_Khdrbs3	MSN-D1_Ntn1	477
MSN-D2_Col14a1	PAL-Inh_Meis1	50

D1L-Fstl4_Grm3	CGE-Lamp5_Dock5	401
D1L-Fstl4_Grm3	CGE-Lamp5_Grid1	403
D1L-Fstl4_Grm3	Chd7_Kcnc2	400
D1L-Fstl4_Grm3	Chd7_Trpc7	439
D1L-Fstl4_Grm3	CGE-Vip_Ccser1	403
D1L-Fstl4_Grm3	CGE-Vip_Fstl4	407
D1L-Fstl4_Grm3	Chd7_Megf11	416
D1L-Fstl4_Grm3	CGE-Vip_Clstn2	416
D1L-Fstl4_Grm3	CGE-Vip_Galnt17	428
D1L-Fstl4_Grm3	CGE-Vip_Grm8	423
D1L-Fstl4_Grm3	MGE-Pvalb_Gfra2	496
D1L-Fstl4_Grm3	MGE-Pvalb_Entpd3	428
D1L-Fstl4_Grm3	MGE-Sst_Unc5b	407
D1L-Fstl4_Grm3	MGE-Pvalb_Cnih3	496
D1L-Fstl4_Grm3	MGE-Pvalb_Ptprk	482
D1L-Fstl4_Grm3	MGE-Sst_Ptpre	464
D1L-Fstl4_Grm3	MGE-Pvalb_Thsd7a	454
D1L-Fstl4_Grm3	MGE-Pvalb_Cacna1i	506
D1L-Fstl4_Grm3	MGE-Pvalb_Sema5a	490
D1L-Fstl4_Grm3	PAL-Inh_Deptor	446
D1L-Fstl4_Grm3	MGE-Sst_Ubtd1	456
D1L-Fstl4_Grm3	MGE-Sst_Rxra	397
MSN-D1_Plxnc1	MSN-D1_Ntn1	257
D1L-Fstl4_Grm3	MGE-Sst_Dock4	441
D1L-Fstl4_Grm3	MGE-Sst_Bmper	394
D1L-Fstl4_Grm3	LSX-Inh_Cacna1i	416
D1L-Fstl4_Grm3	MGE-Sst_Rerg	438
D1L-Fstl4_Grm3	MGE-Sst_Etv1	423
D1L-Fstl4_Grm3	MGE-Sst_Kcnip4	411
D1L-Fstl4_Grm3	LSX-Inh_Lats

OLF_Mapk10	MGE-Sst_Bmper	444
OLF_Mapk10	MGE-Sst_Unc5b	377
OLF_Mapk10	MGE-Sst_Kcnip4	474
OLF_Mapk10	MGE-Sst_Frmd6	455
OLF_Mapk10	PAL-Inh_Meis2	165
OLF_Mapk10	LSX-Inh_Lats2	356
OLF_Mapk10	LSX-Inh_Zeb2	410
OLF_Mapk10	PAL-Inh_Chat	358
OLF_Mapk10	LSX-Inh_Nxph1	441
OLF_Mapk10	LSX-Inh_Enox1	421
OLF_Mapk10	LSX-Inh_Cacna1i	326
OLF_Mapk10	PAL-Inh_Deptor	384
OLF_Mapk10	LSX-Inh_Foxp2	425
OLF_Pag1	OLF_Trpc4	399
OLF_Pag1	OLF_Kcnd3	346
OLF_Pag1	OLF_Gabbr2	367
OLF_Mapk10	LSX-Inh_Dock10	364
OLF_Mapk10	PAL-Inh_Meis1	476
OLF_Mapk10	PAL-Inh_Igdcc3	382
OLF_Mapk10	PAL-Inh_Tmem178	349
OLF_Mapk10	PAL-Inh_Ptprd	437
OLF_Mapk10	PAL-Inh_Tcf7l2	363
OLF_Pag1	D1L-PAL_Flrt2	385
OLF_Pag1	D1L-PAL_Plcxd3	419
OLF_Pag1	Foxp2_Homer2	421
OLF_Mapk10	MGE-Sst_Chodl	412
OLF_Pag1	D1L-Fstl4_Sipa1l2	465
OLF_Mapk10	PAL-Inh_Rarb	356
OLF_Mapk10	PAL-Inh_Onecut2	307
OLF_Pag1	Foxp2_Inpp4b	403
OLF_Pag1	CGE-Lamp5_Grk5	443
OLF_Pag1	CGE-Lamp5_Dock5	466
OLF_Pag1	Unc5c_Unc5c	463
OLF_Pag1	CGE-Vip_Ntng1	404
OLF_Pag1	CGE-Vip_Robo1	401
OLF_Pag1	C

In [None]:
# Concatenate every per-pair result file found in temp_dir and save the combined table.
# glob() yields the files in no particular order, so the row order may differ between runs.
# NOTE(review): the msgpack I/O used here is what triggers the "recommended to use pyarrow"
# message in the run output above — plan a migration to another format.
total_enrichment = pd.concat([pd.read_msgpack(p) for p in pathlib.Path(temp_dir).glob('*msg')])
total_enrichment.to_msgpack('PairwiseMotifEnrichment.msg')

In [None]:
# Preview the first rows of the combined enrichment table.
total_enrichment.head()