In [1]:
import gseapy as gp
import nease
import pandas as pd

### Input of NEASE

In [2]:
# RNA-Seq data processed from: https://doi.org/10.1186/S13059-017-1286-Z
# Additional file 4 from the paper: Differential exon usage between DCM patients and controls.
# Load the prepared standard input for the DCM data
# (columns shown below: Gene stable ID, new_start, new_end, beta).
table_dcm=pd.read_json('AS data/dcm.json')
table_dcm.head(10)

Unnamed: 0,Gene stable ID,new_start,new_end,beta
0,ENSG00000154263,69314431,69315425,-0.105828
1,ENSG00000154265,69314431,69315425,-0.105828
2,ENSG00000005471,87411893,87412033,0.135682
3,ENSG00000197150,151031241,151031291,0.150763
4,ENSG00000124574,43431751,43432141,0.107582
5,ENSG00000114770,183983753,183984894,-0.109897
6,ENSG00000117528,94478524,94478723,-0.1055
7,ENSG00000136754,26763880,26763960,0.132983
8,ENSG00000138443,203382189,203382206,0.10239
9,ENSG00000097007,130835066,130835525,-0.259706


### Run NEASE

In [3]:
# Run NEASE on the standard-input table loaded above.
# The call prints a data summary and runs the enrichment analysis; the returned
# object (`events`) is what every later cell queries.
events=nease.run(table_dcm, organism='Human')


		Data Summary
**************************************************
191 protein domains are affected by AS.
95 of the affected domains have known interactions.
985 protein interactions/binding affected.

**************************************************
Running enrichment analysis...
NEASE enrichment done.


### Classic gene enrichment

In [4]:
# This is an enrichment at the gene level: all (differentially) spliced genes.
# The function uses the gseapy library and needs the gseapy_databases parameter.
# Let us first check which libraries are available in gseapy for Human.
# (`gp` is imported in the first cell, together with the other imports.)

Human = gp.get_library_name(database='Human')
Human[:10]

['ARCHS4_Cell-lines',
 'ARCHS4_IDG_Coexp',
 'ARCHS4_Kinases_Coexp',
 'ARCHS4_TFs_Coexp',
 'ARCHS4_Tissues',
 'Achilles_fitness_decrease',
 'Achilles_fitness_increase',
 'Aging_Perturbations_from_GEO_down',
 'Aging_Perturbations_from_GEO_up',
 'Allen_Brain_Atlas_10x_scRNA_2021']

In [5]:
# Run the classic gene-level enrichment on the KEGG 2019 Human gene set
# and show the first 10 rows of the result table.
events.classic_enrich(gseapy_databases=['KEGG_2019_Human']).head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2019_Human,Oxytocin signaling pathway,20/153,1e-06,0.000248,0,0,3.966191,54.487228,CAMK2B;CAMK2D;CAMK1D;PLA2G4B;CACNA2D3;PRKAG2;NFATC1;CACNA1C;ADCY1;ADCY6;CAMKK2;GNAO1;CACNB2;RCAN1;MYL6;PLCB4;MAPK1;PPP1R12B;PLCB1;PRKACB
1,KEGG_2019_Human,Adrenergic signaling in cardiomyocytes,19/145,2e-06,0.000248,0,0,3.973184,52.287538,CAMK2B;CAMK2D;CACNA2D3;CREM;ATP2B4;CACNA1C;PPP2R5C;ADCY1;ADRA1A;ADCY6;CACNB2;PLCB4;CREB3L2;AKT1;MAPK1;PLCB1;PRKACB;CREB5;RAPGEF4
2,KEGG_2019_Human,cAMP signaling pathway,21/212,4.6e-05,0.003428,0,0,2.895085,28.885419,CHRM2;CAMK2B;GABBR1;CAMK2D;BDNF;PTCH1;PDE4D;PTGER3;PDE4C;ATP2B4;NFATC1;ADCY1;CACNA1C;ADCY6;CREB3L2;AKT1;PLCE1;MAPK1;PRKACB;RAPGEF4;CREB5
3,KEGG_2019_Human,Aldosterone synthesis and secretion,13/98,7e-05,0.003428,0,0,4.005304,38.34505,CAMK2B;SCARB1;CAMK2D;CAMK1D;ATP2B4;CACNA1C;ADCY1;ADCY6;PLCB4;CREB3L2;PLCB1;PRKACB;CREB5
4,KEGG_2019_Human,Cholinergic synapse,14/112,7.2e-05,0.003428,0,0,3.743795,35.71819,CHRM2;CAMK2B;CAMK2D;CACNA1C;ADCY1;ADCY6;GNAO1;PLCB4;CREB3L2;AKT1;MAPK1;PLCB1;PRKACB;CREB5
5,KEGG_2019_Human,Insulin secretion,12/86,8e-05,0.003428,0,0,4.243428,40.044645,CAMK2B;CAMK2D;PLCB4;CREB3L2;KCNMA1;ADCY1;CACNA1C;PLCB1;PRKACB;ADCY6;RAPGEF4;CREB5
6,KEGG_2019_Human,Morphine addiction,12/91,0.000139,0.005118,0,0,3.97382,35.295379,GNAO1;GABBR1;PDE1C;GRK4;PDE4D;PDE1A;PDE4C;ARRB1;ADCY1;GABRE;PRKACB;ADCY6
7,KEGG_2019_Human,Dilated cardiomyopathy (DCM),11/91,0.00056,0.015659,0,0,3.592047,26.898165,CACNB2;ITGB4;ITGA1;CACNA2D3;DMD;ADCY1;CACNA1C;IGF1;PRKACB;ADCY6;TTN
8,KEGG_2019_Human,"Parathyroid hormone synthesis, secretion and action",12/106,0.000583,0.015659,0,0,3.337087,24.850156,PLCB4;PDE4D;CREB3L2;PDE4C;MAPK1;ARRB1;ADCY1;PLCB1;PRKACB;ADCY6;FGFR1;CREB5
9,KEGG_2019_Human,Cortisol synthesis and secretion,9/65,0.00065,0.015659,0,0,4.192328,30.767223,SCARB1;PLCB4;CREB3L2;ADCY1;CACNA1C;PLCB1;PRKACB;ADCY6;CREB5


### NEASE analysis


In [6]:
# Get the list of all protein domains affected by AS (first 10 rows shown).
# The 'Visualization link' column holds a link to the DIGGER database for the
# lost/gained domain and its binding.

events.get_domains().head(10)

Unnamed: 0,Gene name,NCBI gene ID,Gene stable ID,Exon stable ID,Pfam ID,dPSI,Interacting domain,Visualization link
0,ZC3HAV1,56829,ENSG00000105939,ENSE00001939369,PF02825,-0.675285,True,https://exbio.wzw.tum.de/digger/ID/exon/ENSE00001939369
1,PLXNA4,91584,ENSG00000221866,ENSE00001477854,PF01403,-0.57901,False,
2,ALS2,57679,ENSG00000003393,ENSE00001844466,PF00415,-0.510964,True,https://exbio.wzw.tum.de/digger/ID/exon/ENSE00001844466
3,XRN1,54464,ENSG00000114127,ENSE00001810566,PF17846,-0.479034,False,
4,ZNF655,79027,ENSG00000197343,ENSE00003602592,PF01352,-0.458799,False,
5,NFASC,23114,ENSG00000163531,ENSE00001556394,PF07679,-0.433055,False,
6,MCM9,254394,ENSG00000111877,ENSE00001356493,PF00493,-0.428684,True,https://exbio.wzw.tum.de/digger/ID/exon/ENSE00001356493
7,DENND1B,163486,ENSG00000213047,ENSE00001196169,PF03455,-0.425418,False,
8,ALG13,79868,ENSG00000101901,ENSE00003760070,PF04101,-0.411372,False,
9,NUP98,4928,ENSG00000110713,ENSE00001527022,PF04096,-0.41095,True,https://exbio.wzw.tum.de/digger/ID/exon/ENSE00001527022


In [7]:
# List the affected interactions (domain binding) for each spliced gene;
# only the first 10 rows are shown.

events.get_edges().head(10)

Unnamed: 0,Gene name,NCBI gene ID,dPSI,Pfam ID,Number of affected interactions,Affected binding,Affected binding (NCBI)
0,WWP2,11060,-0.185343,PF00397,88,"DVL2,SAFB,U2SURP,DVL1,MKNK2,TAF15,RBM28,SRSF5,RBM17,RBMX,HNRNPL,RBM34,HNRNPD,FLNA,PABPC4,G3BP1,CPSF6,U2AF2,SRSF2,HNRNPA1,EWSR1,DAZAP1,ITCH,POLR2A,CPSF7,SRSF1,HNRNPA0,HNRNPA2B1,SRPK1,ELAVL1,IQGAP1,EIF3B,SNRNP70,ARRDC4,NONO,SRSF10,IGF2BP2,NCBP2,WWP1,SRPK2,OGT,SYNCRIP,IGF2BP1,CSNK2A1,TJP1,CDK9,U2AF1,HNRNPR,HNRNPF,HNRNPA3,SFPQ,SRSF7,TRA2A,ALYREF,SRSF3,PATJ,HNRNPH3,HNRNPM,SRSF6,TXNIP,RALY,ARRDC3,HNRNPC,POLR2C,HNRNPH1,MPP5,TAGLN2,NIFK,SART3,LIN7C,RPAP3,SHC1,ZNF638,SF3B4,PTBP1,RBM14,EIF3G,PDLIM7,WWP2,PABPC1,NCL,SNRPB2,NELFE,LARP7,G3BP2,ARRDC1,RBM10,RBM39",1856629423350287281485513164308499127316319123029318423168761101461105211338642731782130265288373754307986964261094931816732199488268662662591947484110772106442291611059673384731049210642145770821025730710236318522098864216432298961018964281020731899908467064311062822913575613183543231876439884078436597335532779657646427332102625725104328666926011060269864691662979365157418559271482419584
1,ESR2,2100,-0.30243,PF00104,78,"MINK1,SCYL1,SIX4,TAOK2,ADNP,MKNK2,TLK2,NCOA3,ESRRB,WNK1,CTNNB1,NCOA2,NKX2,CDK12,STK26,CIT,NCAM2,MID1,CUX1,SCYL2,ARNT,FNDC3B,MAPKAPK2,MARK2,TRIO,CDC42BPB,PRKCI,ESR2,SRPK1,ZHX2,MAPK1,GAK,PRAG1,CDK13,CAMK2D,NR2F2,ARVCF,CDK11B,CDK19,EIF2AK2,SRPK2,RPS6KB1,CSNK1A1,PEAK1,CSNK2A1,DAPK3,CERS2,CDK9,TTK,NCOA1,LMX1B,CSNK1G3,KPNA1,MYLK2,PRPF4B,CLK3,PKP4,EPHA4,MAPK3,PBX1,PTPRF,ESR1,TBK1,CDC42BPG,DYRK1A,PTPRK,RPS6KB2,PIK3R4,BMP2K,MEOX2,ZHX3,NR2F6,OXSR1,VRK2,CDK8,ZFHX3,MARK3,RPS6KA4",504885741051804934423394287211011820221036512514991049915929651755517651111346854281152355681405647789261201172049578558421006732228825594258015728586218177026421984230975610673361981452798341457161329956102572728648401014563836853668899119885022043559550875792209929110555611859579661993084955589422323051206399437444102446341408986
2,CLK1,1195,0.140012,PF00069,69,"SPEN,HNRNPA1,DNAJC13,RLF,DBR1,PTPN22,GRM6,MYOM1,SFMBT2,SAFB,WDR66,TRA2B,PPP1CB,MAP3K21,KIF14,SRSF1,YWHAG,AC020929.1,STIP1,GEMIN5,MFHAS1,HNRNPA0,SRPK1,UNC13B,SRSF5,PATZ1,IMPDH2,HNRNPA3,CENPE,ISG15,BAG3,SVOP,SRSF7,UBL5,CDK12,PRPF4B,FAT1,RBMX,STK31,HTATSF1,TRA2A,MKRN2,CTNND2,CELSR3,CLK1,H2BC12,SRSF3,THY1,CLK2,DNAJC24,RAPH1,CWC27,ABCA13,HIVEP1,RBM15,ZNF148,RASL12,PPIG,ANKFY1,CUL3,NCL,PPFIBP2,TJP2,ARHGEF12,PHLDB2,SRSF6,SRPK2,KIF23,RBM39",317860185116323317230132619129168736577136294144406643455006431844519928642675325773110963259299258109496732104976430235983615220988106296369531555306432592865175594932195889956164273362989623609273161501195185236642870701196120526650591028315466430966478377075128551479845246918495941423365901026733936095841195
3,FGFR1,2260,0.119626,PF00047,53,"STAT3,LTBR,RPS6KA1,NECTIN1,SLA,L1CAM,SRC,JAK2,PLCG1,CRKL,LRRN2,TNS2,AGER,AKT1,CDK9,VAV1,CRK,PTK6,NCK2,PTPN11,MFHAS1,SHB,OPCML,CTNNB1,DLK1,RAET1E,PIK3R1,FGFR2,IL27RA,PIK3R2,TNFRSF10B,FCGRT,ANOS1,CD79B,SHC1,CEACAM21,PTPN1,HEPACAM2,SHE,PLCG2,PDGFRB,FGFR1,RASA1,SCN3B,AP006333.1,EPHA4,YES1,NCK1,ITK,NCAM1,SH3BP2,RPS6KA3,GRB14",6774405561955818650338976714371753356197139910446233711772071025740913985753844057819258646149781499878813525052952263946652968795221737309746464902735770253012126669533651595921558002376920437525469037024684645222602888
4,MYO19,80179,-0.154152,PF00063,51,"LMO7,MYO1E,ARPC1B,ACTR10,CORO1B,TPM4,MYL6,AHNAK,CORO2A,MAPK14,DAPK3,ACTR2,MYL12A,MYLK,TJP1,SPTBN1,MYO5B,SIPA1,MYO6,CORO1C,SPTBN2,ACTG1,PPP1R9B,IQGAP1,TPM3,ACTN1,ACTR3,BMP2K,MYO5C,MYH9,CTTN,COBL,MYO18A,MYO1B,TPM1,SPTAN1,MYO1C,MYO10,MYO5A,ACTN4,PHKG2,INF2,MYL10,WDR1,TJP2,ERBIN,MYH10,GNB2,MYL2,PPP1R9A,TPM2",400846431009555860571757171463779026746416131009714321062746387082671146456494464623603671271846878826717087100965558955930462720172324239968744307168670946414651464481526164423934089948941455914462827834633556077169
5,XIAP,331,-0.112442,PF00653,41,"NOD2,CASP2,IKBKE,RIPK4,TAB1,CASP6,CASP8,TBK1,AKT1,XIAP,CDK1,CASP9,BIRC2,BCL10,RFFL,RIPK2,NOD1,CASP4,BMPR2,STRADB,MDM2,DIABLO,BIRC7,MAP3K2,CHEK1,RIPK3,NEK6,CASP7,BIRC5,MAP3K3,CASP12,ACVR1B,BRCA1,TRAF2,LATS1,SIAH1,CASP10,BIRC3,APAF1,GSK3B,CASP3",6412796418355410110454839841291102073319838423298915117584876710392837659554374193566167944410746111111035107838403324215100506742916727186911364778433303172932836
6,SPTAN1,6709,0.139279,PF00435,35,"SOS1,SPTBN5,CDK2,MYH7,PLEC,KALRN,CAPN1,TJP1,SPTBN1,CDK9,SPTB,PLEKHA4,AC020929.1,SPTBN2,EHBP1L1,SMURF1,IQGAP1,DYRK1A,GAK,SAV1,PLEKHA5,ABL1,GRK5,MYO5C,MYH9,FLNA,PLS1,MYO18A,MYO1C,MAPK6,MAPRE1,MAPK9,MYO19,ANLN,YAP1",66545339513321017462589978237082671110256710576645773167122541025715488261859258060485544772528695593046272316535739968746415597229195601801795444310413
7,RBM5,10181,0.171681,PF00076,27,"CASK,WDR77,SNAI1,NXF1,MEOX1,FBXW11,PRPF8,PHF19,U2SURP,LSM8,PRPF19,CHD3,DHX15,ESR2,SNRPE,ZRSR2,DDX17,RBM17,LIG4,COPS5,CHD4,MEOX2,HNRNPL,SNRNP200,PKP2,RBM14,U2AF2",85737908466151048242222329110594261472335051691273391107166521006635823310521849913981109871108422331912302053181043211338
8,ZNF397,84307,0.146217,PF00096,26,"ZNF24,ZNF446,ZKSCAN4,ZNF500,ZSCAN29,ZSCAN30,ZNF410,ZNF394,ZNF444,ZKSCAN8,ZSCAN21,ZNF263,ZSCAN20,ZNF496,COP1,ZSCAN32,STK40,ZNF197,ZNF483,ZKSCAN3,LMO4,ZNF232,ZNF213,ZNF174,PRKAA2,ZKSCAN1",7572556633870322604814605010010146757862841245531177457589101277579848386432654925839311016815839980317854377757760772755637586
9,HERC2,8924,0.362158,PF00415,22,"RAB5A,HERC3,MARK2,TBK1,HERC1,STK11,SRPK1,AGAP3,CDC42,RAB5B,CTNNB1,STK33,LRRK2,RAB34,CDK12,AURKB,CLK2,MAPK6,PHKG2,CDK8,NEK11,HERC2",586889162011291108925679467321169889985869149912089265975838715175592121196559752611024798588924


### NEASE edge enrichment 

In [8]:
# Run the NEASE enrichment on the affected edges against the KEGG pathways
# and show the top rows of the result table (at most 15).

events.enrich(database=['KEGG']).head(15)

NEASE enrichment for the pathway databases:
 ['KEGG']
Found 1 enriched pathways after multiple testing correction.



Unnamed: 0,Pathway ID,Pathway name,Source,Spliced genes (number of interactions affecting the pathway),p_value,adj p_value
0,path:hsa04080,Neuroactive ligand-receptor interaction - Homo sapiens (human),KEGG,"GABBR1 (1), CLK1 (1), ARRB1 (11), ADRA1A (2), GNAO1 (5)",1.3e-05,0.004114
1,path:hsa04261,Adrenergic signaling in cardiomyocytes - Homo sapiens (human),KEGG,"PPP2R5C (9), ESR2 (3), ERBB3 (2), TTN (1), USP49 (1), ZHX1 (1), GRK4 (1), MYO19 (6), CLK1 (1), SPTAN1 (1), ARRB1 (2), FGFR1 (1), XIAP (1), MAF (1), ADRA1A (2), CACNA1C (2), GNAO1 (4)",0.002597,0.416871
2,path:hsa04020,Calcium signaling pathway - Homo sapiens (human),KEGG,"HERC2 (1), CXCL12 (1), ESR2 (2), ERBB3 (5), TTN (1), USP49 (1), TNC (1), GRK4 (1), MYO19 (2), PTPN18 (1), ARRB1 (4), FGFR1 (3), ADRA1A (2), CACNA1C (2), GNAO1 (1)",0.010982,0.739595
3,path:hsa05110,Vibrio cholerae infection - Homo sapiens (human),KEGG,"SEC24A (1), SLC22A23 (1), ERBB3 (1), MAGIX (2), USP49 (1), WWP2 (1), MYO19 (3), CLK1 (1), SPTAN1 (1), FGFR1 (2)",0.01281,0.739595
4,path:hsa04514,Cell adhesion molecules (CAMs) - Homo sapiens (human),KEGG,"IL1RAP (1), CXCL12 (1), ESR2 (2), ERBB3 (4), PCDH1 (1), MBP (1), FGFR1 (3)",0.013329,0.739595
5,path:hsa04022,cGMP-PKG signaling pathway - Homo sapiens (human),KEGG,"PPP2R5C (3), ESR2 (3), ERBB3 (1), TTN (2), NFATC1 (1), NPR3 (1), ZHX1 (1), GRK4 (1), MYO19 (1), ELK4 (1), CLK1 (1), SPTAN1 (1), ARRB1 (4), FGFR1 (1), XIAP (1), PPP1R12B (1), MAF (1), ADRA1A (2), CACNA1C (1), GNAO1 (5)",0.013824,0.739595
6,path:hsa00310,Lysine degradation - Homo sapiens (human),KEGG,"ZNF644 (4), PRDM2 (1), PHF19 (4), BCL11A (1)",0.016714,0.766443
7,path:hsa04215,Apoptosis - multiple species - Homo sapiens (human),KEGG,"ZC3HAV1 (1), SPTAN1 (1), XIAP (11)",0.020239,0.812107
8,path:hsa05414,Dilated cardiomyopathy (DCM) - Homo sapiens (human),KEGG,"ERBB3 (1), TTN (1), USP49 (1), OBSCN (1), MYO19 (6), SPTAN1 (1), ARRB1 (1), CACNA1C (1)",0.033721,1.0
9,path:hsa04672,Intestinal immune network for IgA production - Homo sapiens (human),KEGG,"CXCL12 (2), ERBB3 (1), MBP (1), FGFR1 (1)",0.037732,1.0


### Pathway specific analysis

In [9]:
# Dilated cardiomyopathy (DCM) - Homo sapiens (human) is at index 8 in the table above.
# Run path_analysis() with the DCM pathway ID for more details about the diff. spliced genes and their affected interactions.
events.path_analysis('path:hsa05414')

Enrichment of the pathway: Dilated cardiomyopathy (DCM) - Homo sapiens (human).

Overall p_value:  0.03372116585183135




Unnamed: 0,Spliced genes,NCBI gene ID,Gene knwon to be in the pathway,Percentage of edges associated to the pathway,p_value,Affected binding (edges),Affected binding (NCBI)
0,MYO19,80179,False,6/51,2e-06,"TPM4,ACTG1,TPM3,TPM1,MYL2,TPM2",7171717170716846337169
1,OBSCN,84033,False,1/2,0.014717,TTN,7273
2,USP49,25862,False,1/4,0.029217,PRKACA,5566
3,CACNA1C,775,True,1/4,0.029217,RYR2,6262
4,ERBB3,2065,False,1/18,0.124924,PRKACA,5566
5,ARRB1,408,False,1/21,0.144174,ADRB1,153
6,TTN,7273,True,1/24,0.163,MYBPC3,4607
7,SPTAN1,6709,False,1/35,0.228561,MYH7,4625


In [10]:
# The genes MYO19 and OBSCN look interesting. Inspect MYO19 here to see which
# domain is affected and whether its dPSI is positive or negative.
# The callable passed to .loc filters the table from a single get_edges() call,
# instead of calling get_edges() twice to build the mask and the frame.
events.get_edges().loc[lambda edges: edges['Gene name'] == 'MYO19']

Unnamed: 0,Gene name,NCBI gene ID,dPSI,Pfam ID,Number of affected interactions,Affected binding,Affected binding (NCBI)
4,MYO19,80179,-0.154152,PF00063,51,"LMO7,MYO1E,ARPC1B,ACTR10,CORO1B,TPM4,MYL6,AHNAK,CORO2A,MAPK14,DAPK3,ACTR2,MYL12A,MYLK,TJP1,SPTBN1,MYO5B,SIPA1,MYO6,CORO1C,SPTBN2,ACTG1,PPP1R9B,IQGAP1,TPM3,ACTN1,ACTR3,BMP2K,MYO5C,MYH9,CTTN,COBL,MYO18A,MYO1B,TPM1,SPTAN1,MYO1C,MYO10,MYO5A,ACTN4,PHKG2,INF2,MYL10,WDR1,TJP2,ERBIN,MYH10,GNB2,MYL2,PPP1R9A,TPM2",400846431009555860571757171463779026746416131009714321062746387082671146456494464623603671271846878826717087100965558955930462720172324239968744307168670946414651464481526164423934089948941455914462827834633556077169


### Visualization

In [11]:
# Finally, we can visualize the affected pathways with NEASE.
# Again, we run it on the DCM pathway; the HTML file is generated under the
# path given by `file`.
# You can run this function several times to get a better visualization.
# NOTE(review): `k` is presumably a layout parameter -- confirm in the NEASE docs.

events.Vis_path('path:hsa05414',file='AS data/enrichment/',k=0.8)

Enrichment of the pathway: Dilated cardiomyopathy (DCM) - Homo sapiens (human).

Overall p_value:  0.03372116585183135


Visualization of the pathway generated in: AS data/enrichment/Dilated cardiomyopathy (DCM) - Homo sapiens (human).html


In [12]:
# Same visualization for the 'Adrenergic signaling in cardiomyocytes' pathway (path:hsa04261).
events.Vis_path('path:hsa04261',file='AS data/enrichment/',k=0.8)

Enrichment of the pathway: Adrenergic signaling in cardiomyocytes - Homo sapiens (human).

Overall p_value:  0.0025973275689903288


Visualization of the pathway generated in: AS data/enrichment/Adrenergic signaling in cardiomyocytes - Homo sapiens (human).html
