# **Summary of Workflow for Integrative approach:**


1.Based on the binding profiles for STAT1, STAT2, IRF1 and IRF9, TF-specific gene lists were prepared (e.g. STAT1-target genes).

2.The motif file containing GAS-only, ISRE-only and composite containing genes were generated (by Agata and Kasia).

3.The integration analysis was performed using up-regulated, P-adjusted(P<0.05) genes from RNA-seq and the peaks from CHIP-seq. Briefly, BETA tool was run using this script(nested_all_beta.sh), followed by extraction of upregulated genes from all time points and combining all files into a single one (cat file1.txt file2.txt file3.txt | sort > file4.txt). Then, 
the integrative gene list was initially refined by the identification of overlapping genes between the integrative list and the gene lists from step 1, followed by the detection of integrative genes that share a motif site based on the list from step 2. Accordingly, the number of refined integrative genes were 330 and 308, in IFNa- and IFNy-treated groups, respectively. Please check "clusters.ipynb" script (Refined gene list) for the latest integrative gene list.



In [1]:
import pandas as pd
import numpy as np
import os
import glob

In [None]:
# DISPLAY DATAFRAMES AS INTERACTIVE TABLES
#%load_ext google.colab.data_table

  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")


# Preparing the TF-specific genes (To make annotate folder)


In [None]:
# This code bloc is repeated for each TF

tf = pd.read_csv("peaks_afterIDR/IFNg_allPeaks/IRF9_IFNy_allPeaksSet_nR_topScore_5.txt", sep="\t", header=None, skiprows=[0])
tf_bd = tf.iloc[:,0:6].copy()
tf_bd.to_csv("padj05/IRF9_peaks.bed", sep="\t", header=None, index=False)  # this file is used in "perl ../../homer/bin/annotatePeaks.pl IRF1_peaks.bed hg38 > IRF1_output.txt"
print(type(tf_bd)); print(tf_bd.shape)


<class 'pandas.core.frame.DataFrame'>
(2240, 6)


In [None]:
# Extract gene name from homer-generated annotation file
# # This code bloc is repeated for each TF

hmr = pd.read_csv("padj05/peaks_IFNy/tST2_output.txt", sep="\t")
hmr_g = hmr[hmr['Annotation'].str.contains("promoter|5' UTR", regex=True)]
hmr_g = hmr_g[['Gene Name']]
hmr_g.to_csv("padj05/peaks_IFNy/tST2_genes.txt", index=False, header=None, sep="\t")
print("dim before: ", hmr.shape ,"\n", "dim after: ", hmr_g.shape) 

dim before:  (2584, 19) 
 dim after:  (278, 1)


# Prepare the motif files

In [None]:
dfa = pd.read_csv("padj05/composite_GAS_ISRE_allMatrices_IFNy_ScoresTest_10Perc_motifs_integrated_promoter_final_forNetworks.csv", sep=";")
print(dfa.shape); print(dfa.columns)

(183, 4)
Index(['composite', 'GAS', 'ISRE', 'Unnamed: 3'], dtype='object')


In [None]:
# removing extra columns
dfa_1 = dfa[dfa.columns[0:3]]
print(dfa_1.columns); print(dfa_1.shape); print(type(dfa_1))

Index(['composite', 'GAS', 'ISRE'], dtype='object')
(183, 3)
<class 'pandas.core.frame.DataFrame'>


In [None]:
dfa_1.value_counts()

composite  ISRE     GAS     
 APOL1      ACOT7    A2M        1
 APOL2      ACSL5    AASS       1
 LGALS3BP   FKBP11   DPAGT1     1
 MDK        FYCO1    EDEM2      1
 MVB12A     GBP1     EFNA1      1
                               ..
 IFI6       DNAJA1   CDC7       1
 IFIT5      DNPEP    CNTNAP1    1
 IFITM1     DUSP16   CREBRF     1
 IFITM3     EARS2    CREM       1
 ZC3HAV1    MX2      IRAK4      1
Length: 64, dtype: int64

In [None]:
# Multiple columns into one column
dfa_2 = dfa_1.stack().reset_index()
print(dfa_2.columns); print(dfa_2.head(3))

Index(['level_0', 'level_1', 0], dtype='object')
   level_0    level_1       0
0        0  composite   APOL1
1        0        GAS     A2M
2        0       ISRE   ACOT7


In [None]:
# Creating IF condition
dfa_2.loc[dfa_2['level_1'] == "composite", "motfcol"] = "blue"
dfa_2.loc[dfa_2['level_1'] == "GAS", "motfcol"] = "green"
dfa_2.loc[dfa_2['level_1'] == "ISRE", "motfcol"] = "orange"
dfa_2.rename(columns={0: "symbol"}, inplace=True)
dfa_2['symbol'] = dfa_2['symbol'].str.strip()
dfa_2.head()

Unnamed: 0,level_0,level_1,symbol,motfcol
0,0,composite,APOL1,blue
1,0,GAS,A2M,green
2,0,ISRE,ACOT7,orange
3,1,composite,APOL2,blue
4,1,GAS,AGFG2,green


In [None]:
dfa_2.to_csv("padj05/motifs_IFNy.tsv", sep="\t", index=False)

# Finding Mutual genes between the integrative list & motif genes

In [None]:
# load integrated gene list 
df = pd.read_csv("padj05/IFNy/IFNy_genes.tsv", sep="\t")
print(df.shape); print(df.columns)

(1017, 1)
Index(['symbol'], dtype='object')


In [None]:
# load the motif file (prepared above)
# dfa_2
print(dfa_2.shape); print(dfa_2.columns)

(319, 4)
Index(['level_0', 'level_1', 'symbol', 'motfcol'], dtype='object')


In [None]:
# merging 
df_mrg = pd.merge(df, dfa_2, on='symbol')
print(df_mrg.shape); print(df_mrg.columns)

(319, 4)
Index(['symbol', 'level_0', 'level_1', 'motfcol'], dtype='object')


In [None]:
df_mrg.to_csv("padj05/IFNy/IFNy_MRGgene.tsv", sep="\t", index=False)

# Overlapping antibody-specific genes with the integrative list

In [None]:
# load the integrative list (prepared above)
# df_mrg
df_mrg.columns

Index(['symbol', 'level_0', 'level_1', 'motfcol'], dtype='object')

In [None]:
# ST1
st1 = pd.read_csv("padj05/IFNy/annotate/pST1_genes.txt", header=None, names=["symbol"], sep="\t")
st1['symbol'] = st1['symbol'].str.strip()
st1_mrg = pd.merge(df_mrg, st1, on="symbol")
st1_mrg.drop_duplicates(subset=['symbol'], inplace=True, ignore_index=True)
#st1_mrg.to_csv("padj05/IFNa/IFNa_MRGgene_pST1.tsv", sep="\t", index=False)
st1_mrg.shape

(187, 4)

In [None]:
st1_mrg['symbol'].to_clipboard(sep="\t", index=False, header=None)

In [None]:
# ST2
st2 = pd.read_csv("padj05/IFNy/annotate/tST2_genes.txt", header=None, names=["symbol"], sep="\t")
st2['symbol'] = st2['symbol'].str.strip()
st2_mrg = pd.merge(df_mrg, st2, on="symbol")
st2_mrg.drop_duplicates(subset=['symbol'], inplace=True, ignore_index=True)
#st2_mrg.to_csv("padj05/IFNa/IFNa_MRGgene_pST2.tsv", sep="\t", index=False)
st2_mrg.shape

(59, 4)

In [None]:
st2_mrg['symbol'].to_clipboard(sep="\t", header=None, index=False)

In [None]:
# IRF1
irf1 = pd.read_csv("padj05/IFNy/annotate/IRF1_genes.txt", header=None, names=["symbol"], sep="\t")
irf1['symbol'] = irf1['symbol'].str.strip()
irf1_mrg = pd.merge(df_mrg, irf1, on="symbol")
irf1_mrg.drop_duplicates(subset=['symbol'], inplace=True, ignore_index=True)
#irf1_mrg.to_csv("padj05/IFNa/IFNa_MRGgene_IRF1.tsv", sep="\t", index=False)
irf1_mrg.shape

(168, 4)

In [None]:
irf1_mrg['symbol'].to_clipboard(sep="\t", index=False, header=None)

In [None]:
# IRF9
irf9 = pd.read_csv("padj05/IFNy/annotate/IRF9_genes.txt", header=None, names=["symbol"], sep="\t")
irf9['symbol'] = irf9['symbol'].str.strip()
irf9_mrg = pd.merge(df_mrg, irf9, on="symbol")
irf9_mrg.drop_duplicates(subset=['symbol'], inplace=True, ignore_index=True)
#irf9_mrg.to_csv("padj05/IFNa/IFNa_MRGgene_IRF9.tsv", sep="\t", index=False)
irf9_mrg.shape

(88, 4)

In [None]:
irf9_mrg['symbol'].to_clipboard(sep="\t", index=False, header=None)

# Task: Finding overlaps between IFNa & IFNy

In [None]:
ifa = pd.read_csv("padj05/composite_GAS_ISRE_allMatrices_IFNa_ScoresTest_10Perc_motifs_integrated_promoter_final_forNetworks.csv", sep=";")
ifa = ifa[ifa.columns[0:3]]
ify = pd.read_csv("padj05/composite_GAS_ISRE_allMatrices_IFNy_ScoresTest_10Perc_motifs_integrated_promoter_final_forNetworks.csv", sep=";")
ify = ify[ify.columns[0:3]]
print("IFNa: ", ifa.shape, "\t", "IFNy: ", ify.shape)

IFNa:  (151, 3) 	 IFNy:  (183, 3)


In [None]:
print(ifa.head(3)); print(ify.head(3))

  composite    ISRE     GAS
0     APOL1   ACOT7     A2M
1     APOL2   ACSL5    AASS
2     APOL6    ACY3   AGFG2
  composite     GAS    ISRE
0     APOL1     A2M   ACOT7
1     APOL2   AGFG2   ACSL5
2     APOL6     AGT    ACY3


In [None]:
# Composites
com_mu = pd.merge(ifa.iloc[:,0], ify.iloc[:,0], on="composite", how="inner")
com_mu = com_mu[~com_mu['composite'].isnull()]

com_a = pd.merge(ifa.iloc[:,0], ify.iloc[:,0], on="composite", how="left")
com_a = com_a[~com_a['composite'].isnull()]

com_y = pd.merge(ifa.iloc[:,0], ify.iloc[:,0], on="composite", how="right")
com_y = com_y[~com_y['composite'].isnull()]

com = pd.concat([com_mu, com_a, com_y], axis=1, keys=["Mutual", "IFNa", "IFNy"])

print("Composite: mutual", com_mu.shape, "\t", "only IFNa: ", com_a.shape, "\t", 
      "only IFNy", com_y.shape)

Composite: mutual (43, 1) 	 only IFNa:  (64, 1) 	 only IFNy (55, 1)


In [None]:
com.to_csv("padj05/Composite_IFNa_y.tsv", sep="\t", index=False)

In [None]:
# ISRE
com_mu = pd.merge(ifa.iloc[:,1], ify.iloc[:,2], on="ISRE", how="inner")
com_mu = com_mu[~com_mu['ISRE'].isnull()]

com_a = pd.merge(ifa.iloc[:,1], ify.iloc[:,2], on="ISRE", how="left")
com_a = com_a[~com_a['ISRE'].isnull()]

com_y = pd.merge(ifa.iloc[:,1], ify.iloc[:,2], on="ISRE", how="right")
com_y = com_y[~com_y['ISRE'].isnull()]

com = pd.concat([com_mu, com_a, com_y], axis=1, keys=["Mutual", "IFNa", "IFNy"])

print("ISRE: mutual", com_mu.shape, "\t", "only IFNa: ", com_a.shape, "\t", 
      "only IFNy", com_y.shape)

ISRE: mutual (89, 1) 	 only IFNa:  (135, 1) 	 only IFNy (127, 1)


In [None]:
com.to_csv("padj05/ISRE_IFNa_y.tsv", sep="\t", index=False)

In [None]:
# GAS
com_mu = pd.merge(ifa.iloc[:,2], ify.iloc[:,1], on="GAS", how="inner")
com_mu = com_mu[~com_mu['GAS'].isnull()]

com_a = pd.merge(ifa.iloc[:,2], ify.iloc[:,1], on="GAS", how="left")
com_a = com_a[~com_a['GAS'].isnull()]

com_y = pd.merge(ifa.iloc[:,2], ify.iloc[:,1], on="GAS", how="right")
com_y = com_y[~com_y['GAS'].isnull()]

com = pd.concat([com_mu, com_a, com_y], axis=1, keys=["Mutual", "IFNa", "IFNy"])

print("GAS: mutual", com_mu.shape, "\t", "only IFNa: ", com_a.shape, "\t", 
      "only IFNy", com_y.shape)

GAS: mutual (98, 1) 	 only IFNa:  (151, 1) 	 only IFNy (146, 1)


In [None]:
com.to_csv("padj05/GAS_IFNa_y.tsv", sep="\t", index=False)

In [None]:
# To get unique values see https://stackoverflow.com/questions/23460345/selecting-unique-rows-between-two-dataframes-in-pandas

# Task: Networks for overlapping genes between IFNa & IFNy

In [2]:
# my directory
!pwd && ls

/media/dell/Elements/MAH/dhmg/beta/TFs/IDR
 allPeaksSet_IFNy_top_score.bed			  Integ_genes_Final
 allPeaksSet_nR_allAntibodies_IFNy_topScore.bed   Networks.ipynb
 allPeaksSet_nR_allAntibodies_IFNy_topScore.txt   padj05
 clusters					 'RNA-seq data'
 IFNy


## IFNa

In [3]:
ovr = pd.read_csv("padj05/ovrlp/ovrlp_genes.txt", sep="\t")
ovr.head(2)

Unnamed: 0,Names,total,elements,Unnamed: 3,IFNa,110,AASS,Unnamed: 7,IFNy,88,ANP32E
0,IFNa IFNy,220.0,A2M,,,,ANAPC4,,,,APOL3
1,,,ACOT7,,,,ANKFY1,,,,ARF6


In [4]:
# Overlaped genes
ovr['elements'].shape

(220,)

In [5]:
# IFNa ST1
st1 = pd.read_csv("padj05/IFNa/IFNa_MRGgene_pST1.tsv", sep="\t")
st1.head(2)


Unnamed: 0,symbol,level_0,level_1,motfcol
0,EPSTI1,17,composite,blue
1,ANKFY1,5,ISRE,orange


In [14]:
st1_ov_a = st1[st1['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfs1 = pd.DataFrame(columns=columns)
dfs1['node2'] = st1_ov_a
dfs1['#node1'] = "STAT1"

# Saving Network File
#dfs1.to_csv("padj05/ovrlp/IFNa_ST1_ov.tsv", sep="\t", index=False)

In [15]:
# IFNa ST2
st2 = pd.read_csv("padj05/IFNa/IFNa_MRGgene_pST2.tsv", sep="\t") 
st2_ov_a = st2[st2['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfs2 = pd.DataFrame(columns=columns)
dfs2['node2'] = st2_ov_a
dfs2["#node1"] = "STAT2"

#dfs2.to_csv("padj05/ovrlp/IFNa_ST2_ov.tsv", sep="\t", index=False)

In [9]:
# IFNa IRF1
ir1 = pd.read_csv("padj05/IFNa/IFNa_MRGgene_IRF1.tsv", sep="\t")
ir1_ov_a = ir1[ir1['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfir1 = pd.DataFrame(columns = columns)
dfir1['node2'] = ir1_ov_a
dfir1['#node1'] = "IRF1"

dfir1.to_csv("padj05/ovrlp/IFNa_IRF1_ov.tsv", sep="\t", index=False)

In [12]:
# IFNa IRF9
ir9 = pd.read_csv("padj05/IFNa/IFNa_MRGgene_IRF9.tsv", sep="\t")
ir9_ov_a = ir9[ir9['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfir9 = pd.DataFrame(columns=columns)
dfir9["node2"] = ir9_ov_a
dfir9["#node1"] = "IRF9"

dfir9.to_csv("padj05/ovrlp/IFNa_IRF9_ov.tsv", index= False, sep="\t")

In [19]:
# To confrim
x = [dfs1, dfs2, dfir1, dfir9]
for i in x:
    print(i.shape)

(189, 2)
(149, 2)
(127, 2)
(97, 2)


## IFNg

In [21]:
ovr = pd.read_csv("padj05/ovrlp/ovrlp_genes.txt", sep="\t")
ovr.head(2)

Unnamed: 0,Names,total,elements,Unnamed: 3,IFNa,110,AASS,Unnamed: 7,IFNy,88,ANP32E
0,IFNa IFNy,220.0,A2M,,,,ANAPC4,,,,APOL3
1,,,ACOT7,,,,ANKFY1,,,,ARF6


In [22]:
# Overlaped genes
ovr['elements'].shape

(220,)

In [23]:
# IFNy ST1
st1 = pd.read_csv("padj05/IFNy/IFNy_MRGgene_pST1.tsv", sep="\t")
st1.head(2)


Unnamed: 0,symbol,level_0,level_1,motfcol
0,EPSTI1,16,composite,blue
1,TMEM126B,131,GAS,green


In [24]:
st1_ov_a = st1[st1['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfs1 = pd.DataFrame(columns=columns)
dfs1['node2'] = st1_ov_a
dfs1['#node1'] = "STAT1"

# Saving Network File
dfs1.to_csv("padj05/ovrlp/IFNy_ST1_ov.tsv", sep="\t", index=False)

In [None]:
##### NOT included
# IFNa ST2
st2 = pd.read_csv("padj05/IFNy/IFNy_MRGgene_tST2.tsv", sep="\t") 
st2_ov_a = st2[st2['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfs2 = pd.DataFrame(columns=columns)
dfs2['node2'] = st2_ov_a
dfs2["#node1"] = "STAT2"

dfs2.to_csv("padj05/ovrlp/IFNy_ST2_ov.tsv", sep="\t", index=False)

In [26]:
# IFNa IRF1
ir1 = pd.read_csv("padj05/IFNy/IFNy_MRGgene_IRF1.tsv", sep="\t")
ir1_ov_a = ir1[ir1['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfir1 = pd.DataFrame(columns = columns)
dfir1['node2'] = ir1_ov_a
dfir1['#node1'] = "IRF1"

dfir1.to_csv("padj05/ovrlp/IFNy_IRF1_ov.tsv", sep="\t", index=False)

In [27]:
# IFNa IRF9
ir9 = pd.read_csv("padj05/IFNy/IFNy_MRGgene_IRF9.tsv", sep="\t")
ir9_ov_a = ir9[ir9['symbol'].isin(ovr['elements'])].reset_index(drop=True).iloc[:,0]
columns = ["#node1", "node2"]
dfir9 = pd.DataFrame(columns=columns)
dfir9["node2"] = ir9_ov_a
dfir9["#node1"] = "IRF9"

dfir9.to_csv("padj05/ovrlp/IFNy_IRF9_ov.tsv", index= False, sep="\t")

In [28]:
# To confrim
x = [dfs1, dfs2, dfir1, dfir9]
for i in x:
    print(i.shape)

(144, 2)
(149, 2)
(129, 2)
(78, 2)


# Task: construction of customised networks

In [None]:
# To avoid running above commends, follow below
# IFNa
st1_mrg = pd.read_csv("padj05/IFNa/IFNa_MRGgene_pST1.tsv", sep="\t")
st2_mrg = pd.read_csv("padj05/IFNa/IFNa_MRGgene_pST2.tsv", sep="\t")
print(st1_mrg['level_1'].unique())
print(st1_mrg.columns)

['composite' 'ISRE' 'GAS']
Index(['symbol', 'level_0', 'level_1', 'motfcol'], dtype='object')


In [None]:
# Goal: GAF vs GAF-like on GAS & Composites genes
# generating GAF_like
st1_mrg_gc = st1_mrg[(st1_mrg['level_1'] == "GAS") | (st1_mrg['level_1'] == 'composite')]
st2_mrg_gc = st2_mrg[(st2_mrg['level_1'] == "GAS") | (st2_mrg['level_1'] == 'composite')]
s1_s2_gc = pd.merge(st1_mrg_gc, st2_mrg_gc, on="symbol", how="inner")
s1_s2_gc = s1_s2_gc[["symbol"]]
#s1_s2_gc.to_csv("padj05/Networks/IFNa_GAF_like_GC.tsv", sep="\t", index=False)

# sending GAF to clipboard
st1_mrg_gc['symbol'].to_clipboard(sep="\t", index=False, header=None)
s1_s2_gc.shape

(112, 1)

In [None]:
# Goal: GAF vs GAF-like on GAS genes
# generating GAF_like
st1_mrg_gas = st1_mrg[st1_mrg['level_1'] == "GAS"]
st2_mrg_gas = st2_mrg[st2_mrg['level_1'] == "GAS"]
s1_s2_gas = pd.merge(st1_mrg_gas, st2_mrg_gas, on="symbol", how="inner")
s1_s2_gas = s1_s2_gas[["symbol"]]
#s1_s2_gas.to_csv("padj05/Networks/IFNy_GAF_like.tsv", sep="\t", index=False)

# sending GAF to clipboard
st1_mrg_gas['symbol'].to_clipboard(sep="\t", index=False, header=None)
s1_s2_gas.shape

(10, 1)

In [None]:
# Goal:ISGF3 vs IRF1 on ISRE and Composite genes (after network construction, ISRE & composite genes are selected in Cytoscape)
# {(IRF1 U IRF9) intersection with (STAT1 intersection STAT2)} ISGF3
irf1_9 = pd.merge(irf1_mrg.iloc[:,0], irf9_mrg.iloc[:,0], on="symbol", how="outer")
st1_2 = pd.merge(st1_mrg.iloc[:,0], st2_mrg.iloc[:,0], on="symbol", how="inner")
isgf3 = pd.merge(st1_2, irf1_9, on="symbol", how="inner")
isgf3.to_clipboard(sep="\t", index=False, header=None)

# ISGF3 network with IRF1 is combined (in Cytoscape) to genearte ISGF3 vs IRF1

# Task: Networks for IFNy

In [None]:
# Goal: ST1-IRF1
st1 = pd.read_csv("IFNy_MRGgene_pST1.tsv", sep="\t")
irf1 = pd.read_csv("IFNy_MRGgene_IRF1.tsv", sep="\t")
irf9 = pd.read_csv("IFNy_MRGgene_IRF9.tsv", sep="\t")

In [None]:
st1.head(3)

Unnamed: 0,symbol,level_0,level_1,motfcol
0,EPSTI1,16,composite,blue
1,TMEM126B,131,GAS,green
2,ZC3HAV1,54,composite,blue


In [None]:

st1_g = st1.iloc[:,0]
irf1_g = irf1.iloc[:,0]
irf9_g = irf9.iloc[:,0]
for d in [st1_g, irf1_g, irf9_g]:
  print("dimension", d.shape)

dimension (187,)
dimension (168,)
dimension (88,)


In [None]:
print(st1_g.tail(3))
print(irf1_g.head(3))
print(irf1_g.tail(3))
print(irf9_g.head(3))
print(irf9_g.tail(3))

184     CDIPT
185    NEURL3
186     SSBP4
Name: symbol, dtype: object
0       OPTN
1    ZC3HAV1
2     RNF213
Name: symbol, dtype: object
165    USP18
166    PRRG4
167    MEIS1
Name: symbol, dtype: object
0     EPSTI1
1    ZC3HAV1
2     RNF213
Name: symbol, dtype: object
85     PSPH
86    CMPK2
87    USP18
Name: symbol, dtype: object


In [None]:
# Create dataframe
columns = ["#node1", "node2"]

dfg = pd.concat([st1_g, irf1_g], ignore_index=True)

# create dataframe
df = pd.DataFrame(columns=columns)

# Fill column
df["node2"] = dfg

df.iloc[0:187,0] = "STAT1"
df.iloc[187:,0] = "IRF1"

# Saving
df.to_csv("IFNy_ST1_IRF1.tsv", sep="\t", index=False)

df.iloc[185:188,:]

Unnamed: 0,#node1,node2
185,STAT1,NEURL3
186,STAT1,SSBP4
187,IRF1,OPTN


In [None]:
# Goal: ST1-IRF1_IRF9 

columns = ["#node1", "node2"]

dfg = pd.concat([st1_g, irf1_g, irf9_g], ignore_index=True)

df = pd.DataFrame(columns=columns)

df['node2'] = dfg

df.iloc[0:187,0] = "STAT1"
df.iloc[187:355,0] = "IRF1"
df.iloc[355:,0] = "IRF9"

# Saving and this file is imported as a network to cytoscape  
df.to_csv("IFNy_ST1_IRF1_IRF9.tsv", index=False, sep="\t")

# Task: Enrichment analysis

In [None]:
g = pd.read_csv("overlap_IFNa_y/jVenn_IFNa_IFNy_GAS.csv")
c = pd.read_csv("overlap_IFNa_y/jVenn_IFNa_IFNy_composite.csv")
i = pd.read_csv("overlap_IFNa_y/jVenn_IFNa_IFNy_ISRE.csv")
g.head(3)

Unnamed: 0,IFNa_GAS,IFNy_GAS,IFNa_GAS|IFNy_GAS
0,AASS,ANP32E,A2M
1,ANAPC4,ARF6,AGFG2
2,APOB,ASGR1,AGT


In [None]:
# Finding overlapping elements between two columns
# https://stackoverflow.com/questions/18079563/finding-the-intersection-between-two-series-in-pandas

s1 = pd.Series(g['IFNa_GAS']).dropna()
s2 = pd.Series(g['IFNy_GAS']).dropna()
pd.Series(np.intersect1d(s1,s2))

Series([], dtype: object)

In [None]:
# saving
g['IFNa_GAS|IFNy_GAS'].to_csv("overlap_IFNa_y/GAS.txt", sep="\t", index=False, header=None)
c['IFNa_comp|IFNy_comp'].to_csv("overlap_IFNa_y/Comp.txt", sep="\t", index=False, header=None)
i['IFNa_ISRE|IFNy_ISRE'].to_csv("overlap_IFNa_y/ISRE.txt", sep="\t", index=False, header=None)

In [None]:
pd.