In [1]:
import giraffe
import numpy as np
import pandas as pd

# BRCA
## Loading the data

In [2]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# BRCA case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("BRCA_case.csv", index_col=0)
expression_control = pd.read_csv("BRCA_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label so that later
# filtering against the (also sorted) motif prior keeps rows in the same order.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [18]:
# Breast motif prior; rows share labels with the expression tables.
motif = pd.read_csv("cancer_breast_otter_motif.csv", index_col=0)
motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

# Breast protein-protein interaction matrix, used as-is.
ppi = pd.read_csv("cancer_breast_otter_ppi.csv", index_col=0)

## Inferring GIRAFFE GRNs

In [26]:
# Run GIRAFFE on the BRCA case expression with the motif prior and PPI,
# and keep the regulation matrix it returns.
giraffe_case = giraffe.Giraffe(
    expression_case.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_case = giraffe_case.get_regulation()

In [29]:
# Run GIRAFFE on the BRCA control expression with the same prior and PPI.
giraffe_control = giraffe.Giraffe(
    expression_control.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_control = giraffe_control.get_regulation()

In [38]:
# Label the BRCA case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/brca/network_case.tsv", sep="\t")

In [39]:
# Label the BRCA control regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/brca/network_control.tsv", sep="\t")

# CESC
## Loading the data

In [2]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# CESC case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("CESC_case.csv", index_col=0)
expression_control = pd.read_csv("CESC_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [3]:
# Cervix motif prior; rows share labels with the expression tables.
motif = pd.read_csv("cancer_cervix_otter_motif.csv", index_col=0)
motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

# Cervix protein-protein interaction matrix, used as-is.
ppi = pd.read_csv("cancer_cervix_otter_ppi.csv", index_col=0)

## Inferring GIRAFFE GRNs 

In [4]:
# Run GIRAFFE on the CESC case expression with the motif prior and PPI,
# and keep the regulation matrix it returns.
giraffe_case = giraffe.Giraffe(
    expression_case.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_case = giraffe_case.get_regulation()

In [6]:
# Run GIRAFFE on the CESC control expression with the same prior and PPI.
giraffe_control = giraffe.Giraffe(
    expression_control.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_control = giraffe_control.get_regulation()

In [5]:
# Label the CESC case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/cesc/network_case.tsv", sep="\t")

In [7]:
# Label the CESC control regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/cesc/network_control.tsv", sep="\t")

In [10]:
# Show the CESC control regulation matrix as the cell output (sanity check
# of the values GIRAFFE returned; NumPy abbreviates the printed array).
R_control

array([[-1.41590017e-05, -5.42589878e-05, -2.13047952e-06, ...,
        -1.15869830e-06, -6.58308352e-07, -1.10720403e-05],
       [ 1.40295315e-05,  5.35938598e-05,  4.70353780e-06, ...,
         3.68506630e-06,  2.90170965e-06,  1.14952563e-05],
       [-4.69008728e-06, -2.08475158e-05, -1.51473614e-07, ...,
         2.37834783e-07,  2.68224227e-07, -4.41910242e-06],
       ...,
       [-1.68665019e-06, -1.52168077e-05,  2.63134297e-07, ...,
         2.95587341e-07,  6.61290699e-07,  1.19812995e-07],
       [-1.62465331e-05, -6.49121503e-05, -2.94241158e-06, ...,
        -1.96299834e-06, -1.10652149e-06, -1.08949371e-05],
       [-1.84157088e-05, -6.99226875e-05, -3.28591864e-06, ...,
        -2.11574365e-06, -1.37013478e-06, -1.32230189e-05]], dtype=float32)

# LIHC
## Loading the data

In [3]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# LIHC case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("LIHC_case.csv", index_col=0)
expression_control = pd.read_csv("LIHC_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [5]:
# Liver motif prior; rows share labels with the expression tables.
motif = pd.read_csv("cancer_liver_otter_motif.csv", index_col=0)
motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

# Liver protein-protein interaction matrix, used as-is.
ppi = pd.read_csv("cancer_liver_otter_ppi.csv", index_col=0)

## Inferring GIRAFFE GRNs

In [6]:
# Run GIRAFFE on the LIHC case expression with the motif prior and PPI,
# and keep the regulation matrix it returns.
giraffe_case = giraffe.Giraffe(
    expression_case.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_case = giraffe_case.get_regulation()

In [8]:
# Run GIRAFFE on the LIHC control expression with the same prior and PPI.
giraffe_control = giraffe.Giraffe(
    expression_control.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy(),
)
R_control = giraffe_control.get_regulation()

In [7]:
# Label the LIHC case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/lihc/network_case.tsv", sep="\t")

In [39]:
# Label the LIHC control regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/lihc/network_control.tsv", sep="\t")

# LUAD
## Loading the data

In [2]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# LUAD case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("LUAD_case.csv", index_col=0)
expression_control = pd.read_csv("LUAD_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [21]:
# Motif prior and PPI matrix; the prior's rows share labels with the
# expression tables.
motif = pd.read_csv("motif.csv", index_col=0)
ppi = pd.read_csv("ppi_matrix.csv", index_col=0)

motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

## Inferring GIRAFFE GRNs

In [29]:
# Run GIRAFFE on the LUAD case expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_case = giraffe.Giraffe(expression_case.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [31]:
# Run GIRAFFE on the LUAD control expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_control = giraffe.Giraffe(expression_control.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [30]:
# Label the LUAD case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/luad/network_case.tsv", sep="\t")

In [32]:
# Label the LUAD control regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/luad/network_control.tsv", sep="\t")

# THCA
## Loading the data

In [3]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# THCA case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("THCA_case.csv", index_col=0)
expression_control = pd.read_csv("THCA_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [4]:
# Motif prior and PPI matrix; the prior's rows share labels with the
# expression tables.
motif = pd.read_csv("motif.csv", index_col=0)
ppi = pd.read_csv("ppi_matrix.csv", index_col=0)

motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

## Inferring GIRAFFE GRNs

In [5]:
# Run GIRAFFE on the THCA case expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_case = giraffe.Giraffe(expression_case.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [7]:
# Run GIRAFFE on the THCA control expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_control = giraffe.Giraffe(expression_control.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [6]:
# Label the THCA case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/thca/network_case.tsv", sep="\t")

In [8]:
# Label the THCA control regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/thca/network_control.tsv", sep="\t")

# PRAD
## Loading the data

In [2]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# PRAD case and control expression tables; the first CSV column becomes the row index.
expression_case = pd.read_csv("PRAD_case.csv", index_col=0)
expression_control = pd.read_csv("PRAD_nat.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_case.index = [gene_to_name(label) for label in expression_case.index]
expression_control.index = [gene_to_name(label) for label in expression_control.index]
expression_case = expression_case.sort_index()
expression_control = expression_control.sort_index()

In [3]:
# Motif prior and PPI matrix; the prior's rows share labels with the
# expression tables.
motif = pd.read_csv("motif.csv", index_col=0)
ppi = pd.read_csv("ppi_matrix.csv", index_col=0)

motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the case table alone lets the control table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the control network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_case.index).intersection(expression_control.index)
expression_case = expression_case.loc[expression_case.index.isin(shared_genes), :]
expression_control = expression_control.loc[expression_control.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

## Inferring GIRAFFE GRNs

In [4]:
# Run GIRAFFE on the PRAD case expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_case = giraffe.Giraffe(expression_case.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [5]:
# Label the PRAD case regulation matrix with the prior's row/column names
# and write it out tab-separated.
network_case = pd.DataFrame(R_case, index=motif.index, columns=motif.columns)
network_case.to_csv("data/prad/network_case.tsv", sep="\t")

In [6]:
# Run GIRAFFE on the PRAD control expression. The PPI gets 1 added along its
# diagonal first (presumably to include self-interactions; confirm). The
# identity is sized from the PPI itself rather than a hard-coded 644, so the
# cell keeps working if the PPI file changes; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_control = giraffe.Giraffe(expression_control.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [None]:
# Label the PRAD control regulation matrix with the prior's row/column names
# and write it out tab-separated. The target is the PRAD folder: this cell
# previously pointed at data/lihc/network_control.tsv, which would have
# overwritten the LIHC control network with PRAD results.
network_control = pd.DataFrame(R_control, index=motif.index, columns=motif.columns)
network_control.to_csv("data/prad/network_control.tsv", sep="\t")

# LIHC SEX
## Loading the data

In [4]:
def gene_to_name(gene):
    """Return the first 15 characters of a gene identifier."""
    # presumably this trims a version suffix off Ensembl gene IDs; confirm
    return gene[:15]


# LIHC expression split by sex; the first CSV column becomes the row index.
expression_male = pd.read_csv("LIHC_male.csv", index_col=0)
expression_female = pd.read_csv("LIHC_female.csv", index_col=0)

# Shorten every row label, then order the rows by label.
expression_male.index = [gene_to_name(label) for label in expression_male.index]
expression_female.index = [gene_to_name(label) for label in expression_female.index]
expression_male = expression_male.sort_index()
expression_female = expression_female.sort_index()

In [8]:
# Motif prior and PPI matrix; the prior's rows share labels with the
# expression tables.
motif = pd.read_csv("motif.csv", index_col=0)
ppi = pd.read_csv("ppi_matrix.csv", index_col=0)

motif.sort_index(inplace=True)

# Restrict all three tables to the genes they have in common. Trimming the
# prior against the male table alone lets the female table keep a different
# gene set, which would misalign its rows with the prior when both are handed
# to GIRAFFE and when the female network is labelled with motif.index.
shared_genes = motif.index.intersection(expression_male.index).intersection(expression_female.index)
expression_male = expression_male.loc[expression_male.index.isin(shared_genes), :]
expression_female = expression_female.loc[expression_female.index.isin(shared_genes), :]
motif = motif.loc[motif.index.isin(shared_genes), :]

## Inferring GIRAFFE GRNs

In [15]:
# Run GIRAFFE on the male LIHC expression with the prior in `motif`. The PPI
# gets 1 added along its diagonal first (presumably to include
# self-interactions; confirm). The identity is sized from the PPI itself
# rather than a hard-coded 644; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_male = giraffe.Giraffe(expression_male.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [16]:
# Label the male regulation matrix with the prior's row/column names and
# write it out tab-separated as the "v1" male network.
network_male = pd.DataFrame(R_male, index=motif.index, columns=motif.columns)
network_male.to_csv("data/lihc_sex/network_malev1.tsv", sep="\t")

In [17]:
# Run GIRAFFE on the female LIHC expression with the prior in `motif`. The
# PPI gets 1 added along its diagonal first (presumably to include
# self-interactions; confirm). The identity is sized from the PPI itself
# rather than a hard-coded 644; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_female = giraffe.Giraffe(expression_female.to_numpy(), motif.to_numpy(), ppi_with_self).get_regulation()

In [18]:
# Label the female regulation matrix with the prior's row/column names and
# write it out tab-separated as the "v1" female network.
network_female = pd.DataFrame(R_female, index=motif.index, columns=motif.columns)
network_female.to_csv("data/lihc_sex/network_femalev1.tsv", sep="\t")

In [31]:
# Male-specific motif prior, rows ordered by label.
motif_male = pd.read_csv("motif_male.txt", index_col=0).sort_index()

# Keep the genes present in both the male expression table and the male prior.
in_male_prior = expression_male.index.isin(motif_male.index)
expression_male = expression_male.loc[in_male_prior, :]
motif_male = motif_male.loc[motif_male.index.isin(expression_male.index), :]

# Put the prior's columns in sorted order.
motif_male = motif_male.reindex(sorted(motif_male.columns), axis=1)

# Cut the PPI down to rows/columns named after the prior's columns and sort both axes.
# NOTE(review): `ppi` is overwritten here, so every later cell sees the
# narrowed matrix; reload it from disk before re-running earlier sections.
male_tfs = motif_male.columns
ppi = ppi.loc[ppi.index.isin(male_tfs), ppi.columns.isin(male_tfs)]
ppi = ppi.sort_index()
ppi = ppi.reindex(sorted(ppi.columns), axis=1)

In [37]:
# Female-specific motif prior, rows ordered by label.
motif_female = pd.read_csv("motif_female.txt", index_col=0).sort_index()

# Keep the genes present in both the female expression table and the female prior.
in_female_prior = expression_female.index.isin(motif_female.index)
expression_female = expression_female.loc[in_female_prior, :]
motif_female = motif_female.loc[motif_female.index.isin(expression_female.index), :]

# Put the prior's columns in sorted order.
motif_female = motif_female.reindex(sorted(motif_female.columns), axis=1)

# Cut the PPI down to rows/columns named after the prior's columns and sort both axes.
# NOTE(review): this filters whatever `ppi` currently holds, so any earlier
# narrowing of `ppi` carries over and the result depends on which cells ran
# before this one; verify the PPI still matches each prior's columns.
female_tfs = motif_female.columns
ppi = ppi.loc[ppi.index.isin(female_tfs), ppi.columns.isin(female_tfs)]
ppi = ppi.sort_index()
ppi = ppi.reindex(sorted(ppi.columns), axis=1)

In [40]:
# Run GIRAFFE on the male LIHC expression with the male-specific prior. The
# PPI gets 1 added along its diagonal first (presumably to include
# self-interactions; confirm). The identity is sized from the current PPI
# rather than a hard-coded 640, because the PPI's size depends on how it was
# filtered beforehand; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_male = giraffe.Giraffe(expression_male.to_numpy(), motif_male.to_numpy(), ppi_with_self).get_regulation()

In [42]:
# Label the male regulation matrix with the male prior's row/column names
# and write it out tab-separated as the "v2" male network.
network_male = pd.DataFrame(R_male, index=motif_male.index, columns=motif_male.columns)
network_male.to_csv("data/lihc_sex/network_malev2.tsv", sep="\t")

In [43]:
# Run GIRAFFE on the female LIHC expression with the female-specific prior.
# The PPI gets 1 added along its diagonal first (presumably to include
# self-interactions; confirm). The identity is sized from the current PPI
# rather than a hard-coded 640, because the PPI's size depends on how it was
# filtered beforehand; assumes the PPI is square.
ppi_with_self = ppi.to_numpy() + np.eye(ppi.shape[0])
R_female = giraffe.Giraffe(expression_female.to_numpy(), motif_female.to_numpy(), ppi_with_self).get_regulation()

In [45]:
# Label the female regulation matrix with the female prior's row/column names
# and write it out tab-separated as the "v2" female network.
network_female = pd.DataFrame(R_female, index=motif_female.index, columns=motif_female.columns)
network_female.to_csv("data/lihc_sex/network_femalev2.tsv", sep="\t")