In [1]:
from data.pancreas import preprocessing
import giraffe 
from netZooPy.otter.otter import otter
import numpy as np
import pandas as pd

In [6]:
# When False, the guard cell below calls preprocessing.generate_data();
# when True that call is skipped.
cache = True

In [7]:
# Only run the preprocessing step when `cache` is False.
# NOTE(review): this flag guards preprocessing only; the GIRAFFE / OTTER
# cells further down are always recomputed regardless of `cache`.
if not cache:
    preprocessing.generate_data()

In [8]:
# Read the two expression tables (tab-separated, first column used as the row index).
gtex = pd.read_csv("data/pancreas/raw/xprs_panc_gtex_snail.tsv", sep = '\t', index_col = 0)
tcga = pd.read_csv("data/pancreas/raw/xprs_panc_tcga_snail.tsv", sep = '\t', index_col = 0)

# Row labels are truncated to their first 15 characters
# (presumably to drop the version suffix of the gene id; confirm against the raw files).
genes = [row_label[:15] for row_label in gtex.index]


def _relabel_rows(frame):
    """Return a new DataFrame with `frame`'s values and columns but `genes` as row index."""
    return pd.DataFrame(frame.to_numpy(), index = genes, columns = frame.columns)


# NOTE(review): the TCGA table is relabelled with the ids derived from the GTEx
# index, which is only correct if both files list the same genes in the same
# row order. This is not verified here.
gtex = _relabel_rows(gtex)
tcga = _relabel_rows(tcga)

# Motif prior and PPI network (comma-separated, first column used as the row index).
motif = pd.read_csv("data/pancreas/motif.txt", index_col = 0)
ppi = pd.read_csv("data/pancreas/ppi.csv", index_col = 0)

In [72]:
# Fit GIRAFFE on the GTEx expression matrix. The PPI matrix is passed with the
# identity added to it (i.e. ones added on its diagonal).
giraffe_gtex = giraffe.Giraffe(
    gtex.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy() + np.eye(ppi.shape[0]),
).get_regulation()

# Persist the result with the expression row labels as index and the motif
# columns as column labels.
pd.DataFrame(
    giraffe_gtex,
    index = gtex.index,
    columns = motif.columns,
).to_csv("data/pancreas/giraffe_gtex.csv")

In [7]:
# Fit GIRAFFE on the TCGA expression matrix. The PPI matrix is passed with the
# identity added to it (i.e. ones added on its diagonal).
giraffe_tcga = giraffe.Giraffe(
    tcga.to_numpy(),
    motif.to_numpy(),
    ppi.to_numpy() + np.eye(ppi.shape[0]),
).get_regulation()

# Label the rows with the index of the matrix the model was actually fitted on
# (tcga), not with gtex.index. Both carry the same labels as loaded above, so
# the written file is unchanged, but the labelling no longer depends on that.
pd.DataFrame(
    giraffe_tcga,
    index = tcga.index,
    columns = motif.columns,
).to_csv("data/pancreas/giraffe_tcga.csv")

In [8]:
# Run OTTER on TCGA: transposed motif prior, PPI with the identity added, and
# the row-wise correlation matrix of the TCGA expression values.
tcga_otter = otter(
    motif.to_numpy().T,
    ppi.to_numpy() + np.eye(ppi.shape[0]),
    np.corrcoef(tcga.to_numpy()),
    Iter = 200,
)

# Column labels come from the matrix the correlation was computed on (tcga)
# rather than from gtex.index. Both carry the same labels as loaded above, so
# the written file is unchanged.
pd.DataFrame(
    tcga_otter,
    index = motif.columns,
    columns = tcga.index,
).to_csv("data/pancreas/tcga_otter.csv")

In [9]:
# Run OTTER on GTEx: transposed motif prior, PPI with the identity added, and
# the row-wise correlation matrix of the GTEx expression values.
gtex_otter = otter(
    motif.to_numpy().T,
    ppi.to_numpy() + np.eye(ppi.shape[0]),
    np.corrcoef(gtex.to_numpy()),
    Iter = 200,
)

# Persist with motif columns as row labels and expression row labels as columns.
pd.DataFrame(
    gtex_otter,
    index = motif.columns,
    columns = gtex.index,
).to_csv("data/pancreas/gtex_otter.csv")

# Oncogenes vs Tumor Suppressor Genes

In [272]:
# Reload the GIRAFFE results written to disk by the cells above; from here on
# giraffe_gtex / giraffe_tcga are DataFrames (first CSV column as row index).
giraffe_gtex = pd.read_csv("data/pancreas/giraffe_gtex.csv", index_col = 0)
giraffe_tcga = pd.read_csv("data/pancreas/giraffe_tcga.csv", index_col = 0)
# Gene annotation table; the columns 'Tier', 'Gene Symbol', 'Role in Cancer' and
# 'Tumour Types(Somatic)' are used below.
# (presumably a COSMIC Cancer Gene Census export; confirm provenance)
census = pd.read_csv("data/pancreas/raw/Census_allThu Feb 2 22 18 29 2023.csv")
# Mapping table used below to go from 'gene_name' to 'gene_id'.
translate = pd.read_csv("data/pancreas/raw/gen_v26_mapping.csv")

In [273]:
def _role_to_sign(role):
    """Map a 'Role in Cancer' entry to -1 (TSG), +1 (oncogene) or 0 (neither / missing).

    'TSG' is tested first, so an entry that mentions both 'TSG' and 'oncogene'
    maps to -1. Non-string entries (e.g. NaN for a missing role) map to 0.
    """
    if not isinstance(role, str):
        return 0
    if 'TSG' in role:
        return -1
    if 'oncogene' in role:
        return 1
    return 0


# Tier 1 genes only. `.loc[...]` plus `.copy()` gives an independent frame, so
# adding the 'sign' column below does not write into a slice of `census`
# (which is what triggers pandas' SettingWithCopyWarning).
df = census.loc[
    census['Tier'] == 1,
    ['Gene Symbol', 'Role in Cancer', 'Tumour Types(Somatic)'],
].copy()

# Compute the sign directly on the filtered rows instead of building a list over
# the whole census and relying on index alignment when assigning it.
df['sign'] = df['Role in Cancer'].map(_role_to_sign)

In [283]:
# Per-gene score: element-wise difference of the two GIRAFFE result tables
# (GTEx minus TCGA), summed across the columns of each row.
scores = np.sum(giraffe_gtex - giraffe_tcga, axis = 1)

# gene_name -> gene_id lookup, built once instead of rebuilding a set and
# re-filtering `translate` on every iteration. setdefault keeps the first
# occurrence of a gene_name, matching the original `.values[0]` lookup.
symbol_to_gene_id = {}
for gene_name, gene_id in zip(translate['gene_name'], translate['gene_id']):
    symbol_to_gene_id.setdefault(gene_name, gene_id)

hit = 0  # genes whose score sign matches the census sign
tot = 0  # genes that passed every filter below
# Single pass over the rows of df (assumes 'Gene Symbol' is unique in df, in
# which case this visits exactly the rows the per-gene lookups used to pick).
for gene, tumour_types, gt in zip(df['Gene Symbol'], df['Tumour Types(Somatic)'], df['sign']):
    # A float here is a missing (NaN) tumour-type entry.
    if isinstance(tumour_types, float):
        continue
    # Keep only genes whose somatic tumour types mention 'pancr' (case-sensitive).
    if 'pancr' not in tumour_types:
        continue
    if gene not in symbol_to_gene_id:
        continue
    # Truncate to 15 characters, the same length the expression row labels were cut to.
    translation = symbol_to_gene_id[gene][0:15]
    if translation not in scores.index:
        continue
    # sign 0 means neither 'TSG' nor 'oncogene' was found in the role: nothing to compare.
    if gt == 0:
        continue
    real = scores[translation]
    tot += 1
    # Hit: positive score with sign +1, or negative score with sign -1.
    if (real > 0 and gt == 1) or (real < 0 and gt == -1):
        hit += 1
print(str(hit) + " " + str(tot))

14 20


# TF Knockdown

In [267]:
# Refit GIRAFFE on GTEx, this time passing iterations = 50 (the fit saved to
# giraffe_gtex.csv earlier did not pass `iterations`).
# NOTE: this rebinds `giraffe_gtex`, which the oncogene/TSG section loaded as a
# DataFrame, to whatever get_regulation() returns; the knockdown cell below
# indexes it positionally as giraffe_gtex[row, column].
giraffe_gtex = giraffe.Giraffe(gtex.to_numpy(), motif.to_numpy(), ppi.to_numpy() + np.eye(ppi.shape[0]), iterations = 50).get_regulation()

In [270]:
# Knockdown experiment table for ZIC2, reduced to the three columns used below.
intervention = pd.read_csv("data/pancreas/raw/ZIC2.csv")[
    ['Target Gene', 'Mean Expr. of Control', 'Mean Expr. of Treat']
]
# gene_name -> gene_id mapping table.
translate = pd.read_csv("data/pancreas/raw/gen_v26_mapping.csv")
# 'diff' is True where the control mean is strictly greater than the treated mean.
intervention['diff'] = intervention['Mean Expr. of Control'].gt(intervention['Mean Expr. of Treat'])

In [271]:
# Compare, for every target gene of the ZIC2 knockdown, the sign of the
# GIRAFFE weight in the ZIC2 column with the direction of the observed
# expression change ('diff' is True when control mean > treated mean).
tf_index = list(motif.columns).index('ZIC2')

# Lookups built once, instead of rebuilding a set of gene names and scanning
# list(gtex.index) for every row of the intervention table. setdefault keeps
# the first occurrence, matching `.values[0]` and `list.index` respectively.
symbol_to_gene_id = {}
for gene_name, gene_id in zip(translate['gene_name'], translate['gene_id']):
    symbol_to_gene_id.setdefault(gene_name, gene_id)
gene_row = {}
for row, row_label in enumerate(gtex.index):
    gene_row.setdefault(row_label, row)

score = 0  # targets where weight sign and direction of change agree
tot = 0    # targets that could be mapped to a row of the expression matrix
a = 0      # targets with a positive weight
b = 0      # targets with a non-positive weight
aa = 0     # positive weight and 'diff' True
bb = 0     # non-positive weight and 'diff' False
for target, control_higher in zip(intervention['Target Gene'], intervention['diff']):
    if target not in symbol_to_gene_id:
        continue
    # Truncate to 15 characters, the same length the expression row labels were cut to.
    translation = symbol_to_gene_id[target][0:15]
    if translation not in gene_row:
        continue
    tot += 1
    if giraffe_gtex[gene_row[translation], tf_index] > 0:
        a += 1
        if control_higher:
            aa += 1
            score += 1
    else:
        b += 1
        if not control_higher:
            bb += 1
            score += 1
# Fraction of mapped targets on which the two agree (raises ZeroDivisionError
# if no target could be mapped, as before).
print(score / tot)

0.6863169280692266


In [None]:
# Knockdown agreement score per TF, recorded by hand. The ZIC2 entry is the
# 0.686... printed by the cell above, rounded to two decimals; the other two
# were presumably obtained by re-running the knockdown cells with that TF's
# file and column. TODO confirm.
knockdown_agreement = {
    'HOXB7': 0.67,
    'FOXO3': 0.53,
    'ZIC2': 0.69,
}
knockdown_agreement