In [1]:
import torch
import numpy as np
import pandas as pd

from hypothesis_tests import chatterjee, benjamini_hochberg, NEWCORR, COR, DCOR, HSIC, HHG, TIC

In [2]:
# Competitor tests whose selections are compared against Chatterjee's test.
# HSIC is imported above but is currently left out of the comparison.
competitors = [COR, DCOR, HHG, TIC]

In [3]:
# Gene expression table; the first row of the CSV holds the column names.
spellman = pd.read_csv("data/spellman_gene_expr_data.csv", header=0)
# Every column after the first is treated as a gene
# (presumably the first column is "time" — TODO confirm against the CSV).
genes = spellman.columns[1:].tolist()

In [4]:
# 0/1 indicator per gene: set to 1 as soon as any competitor method selects it.
competitor_genes = torch.zeros(len(genes))
for method in competitors:
    # Each method's p-values are read from a precomputed text file
    # (assumes `method` formats to the file-name stem — TODO confirm).
    pvalue_path = f"results/spellman/{method}_pvalues.txt"
    method_pvals = torch.tensor(np.genfromtxt(pvalue_path))
    # benjamini_hochberg presumably yields the indices of rejected hypotheses.
    for rejected in benjamini_hochberg(method_pvals):
        competitor_genes[rejected] = 1

In [5]:
# Run Chatterjee's test for every gene column against the "time" column.
x = torch.tensor(spellman["time"].values)
pvals = torch.zeros(len(genes))
for i, gene in enumerate(genes):
    y = torch.tensor(spellman[gene].values)
    # Element [1] of the result is stored as the gene's p-value (assumes
    # chatterjee returns (statistic, p-value) here — TODO confirm).
    pvals[i] = chatterjee(x, y, compute_pvalue=True)[1]

# 0/1 indicator per gene: 1 where benjamini_hochberg returned the gene's index.
rejects_idx = benjamini_hochberg(pvals)
chatterjee_genes = torch.zeros(len(genes))
for i in rejects_idx:
    chatterjee_genes[i] = 1

In [6]:
# Number of genes selected by Chatterjee's test, then by any competitor.
print(chatterjee_genes.sum(), competitor_genes.sum(), sep="\n")

tensor(586.)
tensor(703.)


In [7]:
# Genes that were selected by Chatterjee but not by any competitor.
# The difference of the two 0/1 indicators is -1, 0 or 1; clamping at zero
# keeps only the Chatterjee-only genes as 1.
selected = torch.clamp(chatterjee_genes - competitor_genes, min=0)
selected.sum()

tensor(302.)

In [8]:
# Map each gene name to its position in `genes` (and in the indicator tensors).
lookup = {gene: position for position, gene in enumerate(genes)}

In [9]:
# For each listed gene, show its name, its Chatterjee indicator and its
# competitor indicator, followed by a separator line.
picked = ["YOR308C", "YJL115W", "YGR177C", "YPR119W", "YKL127W", "YHR143W"]

for gene in picked:
    position = lookup[gene]
    print(
        gene,
        chatterjee_genes[position],
        competitor_genes[position],
        "---------------------------------",
        sep="\n",
    )

YOR308C
tensor(1.)
tensor(0.)
---------------------------------
YJL115W
tensor(1.)
tensor(0.)
---------------------------------
YGR177C
tensor(1.)
tensor(0.)
---------------------------------
YPR119W
tensor(1.)
tensor(0.)
---------------------------------
YKL127W
tensor(1.)
tensor(0.)
---------------------------------
YHR143W
tensor(1.)
tensor(0.)
---------------------------------


In [10]:
# For each listed gene, show its name, its Chatterjee indicator and its
# competitor indicator, followed by a separator line.
not_picked = ["YOR140W", "YCL021W", "YJR086W", "YLR406C", "YLR283W", "YOR378W"]

for gene in not_picked:
    position = lookup[gene]
    print(
        gene,
        chatterjee_genes[position],
        competitor_genes[position],
        "---------------------------------",
        sep="\n",
    )

YOR140W
tensor(0.)
tensor(0.)
---------------------------------
YCL021W
tensor(0.)
tensor(0.)
---------------------------------
YJR086W
tensor(0.)
tensor(1.)
---------------------------------
YLR406C
tensor(0.)
tensor(0.)
---------------------------------
YLR283W
tensor(0.)
tensor(0.)
---------------------------------
YOR378W
tensor(0.)
tensor(0.)
---------------------------------
