In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from goatools.obo_parser import GODag
from goatools.anno.genetogo_reader import Gene2GoReader
from scipy.stats import chi2_contingency, fisher_exact
from statsmodels.stats.multitest import multipletests

In [2]:
from dataManager import DataManager, LogManager

In [3]:
# --- Logging ------------------------------------------------------------
# Active logger used by the following cells to report progress.
logger = LogManager(is_active=True)

# --- Input locations ----------------------------------------------------
# Every input file sits under DATA_PATH (relative to this notebook). The
# trailing slash matters: file names are appended to it directly.
DATA_PATH = "../../preparation/codice/"
HPO2GENES_PATH = DATA_PATH + "phenotype_to_genes.txt"
# NOTE(review): "prova" is Italian for "trial"; presumably a small sample of
# the HPO-to-genes table used for quick tests - confirm before relying on it.
HPO2GENES_PROVA_PATH = DATA_PATH + "HPO2Genes_head.csv"
GO_ONTOLOGY_PATH = DATA_PATH + "go-basic.obo"
GENE2GO_PATH = DATA_PATH + "gene2go"  # the downloaded gene2go file

# --- Data manager -------------------------------------------------------
# Created empty here; the import cells below load the HPO and GO data into it.
data_manager = DataManager()

print(DATA_PATH)

../../preparation/codice/


In [None]:
# Load the HPO-to-genes annotations into the data manager and preview them.
logger.log("Importing HPO2Genes file...")
# NOTE(review): L_bound / R_bound look like lower/upper bounds used to filter
# HPO terms during import - confirm against DataManager.importHPO2GeneFile.
data_manager.importHPO2GeneFile(HPO2GENES_PATH, L_bound=50, R_bound=75)
logger.log("HPO2Genes file imported.")
# TODO: report the shape of the imported matrix here. The previous statement
# was an unfinished attribute access inside an f-string (a SyntaxError that
# prevented the whole cell from running); the attribute that exposes the full
# matrix is not known from this notebook, so it is not guessed at.
print(data_manager.hpo_head())

Importing HPO2Genes file...
HPO2Genes file imported.
hpo_id        HP:0000011  HP:0000012  HP:0000013  HP:0000046  HP:0000066  \
ncbi_gene_id                                                               
16                     0           0           0           0           0   
18                     0           0           0           0           0   
19                     0           0           0           0           0   
21                     0           0           0           0           0   
22                     0           0           0           0           0   

hpo_id        HP:0000097  HP:0000110  HP:0000113  HP:0000114  HP:0000155  ...  \
ncbi_gene_id                                                              ...   
16                     0           0           0           0           0  ...   
18                     0           0           0           0           0  ...   
19                     0           0           0           0           0  ...   
21       

In [8]:
# NCBI taxonomy ID 9606 is human; only annotations for the listed taxa are
# requested from the gene2go file.
humanTaxID = 9606
GO_taxonomies = [humanTaxID]

logger.log("\nImporting GO2Genes file...")
# Slow step: reading gene2go took roughly 3m20s in the recorded run.
data_manager.importGO2GeneFile(
    go_ontology_path=GO_ONTOLOGY_PATH,
    gene2go_path=GENE2GO_PATH,
    taxids=GO_taxonomies,
)
logger.log("GO2Genes file imported.")

# Plain-text preview of the first rows of the gene x GO-term table.
print(data_manager.go_head())


Importing GO2Genes file...
../../preparation/codice/go-basic.obo: fmt(1.2) rel(2024-10-27) 44,017 Terms
**NOTE: DEFAULT TAXID STORED FROM gene2go IS 9606 (human)

HMS:0:03:19.954839 362,883 annotations, 20,819 genes, 18,767 GOs, 1 taxids READ: ../../preparation/codice/gene2go 
20785 IDs in loaded association branch, biological_process
GO2Genes file imported.
GO    GO:0000002  GO:0000009  GO:0000012  GO:0000014  GO:0000015  GO:0000016  \
Gene                                                                           
1              0           0           0           0           0           0   
2              0           0           0           0           0           0   
9              0           0           0           0           0           0   
10             0           0           0           0           0           0   
12             0           0           0           0           0           0   

GO    GO:0000017  GO:0000018  GO:0000019  GO:0000022  ...  GO:2001288  \
Gene

In [7]:
# Rich-display preview of the first rows of the HPO table: one row per
# ncbi_gene_id, one column per hpo_id.
data_manager.hpo_head()

hpo_id,HP:0000003,HP:0000011,HP:0000012,HP:0000013,HP:0000022,HP:0000046,HP:0000056,HP:0000058,HP:0000066,HP:0000072,...,HP:3000036,HP:5200005,HP:5200010,HP:5200017,HP:5200018,HP:5200029,HP:5200123,HP:5201016,HP:6000231,HP:6000531
ncbi_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Statistical Test (Chi2 or Fisher)

In [None]:
data_manager.compute_significance(self, hpo_column:str, go_columns:list = None,
                              method:str = "chi2", only_significant:bool=True,
                              correction:str = None):