In [1]:
import pandas as pd
import os
import yaml
import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

In [2]:
# Load the tab-separated pathway summary for glycolysis8450, replace the 'Name'
# column with a 'hash' column (the run of characters following 'params-' in
# 'Name', stopping at the first '.' or '/'), and move 'hash' to the front.
glycolysis8450_summary = (
    pd.read_csv('test-oi2-outputs/test2/glycolysis8450-pathway-summary.txt', sep='\t')
    .assign(hash=lambda frame: frame['Name'].str.extract(r'params-([^.\/]+)', expand=False))
    .drop(columns=['Name'])
    .pipe(lambda frame: frame[['hash', *frame.columns.drop('hash')]])
)
glycolysis8450_summary.head()

Unnamed: 0,hash,Number of nodes,Number of undirected edges,Number of connected components,Nodes in prize
0,24QHVS5,0,0,0,0
1,2ETE5TC,0,0,0,0
2,2VM5MQL,9,8,1,8
3,2WECIOD,9,8,1,8
4,3XS3BUE,8,7,1,7


In [3]:
# Hashes of the glycolysis8450 rows whose 'Number of nodes' is 0.
glycolysis8450_empty_hashes = glycolysis8450_summary.loc[
    glycolysis8450_summary['Number of nodes'].eq(0), 'hash'
].tolist()

In [4]:
# Load the tab-separated pathway summary for metabolism3385, replace the 'Name'
# column with a 'hash' column (the run of characters following 'params-' in
# 'Name', stopping at the first '.' or '/'), and move 'hash' to the front.
metabolism3385_summary = (
    pd.read_csv('test-oi2-outputs/test2/metabolism3385-pathway-summary.txt', sep='\t')
    .assign(hash=lambda frame: frame['Name'].str.extract(r'params-([^.\/]+)', expand=False))
    .drop(columns=['Name'])
    .pipe(lambda frame: frame[['hash', *frame.columns.drop('hash')]])
)
metabolism3385_summary.head()

Unnamed: 0,hash,Number of nodes,Number of undirected edges,Number of connected components,Nodes in prize
0,24QHVS5,25,24,1,24
1,2ETE5TC,0,0,0,0
2,2VM5MQL,25,24,1,24
3,2WECIOD,25,24,1,24
4,3XS3BUE,28,27,1,22


In [5]:
# Hashes of the metabolism3385 rows whose 'Number of nodes' is 0.
metabolism3385_empty_hashes = metabolism3385_summary.loc[
    metabolism3385_summary['Number of nodes'].eq(0), 'hash'
].tolist()

In [6]:
# Load the tab-separated pathway summary for purine1035, replace the 'Name'
# column with a 'hash' column (the run of characters following 'params-' in
# 'Name', stopping at the first '.' or '/'), and move 'hash' to the front.
purine1035_summary = (
    pd.read_csv('test-oi2-outputs/test2/purine1035-pathway-summary.txt', sep='\t')
    .assign(hash=lambda frame: frame['Name'].str.extract(r'params-([^.\/]+)', expand=False))
    .drop(columns=['Name'])
    .pipe(lambda frame: frame[['hash', *frame.columns.drop('hash')]])
)
purine1035_summary.head()

Unnamed: 0,hash,Number of nodes,Number of undirected edges,Number of connected components,Nodes in prize
0,24QHVS5,39,38,1,39
1,2ETE5TC,41,40,1,37
2,2VM5MQL,39,38,1,39
3,2WECIOD,39,38,1,39
4,3XS3BUE,43,42,1,38


In [7]:
# Hashes of the purine1035 rows whose 'Number of nodes' is 0.
purine1035_empty_hashes = purine1035_summary.loc[
    purine1035_summary['Number of nodes'].eq(0), 'hash'
].tolist()

In [8]:
# Load the tab-separated pathway summary for glycosphingolipid4993, replace the
# 'Name' column with a 'hash' column (the run of characters following 'params-'
# in 'Name', stopping at the first '.' or '/'), and move 'hash' to the front.
glycosphingolipid4993_summary = (
    pd.read_csv('test-oi2-outputs/test2/glycosphingolipid4993-pathway-summary.txt', sep='\t')
    .assign(hash=lambda frame: frame['Name'].str.extract(r'params-([^.\/]+)', expand=False))
    .drop(columns=['Name'])
    .pipe(lambda frame: frame[['hash', *frame.columns.drop('hash')]])
)
glycosphingolipid4993_summary.head()

Unnamed: 0,hash,Number of nodes,Number of undirected edges,Number of connected components,Nodes in prize
0,24QHVS5,0,0,0,0
1,2ETE5TC,0,0,0,0
2,2VM5MQL,3,2,1,3
3,2WECIOD,0,0,0,0
4,3XS3BUE,3,2,1,3


In [9]:
# Hashes of the glycosphingolipid4993 rows whose 'Number of nodes' is 0.
glycosphingolipid4993_empty_hashes = glycosphingolipid4993_summary.loc[
    glycosphingolipid4993_summary['Number of nodes'].eq(0), 'hash'
].tolist()

In [10]:
# Hashes that appear in the empty-hash list of all four datasets.
common_empty_hash = set.intersection(
    *(
        set(hashes)
        for hashes in (
            purine1035_empty_hashes,
            metabolism3385_empty_hashes,
            glycolysis8450_empty_hashes,
            glycosphingolipid4993_empty_hashes,
        )
    )
)
print("Common strings:", sorted(common_empty_hash))

Common strings: ['G6QWKUF', 'H5EGTCE', 'I5EKII6', 'JHH34ZE', 'KMHTT5N', 'NIXQON6', 'O3SYN4K', 'ON3OUHY', 'R2T74G3', 'RCXKAY7', 'Z2C4Y5G']


In [11]:
# Print the parsed contents of every .yaml file in the output directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the printed listing stable across runs and filesystems.
yaml_dir = 'test-oi2-outputs/test2'
yaml_files = sorted(f for f in os.listdir(yaml_dir) if f.endswith('.yaml'))
for file in yaml_files:
    path = os.path.join(yaml_dir, file)
    # Only the parse needs the open handle; printing happens after it is closed.
    with open(path, 'r') as f:
        content = yaml.safe_load(f)
    print(f"--- Contents of {file} ---")
    print(content)
    print()

--- Contents of parameters-omicsintegrator2-params-G6QWKUF.yaml ---
{'b': 1, 'g': 5, 'w': 3}

--- Contents of parameters-omicsintegrator2-params-H5EGTCE.yaml ---
{'b': 1, 'g': 5, 'w': 5}

--- Contents of parameters-omicsintegrator2-params-I5EKII6.yaml ---
{'b': 0.5, 'g': 3, 'w': 1}

--- Contents of parameters-omicsintegrator2-params-JHH34ZE.yaml ---
{'b': 0.5, 'g': 5, 'w': 3}

--- Contents of parameters-omicsintegrator2-params-KMHTT5N.yaml ---
{'b': 0.5, 'g': 3, 'w': 3}

--- Contents of parameters-omicsintegrator2-params-NIXQON6.yaml ---
{'b': 0.5, 'g': 0, 'w': 1}

--- Contents of parameters-omicsintegrator2-params-O3SYN4K.yaml ---
{'b': 0.5, 'g': 5, 'w': 5}

--- Contents of parameters-omicsintegrator2-params-ON3OUHY.yaml ---
{'b': 0.5, 'g': 5, 'w': 1}

--- Contents of parameters-omicsintegrator2-params-R2T74G3.yaml ---
{'b': 0.5, 'g': 3, 'w': 5}

--- Contents of parameters-omicsintegrator2-params-RCXKAY7.yaml ---
{'b': 0.5, 'g': 0, 'w': 3}

--- Contents of parameters-omicsintegrator2-