In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive; the project
# directory entered in a later cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.0


In [3]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/pyg_lib-0.4.0%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_cluster-1.6.3%2Bp

In [4]:
cd /content/drive/MyDrive/Me\ --\ Education/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda

/content/drive/MyDrive/Me -- Education/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


# Requirements

In [5]:
import logging
import sys

# Configure the root logger for the notebook: records at INFO and above are
# written to stdout, prefixed with a timestamp and the level name.
# force=True makes basicConfig remove and close any handlers already attached
# to the root logger before applying this configuration.
logging.basicConfig(
    force=True,
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

In [6]:
# Logger for this notebook, named after the current module (`__name__`).
logger = logging.getLogger(__name__)

In [7]:
import torch

# Pick the compute device: use the GPU when CUDA is available, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [9]:
from src.optimization import DiseaseAssessClassifierTrainer, DiseaseAssessClassifierTester
from src.models import DPosClassifier, DPosClassifierFactory
from src.data import MicrobeDiseaseAssociationData, MicrobeDiseaseAssociationTrainTestSpliter
from src.features import get_associations, get_entities
from src.utils import train_test_sampler
from base import cross_validation, OptimizerConfig


# Classification

## Data

In [10]:
# Fraction of the association rows assigned to the training split; the rest
# goes to the test split.
TRAIN_FRACTION = 0.7

# Load the full microbe–disease association table.
associations = get_associations()

# Sample row positions for the train / test partitions.
# NOTE(review): no seed is passed to train_test_sampler here — confirm whether
# it seeds internally; otherwise the split (and the metrics below) will differ
# between runs.
train_indices, test_indices = train_test_sampler(associations.shape[0], TRAIN_FRACTION)

# Dataset over every association (used by the cross-validation section).
data = MicrobeDiseaseAssociationData(associations)

# Datasets restricted to the sampled train / test rows (positional indexing).
train_data = MicrobeDiseaseAssociationData(associations.iloc[train_indices])
test_data = MicrobeDiseaseAssociationData(associations.iloc[test_indices])

## Classifier

In [11]:
# Load the entity table once and reuse it for both filters instead of calling
# get_entities() again for every mask and selection.
# NOTE(review): assumes get_entities() returns the same table on each call — confirm.
entities = get_entities()

# Ids of all entities of each type, as plain Python lists. A single
# .loc[mask, column] lookup avoids chained indexing.
microbe_ids = entities.loc[entities['type'] == 'Microbe', 'id'].tolist()
disease_ids = entities.loc[entities['type'] == 'Disease', 'id'].tolist()

In [12]:
# Instantiate the classifier from the microbe and disease id lists; it is
# fitted in the Train section and evaluated in the Test section below.
mda_classifier = DPosClassifier(microbe_ids, disease_ids)

2024-02-22 18:39:14,225 [INFO] Initializing AssessClassifier
2024-02-22 18:39:14,238 [INFO] Initializing MatrixFeatureExtractor


## Optimizer

In [13]:
# Shared configuration object handed to the trainer, the tester and
# cross_validation. Fields are set one by one, in this order:
#   exp_name  - experiment label (it appears in the "Call Training with ..." log lines)
#   device    - compute device selected earlier in the notebook
#   threshold - presumably the decision cutoff applied to predicted scores; confirm
classifier_optimizer_config = OptimizerConfig()
for _attr, _val in (
    ("exp_name", "assess optimizer"),
    ("device", device),
    ("threshold", 0.5),
):
    setattr(classifier_optimizer_config, _attr, _val)

## Train Test Approach

### Train

In [14]:
# Fit the classifier on the training split using the shared optimizer config.
classifier_trainer = DiseaseAssessClassifierTrainer()
train_result = classifier_trainer.train(
    model=mda_classifier,
    data=train_data,
    config=classifier_optimizer_config,
)

2024-02-22 18:39:19,522 [INFO] Call Training with assess optimizer
2024-02-22 18:39:19,524 [INFO] Building DPosClassifier
2024-02-22 18:39:19,535 [INFO] Calling build with associations :      disease  microbe  increased
538    64149    51285          0
45     43621    44108          1
116    33293    57582          1
290     9724    31268          1
262    33293    13641          1
..       ...      ...        ...
705    31069    41953          0
752    64642    45697          0
872    43372      926          0
131    10506    14120          1
81     43621    55588          1

[628 rows x 3 columns]
2024-02-22 18:39:19,624 [INFO] interaction matrix with shape (5179, 5645) has built
2024-02-22 18:39:19,921 [INFO] mask matrix with shape (5179, 5645) has built. This matrix shows not non elements.
2024-02-22 18:39:21,244 [INFO] interaction has been imputed to delete nans
2024-02-22 18:39:21,304 [INFO] pos : [654, 12403, 20066, 33293, 43621, 44112, 48777, 50863]
2024-02-22 18:39:21,322 [INF

### Test

In [15]:
# Evaluate the trained classifier on the held-out test split with the same
# optimizer config that was used for training.
classifier_tester = DiseaseAssessClassifierTester()
test_result = classifier_tester.test(
    model=mda_classifier,
    data=test_data,
    config=classifier_optimizer_config,
)

2024-02-22 18:39:23,589 [INFO] Call Testing with assess optimizer
2024-02-22 18:39:23,613 [INFO] Result on Test Data : {'AUC': 0.8163450437077354, 'ACC': 0.8185185185185185, 'F1 Score': 0.8171919691589172, 'AUPR': 0, 'Loss': 0}


In [16]:
# Display the test metrics as a dict (AUC, ACC, F1 Score, AUPR, Loss).
# NOTE(review): the recorded output reports 'AUPR': 0 and 'Loss': 0 — these
# look like unpopulated placeholders rather than measured values; confirm in
# the tester implementation.
test_result.get_result()

{'AUC': 0.8163450437077354,
 'ACC': 0.8185185185185185,
 'F1 Score': 0.8171919691589172,
 'AUPR': 0,
 'Loss': 0}

## Cross Validation

In [18]:
# 5-fold cross-validation over the full association set.
# The factory is passed instead of a model instance; the recorded log shows a
# classifier being initialised inside each fold.
trainer = DiseaseAssessClassifierTrainer()
tester = DiseaseAssessClassifierTester()
factory = DPosClassifierFactory(microbe_ids, disease_ids)
spliter = MicrobeDiseaseAssociationTrainTestSpliter(data.associations)

# Keep a handle on the returned result object instead of discarding it, so the
# cross-validation outcome can be inspected in later cells.
cv_result = cross_validation(
    k=5,
    data_size=data.associations.shape[0],
    train_test_spliter=spliter,
    model_factory=factory,
    trainer=trainer,
    tester=tester,
    config=classifier_optimizer_config,
)

# Displayed as the cell output (a base.evaluation.Result object).
# NOTE(review): if it exposes get_result() like the test result does, call
# that here to show the metrics instead of the object repr — confirm.
cv_result

2024-02-22 18:42:38,590 [INFO] Initializing AssessClassifierFactory
2024-02-22 18:42:38,592 [INFO] Initializing MicrobeDiseaseAssociationTrainTestSpliter
2024-02-22 18:42:38,596 [INFO] Start 5-fold Cross Validation with config : assess optimizer
2024-02-22 18:42:38,598 [INFO] ---- Fold 1 ----
2024-02-22 18:42:38,601 [INFO] Initializing AssessClassifier
2024-02-22 18:42:38,602 [INFO] Initializing MatrixFeatureExtractor
2024-02-22 18:42:38,606 [INFO] Call Training with assess optimizer
2024-02-22 18:42:38,608 [INFO] Building DPosClassifier
2024-02-22 18:42:38,617 [INFO] Calling build with associations :      disease  microbe  increased
0      50863    33211          1
1      43621    40832          1
2      33293    47880          1
3      13213    53186          1
4      33293    14909          1
..       ...      ...        ...
893    64642    53920          0
894    25026    60601          0
895    25026    44316          0
896    31069    60226          0
897    64642     4251       

<base.evaluation.Result at 0x7b1b15670220>