In [73]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [74]:
!pip install torch_geometric



In [75]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html


In [76]:
cd /content/drive/MyDrive/Academic/Topics/AI/Machine\ Learning\ Dr.\ Montazeri/Project/ml_mda

/content/drive/MyDrive/Academic/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


# Requirements

In [77]:
import logging
import sys

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(stream=sys.stdout)
    ],
    force=True
)

In [78]:
logger = logging.getLogger(__name__)

In [79]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [80]:
from src.optimization import MatrixFeatureBasedSklearnClassifierTrainer, MatrixFeatureBasedSklearnClassifierTester
from src.config import SklearnClassifierConfig
from src.models import JaccardSimilarityFeatureBasedSklearnClassifier, JaccardSimilarityFeatureBasedSklearnClassifierFactory
from src.data import MicrobeDiseaseAssociationData, MicrobeDiseaseAssociationTrainTestSpliter
from src.features import get_associations, get_entities
from src.utils import train_test_sampler
from base import cross_validation, OptimizerConfig


# Classification

## Data

In [81]:
associations = get_associations()

train_indices, test_indices = train_test_sampler(associations.shape[0], 0.7)

data = MicrobeDiseaseAssociationData(associations)

train_data = MicrobeDiseaseAssociationData(associations.iloc[train_indices])
test_data = MicrobeDiseaseAssociationData(associations.iloc[test_indices])

## Classifier

In [82]:
microbe_ids = get_entities().loc[get_entities()['type'] == 'Microbe']['id'].tolist()
disease_ids = get_entities().loc[get_entities()['type'] == 'Disease']['id'].tolist()

In [83]:
classifier_config = SklearnClassifierConfig()
classifier_config.classifier = 'AdaBoost'
classifier_config.random_state = 0
classifier_config.penalty = 'l2'
classifier_config.C = 0.5
classifier_config.n_estimators = 100
classifier_config.criterion = 'gini'
classifier_config.max_depth = 3

In [84]:
mda_classifier = JaccardSimilarityFeatureBasedSklearnClassifier(classifier_config, microbe_ids=microbe_ids, disease_ids=disease_ids)

2024-02-12 08:13:38,629 [INFO] Initializing MatrixFeatureExtractor
2024-02-12 08:13:38,637 [INFO] Initializing SimilarityFeatureExtractor
2024-02-12 08:13:38,643 [INFO] Initializing MatrixFeatureBasedSklearnClassifier with model : None


## Optimizer

In [85]:
classifier_optimizer_config = OptimizerConfig()
classifier_optimizer_config.exp_name = "adam optimizer"
classifier_optimizer_config.threshold = 0.5

## Train Test Approach

### Train

In [86]:
train_result = MatrixFeatureBasedSklearnClassifierTrainer().train(model=mda_classifier, data=train_data,config=classifier_optimizer_config)

2024-02-12 08:13:38,700 [INFO] Call Training with adam optimizer
2024-02-12 08:13:38,720 [INFO] Calling build with associations :      disease  microbe  increased
536      654     8480          0
173    43621    45317          1
794    64149    10276          0
298    12403    23832          1
130    54370    29163          1
..       ...      ...        ...
764     1667    32606          0
336     7877    14120          1
787    40873     5135          0
355    12403    65813          1
655    11153    25176          0

[628 rows x 3 columns]
2024-02-12 08:13:38,846 [INFO] interaction matrix with shape (5179, 5645) has built
2024-02-12 08:13:39,393 [INFO] mask matrix with shape (5179, 5645) has built. This matrix shows not non elements.
2024-02-12 08:13:41,057 [INFO] interaction has been imputed to delete nans
2024-02-12 08:13:41,059 [INFO] Building Jaccard similarity for diseases
2024-02-12 08:13:51,107 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0

  similarity_matrix = dot_product / union


2024-02-12 08:13:52,244 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:13:52,256 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:13:58,425 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:13:58,455 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:13:58,572 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:13:59,035 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:13:59,040 [INFO] building data for classifier
2024-02-12 08:13:59,293 [INFO] shape of y : (628,), shape of X : (628, 10824)
2024-02-12 08:13:59,296 [INFO] fitting classifier : AdaBoost
2024-02-12 08:14:16,595 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

### Test

In [87]:
test_result = MatrixFeatureBasedSklearnClassifierTester().test(model=mda_classifier, data=test_data, config=classifier_optimizer_config)

2024-02-12 08:14:16,611 [INFO] Call Testing with adam optimizer
2024-02-12 08:14:17,519 [INFO] Result on Test Data : {'AUC': 0.9494539267877723, 'ACC': 0.8814814814814815, 'F1 Score': 0.880193022352876, 'AUPR': 0, 'Loss': 0}


In [88]:
test_result.get_result()

{'AUC': 0.9494539267877723,
 'ACC': 0.8814814814814815,
 'F1 Score': 0.880193022352876,
 'AUPR': 0,
 'Loss': 0}

In [89]:
mda_classifier.fe.microbe_similarity_matrix.sum(axis=0)[:100]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , 14.84285714,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , 33.43134921,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

## Cross Validation

In [90]:
trainer = MatrixFeatureBasedSklearnClassifierTrainer()
tester = MatrixFeatureBasedSklearnClassifierTester()
factory = JaccardSimilarityFeatureBasedSklearnClassifierFactory(model_config=classifier_config, microbe_ids=microbe_ids, disease_ids=disease_ids)
spliter = MicrobeDiseaseAssociationTrainTestSpliter(data.associations)
cross_validation(k=5, data_size=data.associations.shape[0], train_test_spliter=spliter, model_factory=factory,
                    trainer=trainer, tester=tester, config=classifier_optimizer_config)

2024-02-12 08:14:17,622 [INFO] Initializing SimilarityFeatureBasedSklearnClassifierFactory
2024-02-12 08:14:17,634 [INFO] Initializing MicrobeDiseaseAssociationTrainTestSpliter
2024-02-12 08:14:17,639 [INFO] Start 5-fold Cross Validation with config : adam optimizer
2024-02-12 08:14:17,644 [INFO] ---- Fold 1 ----
2024-02-12 08:14:17,648 [INFO] Initializing MatrixFeatureExtractor
2024-02-12 08:14:17,654 [INFO] Initializing SimilarityFeatureExtractor
2024-02-12 08:14:17,656 [INFO] Initializing MatrixFeatureBasedSklearnClassifier with model : None
2024-02-12 08:14:17,663 [INFO] Call Training with adam optimizer
2024-02-12 08:14:17,671 [INFO] Calling build with associations :      disease  microbe  increased
0      50863    33211          1
1      43621    40832          1
4      33293    14909          1
5      33293    35937          1
6      12403    26565          1
..       ...      ...        ...
891    55164    18341          0
892    22068    20153          0
894    25026    60601 

  similarity_matrix = dot_product / union


2024-02-12 08:14:30,294 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:14:30,305 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:14:36,839 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:14:36,869 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:14:36,985 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:14:37,385 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:14:37,390 [INFO] building data for classifier
2024-02-12 08:14:37,660 [INFO] shape of y : (719,), shape of X : (719, 10824)
2024-02-12 08:14:37,663 [INFO] fitting classifier : AdaBoost
2024-02-12 08:14:56,456 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

  similarity_matrix = dot_product / union


2024-02-12 08:15:06,052 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:06,057 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:15:11,148 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:11,185 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:15:11,318 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:11,839 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:11,847 [INFO] building data for classifier
2024-02-12 08:15:12,302 [INFO] shape of y : (719,), shape of X : (719, 10824)
2024-02-12 08:15:12,305 [INFO] fitting classifier : AdaBoost
2024-02-12 08:15:31,742 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

  similarity_matrix = dot_product / union


2024-02-12 08:15:41,354 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:41,361 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:15:46,628 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:46,657 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:15:46,771 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:47,179 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:15:47,184 [INFO] building data for classifier
2024-02-12 08:15:47,479 [INFO] shape of y : (719,), shape of X : (719, 10824)
2024-02-12 08:15:47,480 [INFO] fitting classifier : AdaBoost
2024-02-12 08:16:06,680 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

  similarity_matrix = dot_product / union


2024-02-12 08:16:15,917 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:15,922 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:16:21,756 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:21,792 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:16:21,928 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:22,429 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:22,435 [INFO] building data for classifier
2024-02-12 08:16:22,941 [INFO] shape of y : (719,), shape of X : (719, 10824)
2024-02-12 08:16:22,944 [INFO] fitting classifier : AdaBoost
2024-02-12 08:16:43,097 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

  similarity_matrix = dot_product / union


2024-02-12 08:16:53,370 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:53,375 [INFO] Building Jaccard similarity for microbes
2024-02-12 08:16:58,242 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:58,269 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-12 08:16:58,384 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:58,776 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-12 08:16:58,779 [INFO] building data for classifier
2024-02-12 08:16:59,056 [INFO] shape of y : (716,), shape of X : (716, 10824)
2024-02-12 08:16:59,060 [INFO] fitting classifier : AdaBoost
2024-02-12 08:17:19,914 [INFO] Result on Train Data : {'AUC': 1.0, 'ACC': 1.0, 'F1 Score': 1.0, 'AUPR': 0, '

<base.evaluation.Result at 0x7850a1933520>