In [1]:
# Attach the user's Google Drive to the Colab VM so the project checkout
# stored there is reachable from the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.0 MB[0m [31m3.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


In [3]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/pyg_lib-0.4.0%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m63.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_cluster-1.6.3%2Bp

In [4]:
cd /content/drive/MyDrive/Academic/Topics/AI/Machine\ Learning\ Dr.\ Montazeri/Project/ml_mda

/content/drive/MyDrive/Academic/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


# Requirements

In [5]:
import logging
import sys

# Route all log records at INFO level and above to stdout so they appear in
# the cell output. force=True replaces any handlers that were already
# attached to the root logger.
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
stdout_handler = logging.StreamHandler(stream=sys.stdout)

logging.basicConfig(
    force=True,
    handlers=[stdout_handler],
    format=LOG_FORMAT,
    level=logging.INFO,
)

In [6]:
# Logger for this notebook, named after the current module (`__name__`).
# It inherits the root configuration installed by logging.basicConfig.
logger = logging.getLogger(__name__)

In [7]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [8]:
from src.optimization import MatrixFeatureBasedMDAClassifierTrainer, MatrixFeatureBasedMDAClassifierTester
from src.config import SimpleClassifierConfig
from src.models import JaccardSimilarityFeatureBasedMDAClassifier, JaccardSimilarityFeatureBasedMDAClassifierFactory
from src.data import MicrobeDiseaseAssociationData, MicrobeDiseaseAssociationTrainTestSpliter
from src.features import get_associations, get_entities
from src.utils import train_test_sampler
from base import cross_validation, OptimizerConfig


2024-02-11 20:55:34,518 [INFO] NumExpr defaulting to 2 threads.


# Classification

## Data

In [9]:
# Fraction of association rows handed to the training split; the rest is
# held out for testing.
TRAIN_FRACTION = 0.7

associations = get_associations()
n_associations = associations.shape[0]

# Positional row indices for each split.
# NOTE(review): the sampler appears to shuffle rows and no seed is set here,
# so the split presumably differs between runs - confirm and seed if needed.
train_indices, test_indices = train_test_sampler(n_associations, TRAIN_FRACTION)

# Full dataset, used later by the cross-validation section.
data = MicrobeDiseaseAssociationData(associations)

# Hold-out datasets for the single train/test experiment.
train_associations = associations.iloc[train_indices]
train_data = MicrobeDiseaseAssociationData(train_associations)

test_associations = associations.iloc[test_indices]
test_data = MicrobeDiseaseAssociationData(test_associations)

## Classifier

In [10]:
# Fetch the entity table once and reuse it: the original called
# get_entities() four times and indexed the result with chained
# `.loc[...][...]`. A single `.loc[row_mask, 'id']` selects the same values.
entities = get_entities()

# Ids of all entities of each type, in table order.
microbe_ids = entities.loc[entities['type'] == 'Microbe', 'id'].tolist()
disease_ids = entities.loc[entities['type'] == 'Disease', 'id'].tolist()

In [11]:
# Hyper-parameters of the classifier. The input width is one feature per
# microbe id plus one per disease id.
simple_classifier_config = SimpleClassifierConfig()

classifier_settings = {
    "model_name": "simple classifier",
    "input_dim": len(microbe_ids) + len(disease_ids),
    "hidden_dim": 8,
    "output_dim": 1,
    "num_layers": 2,
    "dropout": 0.3,
}
# Applied in declaration order, exactly like individual attribute assignments.
for setting_name, setting_value in classifier_settings.items():
    setattr(simple_classifier_config, setting_name, setting_value)

In [12]:
# Build the Jaccard-similarity feature based classifier over the full set of
# microbe and disease ids.
mda_classifier = JaccardSimilarityFeatureBasedMDAClassifier(
    simple_classifier_config,
    microbe_ids=microbe_ids,
    disease_ids=disease_ids,
)

2024-02-11 20:55:41,738 [INFO] Initializing MatrixFeatureExtractor
2024-02-11 20:55:41,740 [INFO] Initializing SimilarityFeatureExtractor
2024-02-11 20:55:41,744 [INFO] Initializing MDFeatureBasedMDAClassifier with model : simple classifier
2024-02-11 20:55:41,745 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-11 20:55:41,746 [INFO] Initial SimpleMLP with 10824 input dimension, 8 hidden dimension, 1 
            output dimension, 2 layers and with 0.3 dropout


## Optimizer

In [13]:
# Training / evaluation settings shared by the train-test run and the
# cross-validation run below.
classifier_optimizer_config = OptimizerConfig()

optimizer_settings = {
    "optimizer": torch.optim.Adam,
    "criterion": torch.nn.BCEWithLogitsLoss(),
    "lr": 0.01,
    "batch_size": 32,
    "n_epoch": 50,
    "exp_name": "adam optimizer",
    "save": False,
    "save_path": None,
    "device": device,
    "report_size": 10,  # batch to report ratio
    "threshold": 0.5,
}
# Applied in declaration order, exactly like individual attribute assignments.
for option_name, option_value in optimizer_settings.items():
    setattr(classifier_optimizer_config, option_name, option_value)

## Train Test Approach

### Train

In [14]:
# Fit the classifier on the training split using the shared optimizer config.
classifier_trainer = MatrixFeatureBasedMDAClassifierTrainer()
train_result = classifier_trainer.train(
    model=mda_classifier,
    data=train_data,
    config=classifier_optimizer_config,
)

2024-02-11 20:55:41,795 [INFO] Call Training with adam optimizer
2024-02-11 20:55:41,806 [INFO] Calling build with associations :      disease  microbe  increased
459    31069    16707          0
336     7877    14120          1
328    61336    54894          1
345    20066    13641          1
571    59444    23039          0
..       ...      ...        ...
341      654    35590          1
369    59444    61711          1
677     9724     8766          0
543    28016    59584          0
455    64149    64571          0

[628 rows x 3 columns]
2024-02-11 20:55:41,955 [INFO] interaction matrix with shape (5179, 5645) has built
2024-02-11 20:55:42,262 [INFO] mask matrix with shape (5179, 5645) has built. This matrix shows not non elements.
2024-02-11 20:55:44,257 [INFO] interaction has been imputed to delete nans
2024-02-11 20:55:44,259 [INFO] Building Jaccard similarity for diseases
2024-02-11 20:55:49,601 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0

  similarity_matrix = dot_product / union


2024-02-11 20:55:51,046 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:55:51,055 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:55:56,133 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:55:56,169 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:55:56,340 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:55:56,919 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:55:57,221 [INFO] Initializing SimplePytorchData with X shape : torch.Size([628, 10824]) and y shape : torch.Size([628, 1])
2024-02-11 20:55:57,225 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:55:57,227 [INFO] moving data and model to cpu
2024-02-11 20:55:57,383 [IN

### Test

In [15]:
# Evaluate the trained classifier on the held-out split.
classifier_tester = MatrixFeatureBasedMDAClassifierTester()
test_result = classifier_tester.test(
    model=mda_classifier,
    data=test_data,
    config=classifier_optimizer_config,
)

2024-02-11 20:56:01,757 [INFO] Call Testing with adam optimizer
2024-02-11 20:56:01,885 [INFO] Initializing SimplePytorchData with X shape : torch.Size([270, 10824]) and y shape : torch.Size([270, 1])
2024-02-11 20:56:01,886 [INFO] Running Simple Tester with config : adam optimizer
2024-02-11 20:56:01,889 [INFO] moving data and model to cpu
2024-02-11 20:56:01,932 [INFO] Result on Test Data : {'AUC': 0.841747146619842, 'ACC': 0.8333333333333334, 'F1 Score': 0.8304848273456575, 'AUPR': 0, 'Loss': 1.059285541375478}


In [16]:
# Show the held-out metrics (AUC, ACC, F1 Score, AUPR, Loss) as the cell output.
test_result.get_result()

{'AUC': 0.841747146619842,
 'ACC': 0.8333333333333334,
 'F1 Score': 0.8304848273456575,
 'AUPR': 0,
 'Loss': 1.059285541375478}

In [17]:
# Sanity check on the fitted feature extractor: column totals of the microbe
# similarity matrix, first 100 entries only to keep the output short.
microbe_similarity = mda_classifier.fe.microbe_similarity_matrix
column_totals = microbe_similarity.sum(axis=0)
column_totals[:100]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , 89.00119048,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , 15.57738095,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

## Cross Validation

In [18]:
# 5-fold cross validation over the full association table. A fresh model is
# built by the factory for every fold, then trained and tested with the same
# optimizer config as the single train/test run.
trainer = MatrixFeatureBasedMDAClassifierTrainer()
tester = MatrixFeatureBasedMDAClassifierTester()
factory = JaccardSimilarityFeatureBasedMDAClassifierFactory(
    model_config=simple_classifier_config,
    microbe_ids=microbe_ids,
    disease_ids=disease_ids,
)
spliter = MicrobeDiseaseAssociationTrainTestSpliter(data.associations)

# Keep the returned result in a named variable; the original discarded it,
# leaving it reachable only through the cell's Out[...] history.
cv_result = cross_validation(
    k=5,
    data_size=data.associations.shape[0],
    train_test_spliter=spliter,
    model_factory=factory,
    trainer=trainer,
    tester=tester,
    config=classifier_optimizer_config,
)
# NOTE(review): if this Result exposes get_result() like `test_result` does,
# display `cv_result.get_result()` here to show the metrics instead of the
# bare object repr - confirm against base.evaluation.Result.
cv_result

2024-02-11 20:56:02,012 [INFO] Initializing SimilarityFeatureBasedMDAClassifierFactory
2024-02-11 20:56:02,016 [INFO] Initializing MicrobeDiseaseAssociationTrainTestSpliter
2024-02-11 20:56:02,020 [INFO] Start 5-fold Cross Validation with config : adam optimizer
2024-02-11 20:56:02,024 [INFO] ---- Fold 1 ----
2024-02-11 20:56:02,027 [INFO] Initializing MatrixFeatureExtractor
2024-02-11 20:56:02,029 [INFO] Initializing SimilarityFeatureExtractor
2024-02-11 20:56:02,031 [INFO] Initializing MDFeatureBasedMDAClassifier with model : simple classifier
2024-02-11 20:56:02,032 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-11 20:56:02,033 [INFO] Initial SimpleMLP with 10824 input dimension, 8 hidden dimension, 1 
            output dimension, 2 layers and with 0.3 dropout
2024-02-11 20:56:02,037 [INFO] Call Training with adam optimizer
2024-02-11 20:56:02,042 [INFO] Calling build with associations :      disease  microbe  increased
0      50863    33211         

  similarity_matrix = dot_product / union


2024-02-11 20:56:11,569 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:11,572 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:56:15,953 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:15,991 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:56:16,154 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:16,788 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:17,273 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 10824]) and y shape : torch.Size([719, 1])
2024-02-11 20:56:17,277 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:56:17,279 [INFO] moving data and model to cpu
2024-02-11 20:56:17,335 [IN

  similarity_matrix = dot_product / union


2024-02-11 20:56:31,387 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:31,393 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:56:36,956 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:36,992 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:56:37,156 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:37,726 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:38,052 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 10824]) and y shape : torch.Size([719, 1])
2024-02-11 20:56:38,055 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:56:38,059 [INFO] moving data and model to cpu
2024-02-11 20:56:38,099 [IN

  similarity_matrix = dot_product / union


2024-02-11 20:56:53,190 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:53,194 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:56:57,608 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:57,650 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:56:57,820 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:58,384 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:56:58,795 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 10824]) and y shape : torch.Size([719, 1])
2024-02-11 20:56:58,797 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:56:58,802 [INFO] moving data and model to cpu
2024-02-11 20:56:58,874 [IN

  similarity_matrix = dot_product / union


2024-02-11 20:57:13,336 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:13,343 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:57:18,938 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:18,974 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:57:19,136 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:19,702 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:20,026 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 10824]) and y shape : torch.Size([719, 1])
2024-02-11 20:57:20,027 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:57:20,031 [INFO] moving data and model to cpu
2024-02-11 20:57:20,082 [IN

  similarity_matrix = dot_product / union


2024-02-11 20:57:34,436 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:34,440 [INFO] Building Jaccard similarity for microbes
2024-02-11 20:57:38,862 [INFO] calculating dot product :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:38,897 [INFO] sum rows :
[0. 0. 0. 0. 0.]
2024-02-11 20:57:39,062 [INFO] union :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:39,631 [INFO] similarity matrix :
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
2024-02-11 20:57:39,963 [INFO] Initializing SimplePytorchData with X shape : torch.Size([716, 10824]) and y shape : torch.Size([716, 1])
2024-02-11 20:57:39,964 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-11 20:57:39,967 [INFO] moving data and model to cpu
2024-02-11 20:57:40,021 [IN

<base.evaluation.Result at 0x7d2dcb738fa0>