In [37]:
# Make Google Drive visible inside the Colab runtime; the project directory
# entered further down lives under this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
!pip install torch_geometric



In [39]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html


In [40]:
cd /content/drive/MyDrive/Academic/Topics/AI/Machine\ Learning\ Dr.\ Montazeri/Project/ml_mda

/content/drive/MyDrive/Academic/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


# Requirements

In [41]:
import logging
import sys

# Send INFO-and-above records to stdout so they show up in the cell output.
# force=True replaces any handlers already attached to the root logger, which
# keeps re-running this cell from stacking duplicate handlers.
stdout_handler = logging.StreamHandler(stream=sys.stdout)
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=[stdout_handler],
    force=True,
)

In [42]:
# Notebook-level logger (named after __name__, i.e. "__main__" when run as a
# notebook). It is not referenced by the other cells visible here.
logger = logging.getLogger(__name__)

In [43]:
import torch

# Pick the compute device once: use CUDA when torch reports it is available,
# otherwise fall back to the CPU. The value is handed to the optimizer config.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [44]:
from src.optimization import MatrixFeatureBasedMDAClassifierTrainer, MatrixFeatureBasedMDAClassifierTester
from src.config import SimpleClassifierConfig
from src.models import GaussianSimilarityFeatureBasedMDAClassifier, GaussianSimilarityFeatureBasedMDAClassifierFactory
from src.data import MicrobeDiseaseAssociationData, MicrobeDiseaseAssociationTrainTestSpliter
from src.features import get_associations, get_entities
from src.utils import train_test_sampler
from base import cross_validation, OptimizerConfig


# Classification

## Data

In [45]:
# Load the association table (the logged frames show the columns
# disease / microbe / increased).
associations = get_associations()
n_associations = associations.shape[0]

# Draw positional row indices for the two splits. With 0.7 the recorded run
# ended up with 628 training rows and 270 test rows.
# NOTE(review): no seed is set in the visible cells, so the split may change
# between runs - confirm whether train_test_sampler seeds itself.
train_indices, test_indices = train_test_sampler(n_associations, 0.7)

# Wrapper over the complete table; used by the cross-validation section.
data = MicrobeDiseaseAssociationData(associations)

# Wrappers over the sampled rows; used by the train/test section.
train_rows = associations.iloc[train_indices]
test_rows = associations.iloc[test_indices]
train_data = MicrobeDiseaseAssociationData(train_rows)
test_data = MicrobeDiseaseAssociationData(test_rows)

## Classifier

In [46]:
# Fetch the entity table a single time instead of once per use (the previous
# version called get_entities() four times), then split its ids by entity type.
# assumes get_entities() returns the same pandas DataFrame on every call - TODO confirm
entities = get_entities()

# Select the 'id' column for the matching rows in one .loc step.
microbe_ids = entities.loc[entities['type'] == 'Microbe', 'id'].tolist()
disease_ids = entities.loc[entities['type'] == 'Disease', 'id'].tolist()

In [47]:
# Settings for the classifier network. The input width is one slot per microbe
# id plus one per disease id (10824 in the recorded run).
n_input_features = len(microbe_ids) + len(disease_ids)

classifier_settings = {
    "model_name": "simple classifier",
    "input_dim": n_input_features,
    "hidden_dim": 8,
    "output_dim": 1,
    "num_layers": 2,
    "dropout": 0.3,
}

# Copy each setting onto the config object, in the order listed above.
simple_classifier_config = SimpleClassifierConfig()
for _setting_name, _setting_value in classifier_settings.items():
    setattr(simple_classifier_config, _setting_name, _setting_value)

In [48]:
# Build the classifier used by the train/test section from the config above
# and the two id lists.
mda_classifier = GaussianSimilarityFeatureBasedMDAClassifier(
    simple_classifier_config,
    microbe_ids=microbe_ids,
    disease_ids=disease_ids,
)

2024-02-10 19:46:17,854 [INFO] Initializing MDFeatureBasedMDAClassifier with model : simple classifier
2024-02-10 19:46:17,855 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-10 19:46:17,858 [INFO] Initial SimpleMLP with 10824 input dimension, 8 hidden dimension, 1 
            output dimension, 2 layers and with 0.3 dropout
2024-02-10 19:46:17,861 [INFO] Initializing MatrixFeatureExtractor
2024-02-10 19:46:17,862 [INFO] Initializing SimilarityFeatureExtractor


## Optimizer

In [49]:
# Optimisation settings shared by the trainer, the tester and cross-validation.
optimizer_settings = {
    "optimizer": torch.optim.Adam,  # the optimizer class itself, not an instance
    "criterion": torch.nn.BCEWithLogitsLoss(),
    "lr": 0.01,
    "batch_size": 32,
    "n_epoch": 50,
    "exp_name": "adam optimizer",
    "save": False,
    "save_path": None,
    "device": device,
    "report_size": 10,  # batch to report ratio
    "threshold": 0.5,
}

# Copy each setting onto the config object, in the order listed above.
classifier_optimizer_config = OptimizerConfig()
for _option_name, _option_value in optimizer_settings.items():
    setattr(classifier_optimizer_config, _option_name, _option_value)

## Train Test Approach

### Train

In [50]:
# Train the classifier on the training split with the optimizer settings
# defined earlier; the trainer's return value is kept in train_result.
train_result = MatrixFeatureBasedMDAClassifierTrainer().train(
    model=mda_classifier,
    data=train_data,
    config=classifier_optimizer_config,
)

2024-02-10 19:46:17,875 [INFO] Call Training with adam optimizer
2024-02-10 19:46:17,881 [INFO] Calling build with associations :      disease  microbe  increased
755    40873    64132          0
633    13565     5135          0
524     4928    52077          0
24     54370    30750          1
850    13213    33201          0
..       ...      ...        ...
547    45301    64598          0
188    43621     6454          1
845     7877     8218          0
753    45301    47880          0
446    43621    63336          1

[628 rows x 3 columns]
2024-02-10 19:46:17,915 [INFO] interaction matrix with shape (5179, 5645) has built
2024-02-10 19:46:18,137 [INFO] mask matrix with shape (5179, 5645) has built. This matrix shows not non elements.
2024-02-10 19:46:18,952 [INFO] interaction has been imputed to delete nans
2024-02-10 19:47:27,401 [INFO] Initializing SimplePytorchData with X shape : torch.Size([628, 10824]) and y shape : torch.Size([628, 1])
2024-02-10 19:47:27,403 [INFO] Running S

### Test

In [51]:
# Evaluate the trained classifier on the held-out split, reusing the same
# optimizer config object that was passed to the trainer.
test_result = MatrixFeatureBasedMDAClassifierTester().test(
    model=mda_classifier,
    data=test_data,
    config=classifier_optimizer_config,
)

2024-02-10 19:47:29,468 [INFO] Call Testing with adam optimizer
2024-02-10 19:47:29,533 [INFO] Initializing SimplePytorchData with X shape : torch.Size([270, 10824]) and y shape : torch.Size([270, 1])
2024-02-10 19:47:29,534 [INFO] Running Simple Tester with config : adam optimizer
2024-02-10 19:47:29,536 [INFO] moving data and model to cpu
2024-02-10 19:47:29,556 [INFO] Result on Test Data : {'AUC': 0.7204904062895157, 'ACC': 0.662962962962963, 'F1 Score': 0.6612856532348117, 'AUPR': 0, 'Loss': 286.574225531684}


In [52]:
# Display the test metrics; in the recorded run this is a dict with the keys
# AUC, ACC, F1 Score, AUPR and Loss.
test_result.get_result()

{'AUC': 0.7204904062895157,
 'ACC': 0.662962962962963,
 'F1 Score': 0.6612856532348117,
 'AUPR': 0,
 'Loss': 286.574225531684}

In [53]:
# Sanity check on the microbe similarity matrix held by `mda_classifier.fe`
# (presumably the feature extractor): sum over axis 0 and show the first 100.
# NOTE(review): in the recorded output almost every sum is 4959 while a few
# are 1, 10 or 96 - worth confirming that this is the intended behaviour.
microbe_similarity_column_sums = mda_classifier.fe.microbe_similarity_matrix.sum(axis=0)
microbe_similarity_column_sums[:100]

array([4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 9.600e+01, 4.959e+03, 4.959e+03, 4.959e+03,
       1.000e+01, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 1.000e+00, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 1.000e+00,
       4.959e+03, 4.959e+03, 1.000e+00, 4.959e+03, 4.959e+03, 4.959e+03,
       4.959e+03, 4.959e+03, 4.959e+03, 4.959e+03, 

## Cross Validation

In [54]:
# 5-fold cross-validation over the full association table. A fresh model is
# produced by the factory for each run; the trainer, tester and optimizer
# config are the same kind as in the train/test section.
trainer = MatrixFeatureBasedMDAClassifierTrainer()
tester = MatrixFeatureBasedMDAClassifierTester()
factory = GaussianSimilarityFeatureBasedMDAClassifierFactory(model_config=simple_classifier_config, microbe_ids=microbe_ids, disease_ids=disease_ids)
spliter = MicrobeDiseaseAssociationTrainTestSpliter(data.associations)

# Keep the returned Result bound to a name; previously it was discarded and only
# reachable through the notebook's `_` / Out[...] history.
cv_result = cross_validation(k=5, data_size=data.associations.shape[0], train_test_spliter=spliter, model_factory=factory,
                             trainer=trainer, tester=tester, config=classifier_optimizer_config)

# Still show the object as the cell output, exactly as before.
# NOTE(review): the test result above exposes get_result(); if this Result does
# too, displaying cv_result.get_result() would be more informative - confirm.
cv_result

2024-02-10 19:47:29,596 [INFO] Initializing DummyFeatureBasedMDAClassifierFactory
2024-02-10 19:47:29,599 [INFO] Initializing MicrobeDiseaseAssociationTrainTestSpliter
2024-02-10 19:47:29,600 [INFO] Start 5-fold Cross Validation with config : adam optimizer
2024-02-10 19:47:29,604 [INFO] ---- Fold 1 ----
2024-02-10 19:47:29,605 [INFO] Initializing MDFeatureBasedMDAClassifier with model : simple classifier
2024-02-10 19:47:29,606 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-10 19:47:29,611 [INFO] Initial SimpleMLP with 10824 input dimension, 8 hidden dimension, 1 
            output dimension, 2 layers and with 0.3 dropout
2024-02-10 19:47:29,614 [INFO] Initializing MatrixFeatureExtractor
2024-02-10 19:47:29,617 [INFO] Initializing SimilarityFeatureExtractor
2024-02-10 19:47:29,618 [INFO] Call Training with adam optimizer
2024-02-10 19:47:29,623 [INFO] Calling build with associations :      disease  microbe  increased
1      43621    40832          1
5 

<base.evaluation.Result at 0x788ef6884970>