In [37]:
# Colab-only setup: mount Google Drive at /content/drive so that the project
# directory stored on Drive can be reached from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
cd /content/drive/MyDrive/Academic/Topics/AI/Machine\ Learning\ Dr.\ Montazeri/Project/ml_mda

/content/drive/MyDrive/Academic/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


In [39]:
!pip install torch_geometric



In [40]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html


# Requirements

In [41]:
import torch

from base import OptimizerConfig, cross_validation
from base import SimplePytorchData, SimplePytorchDataTrainTestSplit
from base import SimpleTrainer, SimpleTester
from src.config import Node2VecConfig, Node2VecOptimizerConfig, SimpleClassifierConfig, GraphAutoEncoderConfig
from src.features import get_associations, get_homogeneous_graph, get_gae_pair_embedd_for_training_data
from src.models import SimpleMDAClassifier, SimpleMDAClassifierFactory
from src.utils import train_test_sampler, prj_logger
from torch_geometric.nn import GCNConv

In [42]:
# Select the compute device once; later cells pass this string to the configs
# and move tensors onto it. Falls back to the CPU when CUDA is not available.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [43]:
import logging
import sys

# Configure the root logger: INFO and above, timestamped, written to stdout so
# the records show up in the cell output.
# force=True replaces any handlers already attached to the root logger, which
# makes re-running this cell safe.
_stdout_handler = logging.StreamHandler(stream=sys.stdout)
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    force=True,
    handlers=[_stdout_handler],
)

In [44]:
# Notebook-level logger requested from the project's `prj_logger` helper,
# passing this module's `__name__` as the logger name.
logger = prj_logger.getLogger(__name__)

# Graph Auto Encoder Embedding

## Config

In [45]:
# Optimisation settings used when training the graph auto-encoder.
# Each entry is written onto a fresh OptimizerConfig as an attribute of the
# same name, in the order listed.
gae_optimizer_config = OptimizerConfig()
for _field, _value in {
    "optimizer": torch.optim.Adam,                 # optimizer class, not an instance
    "criterion": torch.nn.BCEWithLogitsLoss(),     # loss module instance
    "lr": 0.01,
    "n_epoch": 80,
    "exp_name": "Optimizer for Graph Auto Encoder",
    "device": device,
}.items():
    setattr(gae_optimizer_config, _field, _value)

In [46]:
# Architecture settings for the graph auto-encoder, applied attribute by
# attribute to a fresh GraphAutoEncoderConfig.
gae_model_config = GraphAutoEncoderConfig()
_gae_model_settings = (
    ("model_name", "Graph Auto Encoder Model"),
    ("device", device),
    # Size of the first axis of the homogeneous graph's `x`
    # (presumably the number of nodes — confirm against get_homogeneous_graph).
    ("input_dim", get_homogeneous_graph(device).x.shape[0]),
    ("hidden_dim", 32),
    ("output_dim", 32),
    ("num_layers", 3),
    ("dropout", 0.5),
    ("with_embedd", True),
    ("GCN", GCNConv),  # convolution layer class handed to the model
)
for _attr, _setting in _gae_model_settings:
    setattr(gae_model_config, _attr, _setting)

2024-02-09 07:33:22,797 [INFO] Calling get_homogeneous_graph
2024-02-09 07:33:24,742 [INFO] homogeneous data : Data(x=[66911, 1], edge_index=[2, 633662])


## Embedding

In [47]:
# Produce the embeddings used as classifier features, driven by the GAE model
# and optimizer configs defined above.
# NOTE(review): later cells index `md_embed` by row and read `md_embed.shape[1]`
# as the feature width, so it is presumably a 2-D tensor with one row per
# association — confirm against get_gae_pair_embedd_for_training_data.
md_embed = get_gae_pair_embedd_for_training_data(gae_model_config, gae_optimizer_config)

2024-02-09 07:33:24,768 [INFO] Calling get_gae_pair_embedd on cuda device ...
2024-02-09 07:33:24,769 [INFO] Calling get_homogeneous_graph
2024-02-09 07:33:26,353 [INFO] homogeneous data : Data(x=[66911, 1], edge_index=[2, 633662])
2024-02-09 07:33:26,356 [INFO] Calling get_node2vec_embedd on cuda device ...
2024-02-09 07:33:26,357 [INFO] Creating GraphAutoEncoderModel ...
2024-02-09 07:33:26,361 [INFO] Initializing GraphAutoEncoderModel with config : {'model_name': 'Graph Auto Encoder Model', 'input_dim': 66911, 'hidden_dim': 32, 'output_dim': 32, 'num_layers': 3, 'dropout': 0.5, 'with_embedd': True, 'GCN': <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>}
2024-02-09 07:33:26,363 [INFO] Initializing GCNAutoEncoder ...
2024-02-09 07:33:26,372 [INFO] Initial GCNEncoder with 66911 input_dimension,
            32 hidden dimension, 32 output dimension,
            3 layers and with 0.5 dropout
2024-02-09 07:33:26,402 [INFO] Initial LinkDecoder
2024-02-09 07:33:26,403 [INFO] Calling get_h

# Classification

## Data

In [48]:
# Load the association table and build the target tensor from its
# 'increased' column: float32, reshaped to a single column, on `device`.
associations = get_associations()
_increased = associations['increased'].tolist()
y = torch.tensor(_increased, dtype=torch.float32)
y = y.reshape(-1, 1).to(device)

In [49]:
# Train / test split.
# train_test_sampler receives the number of samples and 0.7; the recorded run
# (628 train / 270 test out of 898) suggests 0.7 is the training fraction.
train_indices, test_indices = train_test_sampler(y.shape[0], 0.7)


def _subset(indices):
    """Wrap the rows of `md_embed` and `y` selected by `indices` in a SimplePytorchData."""
    return SimplePytorchData(md_embed[indices], y[indices])


# Full dataset first (used later for cross validation), then the two splits.
data = SimplePytorchData(md_embed, y)
train_data = _subset(train_indices)
test_data = _subset(test_indices)

2024-02-09 07:34:55,048 [INFO] Initializing SimplePytorchData with X shape : torch.Size([898, 64]) and y shape : torch.Size([898, 1])
2024-02-09 07:34:55,052 [INFO] Initializing SimplePytorchData with X shape : torch.Size([628, 64]) and y shape : torch.Size([628, 1])
2024-02-09 07:34:55,054 [INFO] Initializing SimplePytorchData with X shape : torch.Size([270, 64]) and y shape : torch.Size([270, 1])


## Classifier

In [50]:
# Settings for the classifier; the input width follows the embedding width
# (second axis of `md_embed`) and the output is a single value per sample.
simple_classifier_config = SimpleClassifierConfig()
for _key, _setting in (
    ("model_name", "simple classifier"),
    ("input_dim", md_embed.shape[1]),
    ("hidden_dim", 32),
    ("output_dim", 1),
    ("num_layers", 2),
    ("dropout", 0.1),
):
    setattr(simple_classifier_config, _key, _setting)

In [51]:
# Instantiate the classifier from the config above; this instance is the one
# trained and evaluated in the "Train Test Approach" section.
mda_classifier = SimpleMDAClassifier(simple_classifier_config)

2024-02-09 07:34:55,071 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-09 07:34:55,072 [INFO] Initial SimpleMLP with 64 input dimension, 32 hidden dimension, 1 
            output dimension, 2 layers and with 0.1 dropout


## Optimizer

In [52]:
# Training / evaluation settings for the classifier, shared by the single
# train-test run and by cross validation. Each entry becomes an attribute of
# the same name on a fresh OptimizerConfig, in the order listed.
classifier_optimizer_config = OptimizerConfig()
for _option, _choice in {
    "optimizer": torch.optim.Adam,                 # optimizer class, not an instance
    "criterion": torch.nn.BCEWithLogitsLoss(),     # loss module instance
    "lr": 0.01,
    "batch_size": 32,
    "n_epoch": 50,
    "exp_name": "adam optimizer",
    "save": False,
    "save_path": None,
    "device": device,
    "report_size": 10,  # batch to report ratio
    "threshold": 0.5,
}.items():
    setattr(classifier_optimizer_config, _option, _choice)

## Train Test Approach

### Train

In [53]:
# Train `mda_classifier` on the training split with the classifier optimizer
# settings; whatever the trainer returns is kept in `train_result`.
train_result = SimpleTrainer().train(model=mda_classifier,
                                     data=train_data,
                                     config=classifier_optimizer_config)

2024-02-09 07:34:55,091 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-09 07:34:55,092 [INFO] moving data and model to cuda
2024-02-09 07:34:55,132 [INFO] loss: 0.0212    [1,    10]
2024-02-09 07:34:55,162 [INFO] loss: 0.0213    [1,    20]
2024-02-09 07:34:55,189 [INFO] loss: 0.0210    [2,    10]
2024-02-09 07:34:55,213 [INFO] loss: 0.0208    [2,    20]
2024-02-09 07:34:55,234 [INFO] loss: 0.0203    [3,    10]
2024-02-09 07:34:55,256 [INFO] loss: 0.0199    [3,    20]
2024-02-09 07:34:55,280 [INFO] loss: 0.0199    [4,    10]
2024-02-09 07:34:55,301 [INFO] loss: 0.0197    [4,    20]
2024-02-09 07:34:55,323 [INFO] loss: 0.0195    [5,    10]
2024-02-09 07:34:55,346 [INFO] loss: 0.0200    [5,    20]
2024-02-09 07:34:55,367 [INFO] loss: 0.0194    [6,    10]
2024-02-09 07:34:55,389 [INFO] loss: 0.0192    [6,    20]
2024-02-09 07:34:55,411 [INFO] loss: 0.0194    [7,    10]
2024-02-09 07:34:55,432 [INFO] loss: 0.0193    [7,    20]
2024-02-09 07:34:55,453 [INFO] loss: 0.0194 

### Test

In [54]:
# Evaluate the trained classifier on the held-out test split using the same
# config; the recorded run logs AUC, ACC, F1 Score, AUPR and Loss.
test_result = SimpleTester().test(model=mda_classifier,
                                  data=test_data,
                                  config=classifier_optimizer_config)

2024-02-09 07:34:57,438 [INFO] Running Simple Tester with config : adam optimizer
2024-02-09 07:34:57,440 [INFO] moving data and model to cuda
2024-02-09 07:34:57,464 [INFO] Result on Test Data : {'AUC': 0.8587058112045742, 'ACC': 0.7481481481481481, 'F1 Score': 0.7454101724998613, 'AUPR': 0, 'Loss': 0.49913786848386127}


In [55]:
# Display the test metrics; left as the last expression so the notebook renders it.
test_result.get_result()

{'AUC': 0.8587058112045742,
 'ACC': 0.7481481481481481,
 'F1 Score': 0.7454101724998613,
 'AUPR': 0,
 'Loss': 0.49913786848386127}

## Cross Validation

In [56]:
# 5-fold cross validation over the full dataset (`data`), reusing the
# classifier config (through the factory) and the classifier optimizer config.
trainer, tester = SimpleTrainer(), SimpleTester()
factory = SimpleMDAClassifierFactory(simple_classifier_config)
spliter = SimplePytorchDataTrainTestSplit(data)

# Kept as the last expression so the returned object (a base.evaluation.Result
# in the recorded run) is displayed as the cell output.
cross_validation(
    k=5,
    data_size=data.X.shape[0],
    train_test_spliter=spliter,
    model_factory=factory,
    trainer=trainer,
    tester=tester,
    config=classifier_optimizer_config,
)

2024-02-09 07:34:57,482 [INFO] Initializing SimpleMDAClassifierFactory with model : simple classifier
2024-02-09 07:34:57,484 [INFO] Initializing SimplePytorchDataTrainTestSplit
2024-02-09 07:34:57,486 [INFO] Start 5-fold Cross Validation with config : adam optimizer
2024-02-09 07:34:57,494 [INFO] ---- Fold 1 ----
2024-02-09 07:34:57,496 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 64]) and y shape : torch.Size([719, 1])
2024-02-09 07:34:57,498 [INFO] Initializing SimplePytorchData with X shape : torch.Size([179, 64]) and y shape : torch.Size([179, 1])
2024-02-09 07:34:57,499 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-09 07:34:57,500 [INFO] Initial SimpleMLP with 64 input dimension, 32 hidden dimension, 1 
            output dimension, 2 layers and with 0.1 dropout
2024-02-09 07:34:57,502 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-09 07:34:57,503 [INFO] moving data and model to cuda
2024-02-09 07:34:57

<base.evaluation.Result at 0x7d8387b13c40>