In [1]:
# Colab-only setup: attach Google Drive so the project checkout under MyDrive
# is reachable from the runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Make the project checkout the working directory so the `base` and `src`
# packages imported below resolve. The explicit `%cd` magic is used instead of
# bare `cd`, which only works while IPython's automagic is enabled.
%cd /content/drive/MyDrive/Academic/Topics/AI/Machine\ Learning\ Dr.\ Montazeri/Project/ml_mda

/content/drive/MyDrive/Academic/Topics/AI/Machine Learning Dr. Montazeri/Project/ml_mda


In [3]:
# `%pip` installs into the environment of the running kernel (a `!pip` call goes
# through a shell subprocess instead). Pinned to the version this notebook was
# last executed with so a later re-run resolves the same release.
%pip install torch_geometric==2.4.0

Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.0 MB[0m [31m7.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m0.9/1.0 MB[0m [31m13.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


In [4]:
# Compiled PyG companion packages, pinned to the versions resolved in the last
# recorded run. The wheel index below targets torch 2.1.0 + CUDA 12.1; it must be
# changed if the runtime ships a different torch/CUDA build.
# NOTE(review): torch_spline_conv is left unpinned because its resolved version
# is not visible in the saved output - pin it once confirmed.
%pip install pyg_lib==0.4.0 torch_scatter==2.1.2 torch_sparse==0.6.18 torch_cluster==1.6.3 torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/pyg_lib-0.4.0%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m84.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m65.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_cluster-1.6.3%2Bpt

# Requirements

In [5]:
import torch

from base import OptimizerConfig, cross_validation
from base import SimplePytorchData, SimplePytorchDataTrainTestSplit
from base import SimpleTrainer, SimpleTester
from src.config import Node2VecConfig, Node2VecOptimizerConfig, SimpleClassifierConfig
from src.features import get_node2vec_pair_embedd_for_training_data, get_associations
from src.models import SimpleMDAClassifier, SimpleMDAClassifierFactory
from src.utils import train_test_sampler, prj_logger

In [6]:
# Fixed seed so that a "Restart & Run All" repeats the torch-driven randomness
# of this notebook instead of producing different numbers on every run.
SEED = 42
torch.manual_seed(SEED)
# NOTE(review): project helpers (e.g. train_test_sampler) may draw from other
# RNGs such as `random` or numpy - confirm and seed those as well if so.

# Use the GPU when the runtime provides one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [7]:
import logging
import sys

# Send INFO-and-above records to stdout so the project's log lines appear in
# the cell output. `force=True` replaces any handlers already attached to the
# root logger, so re-running this cell does not duplicate every message.
log_format = "%(asctime)s [%(levelname)s] %(message)s"
stdout_handler = logging.StreamHandler(stream=sys.stdout)

logging.basicConfig(level=logging.INFO, format=log_format, handlers=[stdout_handler], force=True)

# Pretraining with Node2Vec

## Configs

In [8]:
# Node2Vec Config
# The option names match the constructor arguments of torch_geometric's
# Node2Vec - presumably they are forwarded to it unchanged; confirm in src.
node2vec_config = Node2VecConfig()
node2vec_settings = {
    'embedding_dim': 32,
    'walk_length': 50,
    'context_size': 10,
    'walks_per_node': 10,
    'num_negative_samples': 1,
    'p': 1.0,
    'q': 1.0,
    'num_nodes': None,
    'sparse': True,
}
for option, value in node2vec_settings.items():
    setattr(node2vec_config, option, value)

# Node2Vec Optimizer Config
# SparseAdam is chosen together with `sparse = True` above.
node2vec_optimizer_config = Node2VecOptimizerConfig()
node2vec_optimizer_settings = {
    'exp_name': 'Node2VecOptimizer Default',
    'shuffle': True,
    'num_workers': 2,
    'lr': 0.01,
    'device': device,
    'report_size': 1000,
    'optimizer': torch.optim.SparseAdam,
}
for option, value in node2vec_optimizer_settings.items():
    setattr(node2vec_optimizer_config, option, value)

## Embeddings

In [9]:
# Train Node2Vec with the configs above and fetch the pair embeddings for the
# training data. This is the slow step of the notebook: in the recorded run,
# several minutes pass between this call and the next cell's log lines.
md_embed = get_node2vec_pair_embedd_for_training_data(
    node2vec_config,
    node2vec_optimizer_config,
)

2024-02-09 07:36:42,620 [INFO] Calling get_node2vec_pair_embedd on cuda device ...
2024-02-09 07:36:42,621 [INFO] Calling get_homogeneous_graph
2024-02-09 07:36:46,538 [INFO] homogeneous data : Data(x=[66911, 1], edge_index=[2, 633662])
2024-02-09 07:36:46,545 [INFO] Calling get_node2vec_embedd on cuda device ...
2024-02-09 07:36:46,546 [INFO] Creating Node2Vec model ...
2024-02-09 07:36:46,549 [INFO] Calling get_homogeneous_graph
2024-02-09 07:36:47,250 [INFO] homogeneous data : Data(x=[66911, 1], edge_index=[2, 633662])
2024-02-09 07:36:47,252 [INFO] Initialing MDNode2Vec with model_config {'model_name': None, 'embedding_dim': 32, 'walk_length': 50}
2024-02-09 07:36:47,570 [INFO] Training Node2Vec ...
2024-02-09 07:36:47,572 [INFO] Running Node2vecTrainer with Node2VecOptimizer Default
2024-02-09 07:36:47,576 [INFO] Creating <class 'torch.optim.sparse_adam.SparseAdam'> with lr : 0.01
2024-02-09 07:36:47,578 [INFO] moving model to cuda
2024-02-09 07:36:47,584 [INFO] start batch optimi

# Classification

## Data

In [10]:
# Build the target tensor from the 'increased' column of the associations:
# float32, one label per row (column vector), placed on the training device.
associations = get_associations()
increased_labels = associations['increased'].tolist()
y = torch.tensor(increased_labels, dtype=torch.float32)
y = y.reshape(-1, 1).to(device)

In [11]:
# Train Test Split
# 0.7 is the split ratio handed to the sampler; the recorded run yields
# 628 training rows and 270 test rows out of 898.
n_samples = y.shape[0]
train_indices, test_indices = train_test_sampler(n_samples, 0.7)

# `data` keeps the full set (used later for cross validation); the other two
# wrap the sampled subsets, built in train-then-test order.
data = SimplePytorchData(md_embed, y)
train_data, test_data = (
    SimplePytorchData(md_embed[subset], y[subset])
    for subset in (train_indices, test_indices)
)

2024-02-09 07:42:07,197 [INFO] Initializing SimplePytorchData with X shape : torch.Size([898, 64]) and y shape : torch.Size([898, 1])
2024-02-09 07:42:07,201 [INFO] Initializing SimplePytorchData with X shape : torch.Size([628, 64]) and y shape : torch.Size([628, 1])
2024-02-09 07:42:07,204 [INFO] Initializing SimplePytorchData with X shape : torch.Size([270, 64]) and y shape : torch.Size([270, 1])


## Classifier

In [12]:
# Architecture of the MLP classifier applied to the pair embeddings.
simple_classifier_config = SimpleClassifierConfig()
simple_classifier_config.model_name = "simple classifier"
# Take the input width from the feature matrix instead of hard-coding it, so
# changing the Node2Vec embedding size cannot silently desynchronise the
# classifier. In the recorded run this evaluates to 64.
simple_classifier_config.input_dim = data.X.shape[1]
simple_classifier_config.hidden_dim = 16
simple_classifier_config.output_dim = 1  # one logit per sample, matching y's single column
simple_classifier_config.num_layers = 3
simple_classifier_config.dropout = 0.1

In [13]:
# Instantiate the classifier used by the single train/test evaluation below.
# Cross validation later builds its own models through a factory from the same config.
mda_classifier = SimpleMDAClassifier(simple_classifier_config)

2024-02-09 07:42:07,224 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-09 07:42:07,230 [INFO] Initial SimpleMLP with 64 input dimension, 16 hidden dimension, 1 
            output dimension, 3 layers and with 0.1 dropout


## Optimizer

In [14]:
# Training/evaluation settings shared by the train/test run and cross validation.
classifier_optimizer_config = OptimizerConfig()
classifier_optimizer_settings = {
    'optimizer': torch.optim.Adam,
    # BCEWithLogitsLoss applies the sigmoid itself, so it expects raw logits.
    'criterion': torch.nn.BCEWithLogitsLoss(),
    'lr': 0.01,
    'batch_size': 32,
    'n_epoch': 50,
    'exp_name': "adam optimizer",
    'save': False,
    'save_path': None,
    'device': device,
    'report_size': 10,  # batch to report ratio
    # NOTE(review): presumably the decision cut-off used for ACC / F1 - confirm in base.
    'threshold': 0.5,
}
for option, value in classifier_optimizer_settings.items():
    setattr(classifier_optimizer_config, option, value)

## Train/Test Split Evaluation

### Train

In [15]:
# Fit the classifier on the training split only; the test split stays unseen
# until the evaluation cell. Progress is logged every `report_size` batches.
train_result = SimpleTrainer().train(
    model=mda_classifier,
    data=train_data,
    config=classifier_optimizer_config,
)

2024-02-09 07:42:07,251 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-09 07:42:07,252 [INFO] moving data and model to cuda
2024-02-09 07:42:07,786 [INFO] loss: 0.0212    [1,    10]
2024-02-09 07:42:07,820 [INFO] loss: 0.0189    [1,    20]
2024-02-09 07:42:07,853 [INFO] loss: 0.0170    [2,    10]
2024-02-09 07:42:07,889 [INFO] loss: 0.0168    [2,    20]
2024-02-09 07:42:07,932 [INFO] loss: 0.0140    [3,    10]
2024-02-09 07:42:07,964 [INFO] loss: 0.0150    [3,    20]
2024-02-09 07:42:07,999 [INFO] loss: 0.0145    [4,    10]
2024-02-09 07:42:08,034 [INFO] loss: 0.0127    [4,    20]
2024-02-09 07:42:08,070 [INFO] loss: 0.0125    [5,    10]
2024-02-09 07:42:08,093 [INFO] loss: 0.0126    [5,    20]
2024-02-09 07:42:08,116 [INFO] loss: 0.0118    [6,    10]
2024-02-09 07:42:08,139 [INFO] loss: 0.0125    [6,    20]
2024-02-09 07:42:08,161 [INFO] loss: 0.0116    [7,    10]
2024-02-09 07:42:08,184 [INFO] loss: 0.0120    [7,    20]
2024-02-09 07:42:08,209 [INFO] loss: 0.0118 

### Test

In [16]:
# Score the trained classifier on the held-out split, reusing the optimizer
# config (criterion, device, threshold) that was used for training.
test_result = SimpleTester().test(
    model=mda_classifier,
    data=test_data,
    config=classifier_optimizer_config,
)

2024-02-09 07:42:10,217 [INFO] Running Simple Tester with config : adam optimizer
2024-02-09 07:42:10,220 [INFO] moving data and model to cuda
2024-02-09 07:42:10,271 [INFO] Result on Test Data : {'AUC': 0.9349794238683128, 'ACC': 0.8555555555555555, 'F1 Score': 0.854836708529205, 'AUPR': 0, 'Loss': 0.49580668409665424}


In [17]:
# Display the held-out metrics as the cell output (a dict with the keys
# AUC, ACC, F1 Score, AUPR and Loss in the recorded run).
test_result.get_result()

{'AUC': 0.9349794238683128,
 'ACC': 0.8555555555555555,
 'F1 Score': 0.854836708529205,
 'AUPR': 0,
 'Loss': 0.49580668409665424}

## Cross Validation Evaluation

In [18]:
# Collaborators for k-fold evaluation: the factory builds a fresh classifier
# from the shared config for each fold, and the splitter cuts the full dataset
# into that fold's train and test parts.
trainer = SimpleTrainer()
tester = SimpleTester()
factory = SimpleMDAClassifierFactory(simple_classifier_config)
spliter = SimplePytorchDataTrainTestSplit(data)

# Bind the returned result to a name so later cells can use it; previously it
# was only echoed as an opaque repr and then lost.
cv_result = cross_validation(k=5, data_size=data.X.shape[0], train_test_spliter=spliter, model_factory=factory,
                             trainer=trainer, tester=tester, config=classifier_optimizer_config)
# NOTE(review): the repr shows a base.evaluation.Result; it presumably offers
# get_result() like test_result does - confirm before relying on it.
cv_result

2024-02-09 07:42:10,291 [INFO] Initializing SimpleMDAClassifierFactory with model : simple classifier
2024-02-09 07:42:10,294 [INFO] Initializing SimplePytorchDataTrainTestSplit
2024-02-09 07:42:10,297 [INFO] Start 5-fold Cross Validation with config : adam optimizer
2024-02-09 07:42:10,300 [INFO] ---- Fold 1 ----
2024-02-09 07:42:10,303 [INFO] Initializing SimplePytorchData with X shape : torch.Size([719, 64]) and y shape : torch.Size([719, 1])
2024-02-09 07:42:10,305 [INFO] Initializing SimplePytorchData with X shape : torch.Size([179, 64]) and y shape : torch.Size([179, 1])
2024-02-09 07:42:10,306 [INFO] Initializing SimpleMDAClassifier with model : simple classifier
2024-02-09 07:42:10,307 [INFO] Initial SimpleMLP with 64 input dimension, 16 hidden dimension, 1 
            output dimension, 3 layers and with 0.1 dropout
2024-02-09 07:42:10,309 [INFO] Running Simple Trainer with config : adam optimizer
2024-02-09 07:42:10,315 [INFO] moving data and model to cuda
2024-02-09 07:42:10

<base.evaluation.Result at 0x7e45a201e7a0>