# Configure environment

In [None]:
"""
Choose the dataset name for the ground_truth and graph embeddings
"""
dataset_name = "openML"

"""
choose integer number of ratio negative/positive for the negative sampling (0 to 20)
"""
neg_sample = 2

"""
Choose experiment ["hold_out","random_subsam","10_cv"] : 
- hold_out will get the train/test split within ./ground_truth/dataset_name/hold_out
- random_subsam will isolate 1 node from some clusters of similar nodes in the ground truth. The isolated nodes will not be seen during training
- 10_cv: will split the ground truth in 10 folds, using each fold as test at least once. 
"""
strategy = "random_subsam"

"""
Choose to use the selected strategy to create a new split 
or reuse a previously created one (useful to repeat exact same experiment)
"""
create_new_split = False

print("Env variables set")

# Import the project modules used in the rest of the notebook, then load the
# environment with the configuration chosen above.
import step3_gcnsm
from step3_gcnsm import confusion_matrix as confusion_matrix
from step3_gcnsm import train as train
from step3_gcnsm import cross_validation as cross_validation
# NOTE(review): test_mask, train_mask and g are bound to local names here,
# before load_env() is called below. If load_env() rebinds these attributes on
# step3_gcnsm, the names imported here keep pointing at the old objects;
# reading them as step3_gcnsm.<name> (as the commented confusion_matrix example
# does for test_mask) would avoid that -- TODO confirm against step3_gcnsm.
from step3_gcnsm import test_mask, train_mask
from step3_gcnsm import g
import step3_gcn_nn_concatenate as gcn_nn
import step3_gcn_loss as gcn_loss
import step3_gcn_training as gcn_training
import step3_plot_results as plot
# Pass dataset name, negative-sampling ratio, experiment strategy and the
# new-split flag to step3_gcnsm.
step3_gcnsm.load_env(ds_name=dataset_name,ns=neg_sample,experiment=strategy,new_split=create_new_split)
print("\n SETUP IS READY")

# Choose NN architecture and loss function, then run tests

### Config and run training
### NN architectures: 

In [None]:
# Run this cell to see the available NN architecture options.
# NOTE(review): presumably these are the values accepted as `net_name` by
# Training.set_training() in the examples further down -- confirm.
gcn_nn.get_options()

### Loss functions: 
{<br>
    "0": "ContrastiveLoss", <br>
    "1": "CosineEmbeddingLoss", <br>
}

### Optimizer
{<br>
    "adam" (default)<br>
    "sgd"<br> 
}


### Loss function parameter examples: format -> [margin]+[aggregation_function] 
{<br>
    0.9+mean, <br>
    0.7+mean, <br>
    0.5+mean, <br>
    0.3+mean, <br>
    0.9+sum, <br>
    0.7+sum, <br>
    0.5+sum, <br>
    0.3+sum, <br>
}

### batch_splits examples: 
{<br>
    1024, <br>
    2048, <br>
}
### learning rate examples (lr): 
{<br>
    6e-3, <br>
    1e-2, <br>
}

### Examples

In [None]:
# #Example 1: load a saved model from a path, then train it for N iterations
# training = gcn_training.Training()
# training.load_state(path="./models/[file_name].pt")
# train(training,iterations=N)

# #Example 2: train a new model and specify its parameters
# training = gcn_training.Training()
# training.set_training(
#             net_name= "Fasttext2_364",  #NN architecture option (see gcn_nn.get_options())
#             batch_splits=1024 ,#number of batches; batch size is (dataset size / batch_splits)
#             lr=1e-2 , #learning rate for training (e.g. 1e-2)
#             loss_name=gcn_loss.get_option_name(1), #number of the loss function option
#             loss_parameters="0.5+mean" ,#loss function parameters separated by '+', e.g. "0.0+mean" for cosine and contrastive
#             optimizer_name="sgd" ) #"adam" or "sgd"; default is adam
# train(training,iterations=120)

## Print the confusion matrix and results using the training object
#confusion_matrix(training.net, g, g.ndata['vector'], step3_gcnsm.test_mask,training.loss_name,threshold = 0.5)

### 10-fold Cross Validation

In [None]:
# #Template: fill in the blank values below before uncommenting.
# #Train a new model and specify its parameters
# training_object = gcn_training.Training()
# training_object.set_training(
#             net_name= gcn_nn.get_option_name(),  #number of the NN architecture option
#             batch_splits= ,#number of batches; batch size is (dataset size / batch_splits)
#             lr= , #learning rate for training (e.g. 1e-3)
#             loss_name=gcn_loss.get_option_name(), #number of the loss function option
#             loss_parameters= ,#loss function parameters separated by '+', e.g. "0.0+mean" for cosine and contrastive
#             optimizer_name= ) #"adam" or "sgd"; default is adam

# #Call pattern; replace the placeholder arguments with actual values:
##cross_validation(training_object,iterations_per_fold,range_folds,nsample,create_split)

## Plot results <br>

<p>This will plot fscore/accuracy charts for all the results under the /results folder that match the parameter options below.</p>

#### Parameters options

<p>Choose one value for each parameter and pass them to the corresponding plot function in the following order:</p>

<b>1) neg_sample</b> = [1,2,3,4...etc] <br>
<b>2) ds_name</b> = ["openml_203ds_datasets_matching"] <br>
<b>3) experiment</b> = ["10_cv","random_subsam","hold_out"] <br>
<b>4) archi</b> = ["Fasttext_150","Fasttext_300","Bert_300","Bert_768"] <br>
<b>5) optimizer</b> = ["adam","sgd"] <br>
<b>6) loss_functions</b> = ["ContrastiveLoss","CosineEmbeddingLoss"] <br>

#### Types of chart
<b>plot.plot_cv_details:</b> line charts of accuracy and fscore results for the several runs in 10_cv and random_subsam<br>
<b>plot.plot_bar:</b> bar charts of the maximum accuracy and fscore results for the several runs in 10_cv and random_subsam<br>
<b>plot.plot_details</b>: line charts of accuracy, fscore, recall and precision results for a hold_out run

In [None]:
# Example calls. Positional arguments follow the order documented above:
# neg_sample, ds_name, experiment, archi, optimizer, loss_functions.
# NOTE(review): `nit` is not described in this notebook -- presumably the
# iteration count(s) of the stored runs to plot; confirm in step3_plot_results.

# Line charts of accuracy and fscore for the several runs.
plot.plot_cv_details(
    0,                                 # neg_sample
    "openml_203ds_datasets_matching",  # ds_name
    "isolation",                       # experiment
    "Fasttext2_364",                   # archi
    "sgd",                             # optimizer
    "CosineEmbeddingLoss",             # loss function
    nit=[124],
)

# Bar charts of the maximum accuracy and fscore results.
plot.plot_bar(
    0,                                 # neg_sample
    "openml_203ds_datasets_matching",  # ds_name
    "isolation",                       # experiment
    "Fasttext2_364",                   # archi
    "sgd",                             # optimizer
    "CosineEmbeddingLoss",             # loss function
    nit=[124],
)

# Line charts of accuracy, fscore, recall and precision for a single run.
plot.plot_details(
    0,                       # neg_sample
    "monitor",               # ds_name
    "isolation",             # experiment
    "Fasttext2_364",         # archi
    "sgd",                   # optimizer
    "CosineEmbeddingLoss",   # loss function
    nit=[50],
)
