From 9c45dc25aba034215bc6dc683dc6ad715fdac5cc Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Tue, 4 Jun 2024 23:24:32 +0100
Subject: [PATCH 01/20] Add kge doc portion #1

---
 doc/modules/ROOT/content-nav.adoc             |   5 +
 .../pages/python-runtime/introduction.adoc    |   7 +
 .../knowledge-graph-embeddings.adoc           | 187 ++++++++++++++++++
 .../partials/python-runtime-algorithms.adoc   |   1 +
 4 files changed, 200 insertions(+)
 create mode 100644 doc/modules/ROOT/pages/python-runtime/introduction.adoc
 create mode 100644 doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
 create mode 100644 doc/modules/ROOT/partials/python-runtime-algorithms.adoc

diff --git a/doc/modules/ROOT/content-nav.adoc b/doc/modules/ROOT/content-nav.adoc
index 134634114..4a8a3917a 100644
--- a/doc/modules/ROOT/content-nav.adoc
+++ b/doc/modules/ROOT/content-nav.adoc
@@ -4,6 +4,11 @@
 * xref:graph-object.adoc[]
 * xref:algorithms.adoc[]
 * xref:pipelines.adoc[]
+* xref:python-runtime/introduction.adoc[Python Runtime Algorithms]
++
+--
+include::partial$python-runtime-algorithms.adoc[]
+--
 * xref:model-object.adoc[]
 * xref:common-datasets.adoc[]
 * xref:rel-embedding-models.adoc[]
diff --git a/doc/modules/ROOT/pages/python-runtime/introduction.adoc b/doc/modules/ROOT/pages/python-runtime/introduction.adoc
new file mode 100644
index 000000000..177052b96
--- /dev/null
+++ b/doc/modules/ROOT/pages/python-runtime/introduction.adoc
@@ -0,0 +1,7 @@
+= Introduction
+
+Python runtime works with GDS sessions.
+
+The list of available algorithms:
+
+include::ROOT:partial$python-runtime-algorithms.adoc[]
\ No newline at end of file
diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
new file mode 100644
index 000000000..8256acc8a
--- /dev/null
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -0,0 +1,187 @@
+= Knowledge graph embeddings
+
+Knowledge Graph Embeddings (KGE) are a family of algorithms that learn low-dimensional representations of entities and relations in a knowledge graph.
+The embeddings can be used to perform various tasks such as link prediction, entity classification, and entity clustering.
+
+// we define a KGEM as four components: an interaction model, a training approach, a loss function, and its usage of explicit inverse relations.
+
+To define the KGE algorithms, we need to define the following components: a scoring function, a training approach and a loss function.
+
+Python runtime provides the following scoring functions:
+
+* TransE
+** https://proceedings.neurips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).^]
+* DistMult
+** https://arxiv.org/pdf/1412.6575[Yang, Bishan, et al. "Embedding entities and relations for learning and inference in knowledge bases." arXiv preprint arXiv:1412.6575 (2014).^]
+
+
+KGE an _inductive_ algorithm for computing node embeddings for knowledge graphs.
+Knowledge graph means that graph can have multiple node labels and relationship types.
+The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema.
+
+KGEMs (Knowledge Graph Embedding Models) are used to learn low-dimensional representations of entities and relations in a knowledge graph.
+The embeddings can be used to perform various tasks such as link prediction, entity classification, and entity clustering. +KGEM can return the score for the triple `(head, relation, tail)` which can be used to predict the missing tail entity for a given head and relation. + + +* https://arxiv.org/pdf/1706.02216.pdf[William L. Hamilton, Rex Ying, and Jure Leskovec. "Inductive Representation Learning on Large Graphs." 2018.^] +* https://arxiv.org/pdf/1911.10232.pdf[Amit Pande, Kai Ni and Venkataramani Kini. "SWAG: Item Recommendations using Convolutions on Weighted Graphs." 2019.^] + + +== Train API + +[source,python] +---- +G = gds.graph.load_fb15k237("train") +gds.model.transe.train(G, + embedding_dimension = 256, + batch_size=512, + num_epochs=100, + optimizer='Adagrad', + optimizer_kwargs={'lr': 0.01}, + loss_function="MarginRanking", + loss_function_kwargs={'margin': 1.0}, +) +---- + +[cols="1,1,1,4", options="header"] +|==== +| Parameter | Type | Default value | Description + +| num_epochs +| int +| +| Number of epochs for training (must be greater than 0) + +| epochs_per_checkpoint +| int +| +| Number of epochs between checkpoints (must be greater than or equal to 0) + +| load_from_checkpoint +| Optional[tuple[str, int]] +| None +| Checkpoint to load from, specified as a tuple (path, epoch) + +| split_ratios +| dict[str, float] +| {TRAIN=0.8, TEST=0.2} +| Ratios for splitting the dataset into training and test sets + +| scoring_function +| str +| "transe" +| Function used to score embeddings + +| embedding_dimension +| int +| 256 +| Dimensionality of the embeddings (must be greater than 0) + +| batch_size +| int +| 512 +| Size of the training batch (must be greater than 0) + +| test_batch_size +| int +| 512 +| Size of the test batch (must be greater than 0) + +| epochs_per_val +| int +| 50 +| Number of epochs between validations (must be greater than or equal to 0) + +| optimizer +| str +| "adam" +| Optimizer to use for training + +| optimizer_kwargs +| dict[str, Any] +| {lr=0.01, weight_decay=0.0005} +| Additional arguments for the optimizer + +| lr_scheduler +| str +| ConstantLR +| Learning rate scheduler + +| lr_scheduler_kwargs +| dict[str, Any] +| {factor=1, total_iters=1000} +| Additional arguments for the learning rate scheduler + +| filtered_metrics +| bool +| False +| Whether to use filtered metrics during evaluation + +| negative_sampling_size +| int +| 1 +| Number of negative samples per positive sample + +| p_norm +| float +| 1.0 +| Norm to use in TransE scoring function + +| loss_function +| str +| MarginRanking +| Loss function to use for training + +| loss_function_kwargs +| dict[str, Any] +| {margin=1.0, adversarial_temperature=1.0, gamma=20.0} +| Additional arguments for the loss function + +| k_value +| int +| 10 +| Value of k used in evaluation metrics + +| do_validation +| bool +| True +| Whether to perform validation + +| do_test +| bool +| True +| Whether to perform testing + +| disable_tqdm +| bool +| True +| Whether to disable tqdm progress bars + +| inner_norm +| bool +| True +| Whether to apply normalization to embeddings + +| init_bound +| Optional[float] +| None +| Initial bound for embeddings (if any) +|==== + + +[[algorithms-embeddings-graph-sage-considerations]] +== Considerations + +=== Negative sampling + +=== sLCWA - Stochastic Local Closed World Assumption + +=== Loss function + +=== Optimizer + +=== Learning rate scheduler + +=== Inner normalisation + diff --git a/doc/modules/ROOT/partials/python-runtime-algorithms.adoc 
b/doc/modules/ROOT/partials/python-runtime-algorithms.adoc
new file mode 100644
index 000000000..84ce632f8
--- /dev/null
+++ b/doc/modules/ROOT/partials/python-runtime-algorithms.adoc
@@ -0,0 +1 @@
+* xref:python-runtime/knowledge-graph-embeddings.adoc[Knowledge Graph Embeddings]
\ No newline at end of file

From 87f2423dbfd301beca12e174ee60843727012b8c Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Thu, 6 Jun 2024 13:35:28 +0100
Subject: [PATCH 02/20] Add KGE doc - considerations and configuration
 description

---
 .../images/python-runtime/delta-value.svg     | 57 +++++++
 .../python-runtime/distmult-formula.svg       | 56 +++++++
 .../ROOT/images/python-runtime/mrl.svg        | 35 ++++
 .../images/python-runtime/transe-formula.svg  | 46 ++++++
 .../knowledge-graph-embeddings.adoc           | 154 +++++++++++++----
 5 files changed, 311 insertions(+), 37 deletions(-)
 create mode 100644 doc/modules/ROOT/images/python-runtime/delta-value.svg
 create mode 100644 doc/modules/ROOT/images/python-runtime/distmult-formula.svg
 create mode 100644 doc/modules/ROOT/images/python-runtime/mrl.svg
 create mode 100644 doc/modules/ROOT/images/python-runtime/transe-formula.svg

diff --git a/doc/modules/ROOT/images/python-runtime/delta-value.svg b/doc/modules/ROOT/images/python-runtime/delta-value.svg
new file mode 100644
index 000000000..37e48d333
--- /dev/null
+++ b/doc/modules/ROOT/images/python-runtime/delta-value.svg
@@ -0,0 +1,57 @@
+<!-- 57 lines of SVG markup (delta-value formula image); markup not preserved in this excerpt -->
\ No newline at end of file
diff --git a/doc/modules/ROOT/images/python-runtime/distmult-formula.svg b/doc/modules/ROOT/images/python-runtime/distmult-formula.svg
new file mode 100644
index 000000000..2d6dda3ae
--- /dev/null
+++ b/doc/modules/ROOT/images/python-runtime/distmult-formula.svg
@@ -0,0 +1,56 @@
+<!-- 56 lines of SVG markup (DistMult formula image); markup not preserved in this excerpt -->
\ No newline at end of file
diff --git a/doc/modules/ROOT/images/python-runtime/mrl.svg b/doc/modules/ROOT/images/python-runtime/mrl.svg
new file mode 100644
index 000000000..74e63ba13
--- /dev/null
+++ b/doc/modules/ROOT/images/python-runtime/mrl.svg
@@ -0,0 +1,35 @@
+<!-- 35 lines of SVG markup (margin ranking loss formula image); markup not preserved in this excerpt -->
\ No newline at end of file
diff --git a/doc/modules/ROOT/images/python-runtime/transe-formula.svg b/doc/modules/ROOT/images/python-runtime/transe-formula.svg
new file mode 100644
index 000000000..6f641a048
--- /dev/null
+++ b/doc/modules/ROOT/images/python-runtime/transe-formula.svg
@@ -0,0 +1,46 @@
+<!-- 46 lines of SVG markup (TransE formula image); markup not preserved in this excerpt -->
\ No newline at end of file
diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
index 8256acc8a..1a99d5f60 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -1,61 +1,137 @@
 = Knowledge graph embeddings
 
-Knowledge Graph Embeddings (KGE) are a family of algorithms that learn low-dimensional representations of entities and relations in a knowledge graph.
-The embeddings can be used to perform various tasks such as link prediction, entity classification, and entity clustering.
-
-// we define a KGEM as four components: an interaction model, a training approach, a loss function, and its usage of explicit inverse relations.
- -To define the KGE algorithms, we need to define the following components: a scoring function, a training approach and a loss function. +Knowledge Graph Embeddings (KGE) refer to a family of algorithms designed to learn low-dimensional representations of entities and relations within a knowledge graph. +These embeddings are utilized for tasks including link prediction, entity classification, and entity clustering. Python runtime provides the following scoring functions: -* TransE +* `TransE` ** https://proceedings.neurips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).^] -* DistMult +* `DistMult` ** https://arxiv.org/pdf/1412.6575[Yang, Bishan, et al. "Embedding entities and relations for learning and inference in knowledge bases." arXiv preprint arXiv:1412.6575 (2014).^] -KGE an _inductive_ algorithm for computing node embeddings for knowledge graphs. -Knowledge graph means that graph can have multiple node labels and relationship types. +KGE is an _inductive_ algorithm for computing node embeddings for knowledge graphs. +A knowledge graph means that graph can have multiple node labels and relationship types. +Triple `(head, relation, tail)` is used to represent the relationship between two entities. The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema. -KGEMs (Knowledge Graph Embedding Models) are used to learn low-dimensional representations of entities and relations in a knowledge graph. -The embeddings can be used to perform various tasks such as link prediction, entity classification, and entity clustering. -KGEM can return the score for the triple `(head, relation, tail)` which can be used to predict the missing tail entity for a given head and relation. +KGE models are used to learn low-dimensional representations of entities and relations in a knowledge graph. +The embeddings can be used to perform various tasks, such as link prediction, entity classification, and entity clustering. +A KGE model can return the score for the triple `(head, relation, tail)` which can be used to predict the missing tail entity for a given head and relation. * https://arxiv.org/pdf/1706.02216.pdf[William L. Hamilton, Rex Ying, and Jure Leskovec. "Inductive Representation Learning on Large Graphs." 2018.^] * https://arxiv.org/pdf/1911.10232.pdf[Amit Pande, Kai Ni and Venkataramani Kini. "SWAG: Item Recommendations using Convolutions on Weighted Graphs." 2019.^] -== Train API +[[algorithms-embeddings-kge-considerations]] +== Scoring functions + +For KGE algorithms, the scoring function is used to compute the score of a triple `(head, relation, tail)`. +This score is used for computing loss during training and during prediction to rank the tail entities for a given head and relation. + +The scoring function is a function of the embeddings of the head, relation, and tail entities. + +Let the embeddings of the head, relation, and tail entities be denoted by `h`, `r`, and `t` respectively. +To compute the score of the triple `(head, relation, tail)`, the scoring function takes the embeddings `h`, `r`, and `t` as input and computes a scalar score using the corresponding formula. + +=== `TransE` + +It is a translation-based model that represents the relationship between head and tail entities as a vector translation in the embedding space. 
+This method is effective for modeling anti-symmetric, inversion, and composition relations. + +The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t`: + +image::python-runtime/transe-formula.svg[width=280] + +=== `DistMult` + +This method assumes that the relation between head and tail entities is a dot-product multiplication in the embedding space. +This method works for modelling symmetric relations. + +The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t`: + +image::python-runtime/distmult-formula.svg[width=400] + + +== Considerations +To effectively train KGE models, several considerations need to be taken into account, including the choice of loss function, sampling methods, and optimization strategies. + +=== Loss function +The loss function is crucial for guiding the training process of KGE models. It determines how the difference between predicted and actual values is calculated and minimized. + +==== Margin ranking loss + +Margin ranking loss is a pairwise loss function that compares the scores based on the difference between the scores of a positive triple and a negative triple. +When negative sampling size is more than 1, the loss is computed for positive triple and each negative triple and the average loss is computed. + +image::python-runtime/mrl.svg[width=300] +image::python-runtime/delta-value.svg[width=400] + + +==== Negative Sampling Self-Adversarial Loss + +Negative Sampling Self-Adversarial Loss is a setwise loss function that compares the scores based on the difference between the scores of a positive triple and a set of negative triples. +`loss_function_kwargs` can be used to set the `adversarial_temperature` and `margin` parameters. + +* https://arxiv.org/pdf/1902.10197[Sun, Zhiqing, et al. "Rotate: Knowledge graph embedding by relational rotation in complex space." arXiv preprint arXiv:1902.10197 (2019).] + +=== Negative sampling + +Loss function requires negative samples to compute the loss. +The number of negative samples per positive sample is controlled by the `negative_sampling_size` parameter. +When `use_node_type_aware_sampler` is set to `True`, negative nodes are sampled with the same label as the corresponding positive node. +With or without node type awareness, the negative samples are sampled uniformly at random from the graph. + +=== Stochastic Local Closed World Assumption (sLCWA) + +Under this assumption, all positive triples are considered true and randomly sampled negative triples are considered false. + +* https://arxiv.org/pdf/2006.13365[Ali, M., Berrendorf, M., Hoyt, C. T., Vermue, L., Galkin, M., Sharifzadeh, S., ... & Lehmann, J. (2021). Bringing light into the dark: A large-scale evaluation of knowledge graph embedding models under a unified framework. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(12), 8825-8845.] + +=== Optimizer + +Any pytorch optimizer can be used for training the model. +To use non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter. +All optimizer parameters except `params` can be passed as `optimizer_kwargs`. + +=== Learning rate scheduler + +Any pytorch learning rate scheduler can be used for training the model. +To use non-default learning rate scheduler, specify the scheduler class name as a string in the `lr_scheduler` parameter. +All scheduler parameters except `optimizer` can be passed as `lr_scheduler_kwargs`. 
+ +=== Inner normalisation + +In original `TransE` paper in `Algorithm 1` line 5, the entity embeddings are normalized to have `Lp` norm of 1. +For some datasets, this normalization might not be beneficial. +To avoid this normalization, set `inner_norm` to `False`. + + +[[algorithms-embeddings-kge-syntax]] +== Syntax [source,python] ---- -G = gds.graph.load_fb15k237("train") gds.model.transe.train(G, - embedding_dimension = 256, - batch_size=512, - num_epochs=100, - optimizer='Adagrad', - optimizer_kwargs={'lr': 0.01}, - loss_function="MarginRanking", - loss_function_kwargs={'margin': 1.0}, + num_epochs = 10, ) ---- -[cols="1,1,1,4", options="header"] +.Parameters +[cols="1m,1m,1m,1", options="header"] |==== | Parameter | Type | Default value | Description | num_epochs | int -| +| N/A | Number of epochs for training (must be greater than 0) | epochs_per_checkpoint | int -| +| max(num_epochs / 10, 1) | Number of epochs between checkpoints (must be greater than or equal to 0) | load_from_checkpoint @@ -123,6 +199,11 @@ gds.model.transe.train(G, | 1 | Number of negative samples per positive sample +| use_node_type_aware_sampler +| bool +| False +| Whether to sample negative nodes with the same label as the corresponding positive node + | p_norm | float | 1.0 @@ -170,18 +251,17 @@ gds.model.transe.train(G, |==== -[[algorithms-embeddings-graph-sage-considerations]] -== Considerations +.Results +[opts="header",cols="2m,1,6"] +|=== +| Name | Type | Description +| modelInfo | Map | Details of the trained model. +| configuration | Map | The configuration used to run the procedure. +| trainMillis | Integer | Milliseconds to train the model. +|=== -=== Negative sampling - -=== sLCWA - Stochastic Local Closed World Assumption -=== Loss function - -=== Optimizer - -=== Learning rate scheduler - -=== Inner normalisation +[[algorithms-embeddings-kge-examples]] +== Examples +TODO \ No newline at end of file From 2d680823247b659c0bc414cbe8e08a5dd603e22e Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 10 Jun 2024 22:24:02 +0100 Subject: [PATCH 03/20] Reorganize doc page, add some term to be more specific --- .../knowledge-graph-embeddings.adoc | 120 ++++++++++-------- 1 file changed, 68 insertions(+), 52 deletions(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 1a99d5f60..29df81bc0 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -3,23 +3,22 @@ Knowledge Graph Embeddings (KGE) refer to a family of algorithms designed to learn low-dimensional representations of entities and relations within a knowledge graph. These embeddings are utilized for tasks including link prediction, entity classification, and entity clustering. -Python runtime provides the following scoring functions: +This chapter provides an overview of the available shallow embedding algorithms in the GDS Python runtime, such as `TransE` and `DistMult`. * `TransE` ** https://proceedings.neurips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).^] * `DistMult` ** https://arxiv.org/pdf/1412.6575[Yang, Bishan, et al. "Embedding entities and relations for learning and inference in knowledge bases." 
arXiv preprint arXiv:1412.6575 (2014).^] +A knowledge graph is a directed, multi-relational graph. +It consists of triples `(head, relation, tail)` where `head` and `tail` are entities and `relation` is the relationship between them. -KGE is an _inductive_ algorithm for computing node embeddings for knowledge graphs. -A knowledge graph means that graph can have multiple node labels and relationship types. -Triple `(head, relation, tail)` is used to represent the relationship between two entities. +Shallow models of KGE represented here are _inductive_ algorithms for computing node and relationship embeddings (low-dimensional representations) for knowledge graphs. +Shallow means that matrix lookups represent the entity and relation encoders. The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema. -KGE models are used to learn low-dimensional representations of entities and relations in a knowledge graph. +A KGE model can return the score for the triple which can be used to predict the missing tail entity for a given head and relation. The embeddings can be used to perform various tasks, such as link prediction, entity classification, and entity clustering. -A KGE model can return the score for the triple `(head, relation, tail)` which can be used to predict the missing tail entity for a given head and relation. - * https://arxiv.org/pdf/1706.02216.pdf[William L. Hamilton, Rex Ying, and Jure Leskovec. "Inductive Representation Learning on Large Graphs." 2018.^] * https://arxiv.org/pdf/1911.10232.pdf[Amit Pande, Kai Ni and Venkataramani Kini. "SWAG: Item Recommendations using Convolutions on Weighted Graphs." 2019.^] @@ -38,6 +37,7 @@ To compute the score of the triple `(head, relation, tail)`, the scoring functio === `TransE` +`TransE` is a translational distance interaction model. It is a translation-based model that represents the relationship between head and tail entities as a vector translation in the embedding space. This method is effective for modeling anti-symmetric, inversion, and composition relations. @@ -47,6 +47,7 @@ image::python-runtime/transe-formula.svg[width=280] === `DistMult` +`DistMult` is a semantic matching interaction model. This method assumes that the relation between head and tail entities is a dot-product multiplication in the embedding space. This method works for modelling symmetric relations. @@ -58,13 +59,34 @@ image::python-runtime/distmult-formula.svg[width=400] == Considerations To effectively train KGE models, several considerations need to be taken into account, including the choice of loss function, sampling methods, and optimization strategies. +=== Stochastic Local Closed World Assumption (sLCWA) + +Observed triplets in the knowledge graph are considered true. +The unobserved triplets can be treated differently based on the assumption made. + +* Open World Assumption (OWA) assumes that all unobserved facts are unknown. + +* Closed World Assumption (CWA) assumes that all unobserved facts are false. + +* Local Closed World Assumption (LCWA) assumes that all observed facts are true. +All corrupted triplets, which are generated by replacing the head or tail entity of a positive triplet, are false. + +* Stochastic Local Closed World Assumption (sLCWA) assumes that all observed facts are true. +Some corrupted triplets are false and some are true. +The number of corrupted triplets for each true triplet is set by the `negative_sampling_size` parameter. 
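+
+To make the sampling concrete, the sketch below shows one way corrupted triples can be drawn under sLCWA.
+This is an illustrative sketch of the idea only, not the internal implementation.
+
+[source, python, role=no-test]
+----
+import random
+
+def corrupt(triple, entities, negative_sampling_size):
+    # Corrupt a true triple by replacing its head or its tail
+    # with an entity sampled uniformly at random.
+    head, relation, tail = triple
+    negatives = []
+    for _ in range(negative_sampling_size):
+        if random.random() < 0.5:
+            negatives.append((random.choice(entities), relation, tail))
+        else:
+            negatives.append((head, relation, random.choice(entities)))
+    return negatives
+----
+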
+ +Knowledge graph embedding models are trained under the sLCWA assumption in current implementation. + + === Loss function -The loss function is crucial for guiding the training process of KGE models. It determines how the difference between predicted and actual values is calculated and minimized. +The loss function is crucial for guiding the training process of KGE models. +It determines how the difference between predicted and actual values is calculated and minimized. +There are several loss functions that can be used for training KGE models, see below for more details. ==== Margin ranking loss Margin ranking loss is a pairwise loss function that compares the scores based on the difference between the scores of a positive triple and a negative triple. -When negative sampling size is more than 1, the loss is computed for positive triple and each negative triple and the average loss is computed. +When negative sampling size is more than one, the loss is computed for positive triple and each negative triple and the average loss is computed. image::python-runtime/mrl.svg[width=300] image::python-runtime/delta-value.svg[width=400] @@ -72,10 +94,15 @@ image::python-runtime/delta-value.svg[width=400] ==== Negative Sampling Self-Adversarial Loss -Negative Sampling Self-Adversarial Loss is a setwise loss function that compares the scores based on the difference between the scores of a positive triple and a set of negative triples. +Negative Sampling Self-Adversarial Lossfootnote:[Sun, Zhiqing, et al. "Rotate: Knowledge graph embedding by relational rotation in complex space." arXiv preprint arXiv:1902.10197 (2019).] is a setwise loss function that compares the scores based on the difference between the scores of a positive triple and a set of negative triples. `loss_function_kwargs` can be used to set the `adversarial_temperature` and `margin` parameters. -* https://arxiv.org/pdf/1902.10197[Sun, Zhiqing, et al. "Rotate: Knowledge graph embedding by relational rotation in complex space." arXiv preprint arXiv:1902.10197 (2019).] + +=== Optimizer + +Any pytorch optimizer can be used for training the model. +To use non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter. +All optimizer parameters except `params` can be passed as `optimizer_kwargs`. === Negative sampling @@ -84,18 +111,6 @@ The number of negative samples per positive sample is controlled by the `negativ When `use_node_type_aware_sampler` is set to `True`, negative nodes are sampled with the same label as the corresponding positive node. With or without node type awareness, the negative samples are sampled uniformly at random from the graph. -=== Stochastic Local Closed World Assumption (sLCWA) - -Under this assumption, all positive triples are considered true and randomly sampled negative triples are considered false. - -* https://arxiv.org/pdf/2006.13365[Ali, M., Berrendorf, M., Hoyt, C. T., Vermue, L., Galkin, M., Sharifzadeh, S., ... & Lehmann, J. (2021). Bringing light into the dark: A large-scale evaluation of knowledge graph embedding models under a unified framework. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(12), 8825-8845.] - -=== Optimizer - -Any pytorch optimizer can be used for training the model. -To use non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter. -All optimizer parameters except `params` can be passed as `optimizer_kwargs`. 
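+
+For example, a minimal sketch of selecting a non-default optimizer (the parameter values here are illustrative, not tuned recommendations):
+
+[source, python, role=no-test]
+----
+gds.model.train(
+    G,
+    num_epochs=10,
+    embedding_dimension=100,
+    optimizer="Adagrad",
+    optimizer_kwargs={"lr": 0.01, "weight_decay": 0.0005},
+)
+----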
- === Learning rate scheduler Any pytorch learning rate scheduler can be used for training the model. @@ -104,7 +119,8 @@ All scheduler parameters except `optimizer` can be passed as `lr_scheduler_kwarg === Inner normalisation -In original `TransE` paper in `Algorithm 1` line 5, the entity embeddings are normalized to have `Lp` norm of 1. +In the original `TransE` paperfootnote:[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).] +in `Algorithm 1`, line 5, the entity embeddings are normalized to have `Lp` norm of 1. For some datasets, this normalization might not be beneficial. To avoid this normalization, set `inner_norm` to `False`. @@ -129,6 +145,11 @@ gds.model.transe.train(G, | N/A | Number of epochs for training (must be greater than 0) +| embedding_dimension +| int +| 256 +| Dimensionality of the embeddings (must be greater than 0) + | epochs_per_checkpoint | int | max(num_epochs / 10, 1) @@ -149,10 +170,10 @@ gds.model.transe.train(G, | "transe" | Function used to score embeddings -| embedding_dimension -| int -| 256 -| Dimensionality of the embeddings (must be greater than 0) +| p_norm +| float +| 1.0 +| Norm to use in TransE scoring function | batch_size | int @@ -164,11 +185,6 @@ gds.model.transe.train(G, | 512 | Size of the test batch (must be greater than 0) -| epochs_per_val -| int -| 50 -| Number of epochs between validations (must be greater than or equal to 0) - | optimizer | str | "adam" @@ -189,10 +205,15 @@ gds.model.transe.train(G, | {factor=1, total_iters=1000} | Additional arguments for the learning rate scheduler -| filtered_metrics -| bool -| False -| Whether to use filtered metrics during evaluation +| loss_function +| str +| MarginRanking +| Loss function to use for training + +| loss_function_kwargs +| dict[str, Any] +| {margin=1.0, adversarial_temperature=1.0, gamma=20.0} +| Additional arguments for the loss function | negative_sampling_size | int @@ -204,21 +225,6 @@ gds.model.transe.train(G, | False | Whether to sample negative nodes with the same label as the corresponding positive node -| p_norm -| float -| 1.0 -| Norm to use in TransE scoring function - -| loss_function -| str -| MarginRanking -| Loss function to use for training - -| loss_function_kwargs -| dict[str, Any] -| {margin=1.0, adversarial_temperature=1.0, gamma=20.0} -| Additional arguments for the loss function - | k_value | int | 10 @@ -234,6 +240,16 @@ gds.model.transe.train(G, | True | Whether to perform testing +| filtered_metrics +| bool +| False +| Whether to use filtered metrics during evaluation + +| epochs_per_val +| int +| 50 +| Number of epochs between validations (must be greater than or equal to 0) + | disable_tqdm | bool | True From dad8bc5e309986b5a6931a9d87524e4fdf60c774 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Tue, 11 Jun 2024 10:23:33 +0100 Subject: [PATCH 04/20] Make checkstyle happy --- graphdatascience/graph/base_graph_proc_runner.py | 3 ++- graphdatascience/session/aura_api_responses.py | 1 - graphdatascience/tests/unit/test_aura_api.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/graphdatascience/graph/base_graph_proc_runner.py b/graphdatascience/graph/base_graph_proc_runner.py index 6d5531580..280e2a16b 100644 --- a/graphdatascience/graph/base_graph_proc_runner.py +++ b/graphdatascience/graph/base_graph_proc_runner.py @@ -517,7 +517,8 @@ def writeRelationship( ).squeeze() @multimethod - def removeNodeProperties(self) -> None: ... 
+ def removeNodeProperties(self) -> None: + ... @removeNodeProperties.register @graph_type_check diff --git a/graphdatascience/session/aura_api_responses.py b/graphdatascience/session/aura_api_responses.py index cd533a192..b54b7d889 100644 --- a/graphdatascience/session/aura_api_responses.py +++ b/graphdatascience/session/aura_api_responses.py @@ -164,7 +164,6 @@ def from_json(cls, json: Dict[str, Any]) -> TenantDetails: # datetime.fromisoformat only works with Python version > 3.9 class TimeParser: - @staticmethod def fromisoformat(date: str) -> datetime: if sys.version_info >= (3, 11): diff --git a/graphdatascience/tests/unit/test_aura_api.py b/graphdatascience/tests/unit/test_aura_api.py index 5b87816ca..88a0bf773 100644 --- a/graphdatascience/tests/unit/test_aura_api.py +++ b/graphdatascience/tests/unit/test_aura_api.py @@ -53,7 +53,6 @@ def test_create_session(requests_mock: Mocker) -> None: def test_list_session(requests_mock: Mocker) -> None: - api = AuraApi(client_id="", client_secret="", tenant_id="some-tenant") mock_auth_token(requests_mock) From 1a90dfeaf26e712a8e3b1f0e734868c5fd6757c0 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Tue, 11 Jun 2024 13:59:50 +0100 Subject: [PATCH 05/20] Apply suggestions from code review Co-authored-by: Adam Schill Collberg Co-authored-by: Adam Schill Collberg <53664324+adamnsch@users.noreply.github.com> --- .../pages/python-runtime/knowledge-graph-embeddings.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 29df81bc0..8887d0ac8 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -1,7 +1,7 @@ = Knowledge graph embeddings Knowledge Graph Embeddings (KGE) refer to a family of algorithms designed to learn low-dimensional representations of entities and relations within a knowledge graph. -These embeddings are utilized for tasks including link prediction, entity classification, and entity clustering. +These embeddings are utilized for tasks such as link prediction, entity classification, and entity clustering. This chapter provides an overview of the available shallow embedding algorithms in the GDS Python runtime, such as `TransE` and `DistMult`. @@ -86,7 +86,7 @@ There are several loss functions that can be used for training KGE models, see b ==== Margin ranking loss Margin ranking loss is a pairwise loss function that compares the scores based on the difference between the scores of a positive triple and a negative triple. -When negative sampling size is more than one, the loss is computed for positive triple and each negative triple and the average loss is computed. +When negative sampling size is more than one, the loss is computed for a positive triple and each of its negative triples, and the average loss is computed. image::python-runtime/mrl.svg[width=300] image::python-runtime/delta-value.svg[width=400] @@ -106,7 +106,7 @@ All optimizer parameters except `params` can be passed as `optimizer_kwargs`. === Negative sampling -Loss function requires negative samples to compute the loss. +The loss function requires negative samples to compute the loss. The number of negative samples per positive sample is controlled by the `negative_sampling_size` parameter. 
When `use_node_type_aware_sampler` is set to `True`, negative nodes are sampled with the same label as the corresponding positive node. With or without node type awareness, the negative samples are sampled uniformly at random from the graph. @@ -193,7 +193,7 @@ gds.model.transe.train(G, | optimizer_kwargs | dict[str, Any] | {lr=0.01, weight_decay=0.0005} -| Additional arguments for the optimizer +| Arguments for the optimizer | lr_scheduler | str From 18c2f837f98a2ef28ff3d9ca7cb47bb6eae4e318 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Tue, 11 Jun 2024 23:01:26 +0100 Subject: [PATCH 06/20] Modify docs after Adam's review --- .../knowledge-graph-embeddings.adoc | 43 +++++++++++-------- .../graph/base_graph_proc_runner.py | 3 +- .../session/aura_api_responses.py | 1 + graphdatascience/tests/unit/test_aura_api.py | 1 + 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 8887d0ac8..51127addb 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -6,23 +6,20 @@ These embeddings are utilized for tasks such as link prediction, entity classifi This chapter provides an overview of the available shallow embedding algorithms in the GDS Python runtime, such as `TransE` and `DistMult`. * `TransE` -** https://proceedings.neurips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).^] +** Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013). * `DistMult` -** https://arxiv.org/pdf/1412.6575[Yang, Bishan, et al. "Embedding entities and relations for learning and inference in knowledge bases." arXiv preprint arXiv:1412.6575 (2014).^] +** Yang, Bishan, et al. "Embedding entities and relations for learning and inference in knowledge bases." arXiv preprint arXiv:1412.6575 (2014) A knowledge graph is a directed, multi-relational graph. It consists of triples `(head, relation, tail)` where `head` and `tail` are entities and `relation` is the relationship between them. -Shallow models of KGE represented here are _inductive_ algorithms for computing node and relationship embeddings (low-dimensional representations) for knowledge graphs. +Shallow models of KGE represented here are _transductive_ algorithms for computing node and relationship embeddings (low-dimensional representations) for knowledge graphs. Shallow means that matrix lookups represent the entity and relation encoders. The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema. -A KGE model can return the score for the triple which can be used to predict the missing tail entity for a given head and relation. +A KGE model can return the score for the triple which can be used to predict the missing tail entity for a given head and relationship type. The embeddings can be used to perform various tasks, such as link prediction, entity classification, and entity clustering. -* https://arxiv.org/pdf/1706.02216.pdf[William L. Hamilton, Rex Ying, and Jure Leskovec. "Inductive Representation Learning on Large Graphs." 
2018.^] -* https://arxiv.org/pdf/1911.10232.pdf[Amit Pande, Kai Ni and Venkataramani Kini. "SWAG: Item Recommendations using Convolutions on Weighted Graphs." 2019.^] - [[algorithms-embeddings-kge-considerations]] == Scoring functions @@ -35,6 +32,7 @@ The scoring function is a function of the embeddings of the head, relation, and Let the embeddings of the head, relation, and tail entities be denoted by `h`, `r`, and `t` respectively. To compute the score of the triple `(head, relation, tail)`, the scoring function takes the embeddings `h`, `r`, and `t` as input and computes a scalar score using the corresponding formula. + === `TransE` `TransE` is a translational distance interaction model. @@ -45,6 +43,7 @@ The formula to compute the score of a triple `(head, relation, tail)` using the image::python-runtime/transe-formula.svg[width=280] + === `DistMult` `DistMult` is a semantic matching interaction model. @@ -59,6 +58,7 @@ image::python-runtime/distmult-formula.svg[width=400] == Considerations To effectively train KGE models, several considerations need to be taken into account, including the choice of loss function, sampling methods, and optimization strategies. + === Stochastic Local Closed World Assumption (sLCWA) Observed triplets in the knowledge graph are considered true. @@ -83,6 +83,7 @@ The loss function is crucial for guiding the training process of KGE models. It determines how the difference between predicted and actual values is calculated and minimized. There are several loss functions that can be used for training KGE models, see below for more details. + ==== Margin ranking loss Margin ranking loss is a pairwise loss function that compares the scores based on the difference between the scores of a positive triple and a negative triple. @@ -104,6 +105,7 @@ Any pytorch optimizer can be used for training the model. To use non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter. All optimizer parameters except `params` can be passed as `optimizer_kwargs`. + === Negative sampling The loss function requires negative samples to compute the loss. @@ -111,12 +113,14 @@ The number of negative samples per positive sample is controlled by the `negativ When `use_node_type_aware_sampler` is set to `True`, negative nodes are sampled with the same label as the corresponding positive node. With or without node type awareness, the negative samples are sampled uniformly at random from the graph. + === Learning rate scheduler Any pytorch learning rate scheduler can be used for training the model. To use non-default learning rate scheduler, specify the scheduler class name as a string in the `lr_scheduler` parameter. All scheduler parameters except `optimizer` can be passed as `lr_scheduler_kwargs`. + === Inner normalisation In the original `TransE` paperfootnote:[Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).] @@ -125,13 +129,23 @@ For some datasets, this normalization might not be beneficial. To avoid this normalization, set `inner_norm` to `False`. +=== Filtered metrics + +When we evaluate (compute metrics) the model, we compute scores for all possible triples with the same head or tail and relationship type as the test triple. +Ranking the test triple among other triples is used for computing metrics, such as Mean Rank, Mean Reciprocal Rank, and Hit@k. + +When `filtered_metrics` is set to `False`, the ranking is done among all possible triples. 
+ +When `filtered_metrics` is set to `True`, the ranking is done among only the triples that are not present in the training set. + [[algorithms-embeddings-kge-syntax]] == Syntax [source,python] ---- -gds.model.transe.train(G, +gds.model.train(G, num_epochs = 10, + embedding_dimension = 100, ) ---- @@ -147,7 +161,7 @@ gds.model.transe.train(G, | embedding_dimension | int -| 256 +| N/A | Dimensionality of the embeddings (must be greater than 0) | epochs_per_checkpoint @@ -243,27 +257,22 @@ gds.model.transe.train(G, | filtered_metrics | bool | False -| Whether to use filtered metrics during evaluation +| Whether to use filtered metrics during evaluation, see <<_filtered_metrics, filtered metrics>> | epochs_per_val | int | 50 | Number of epochs between validations (must be greater than or equal to 0) -| disable_tqdm -| bool -| True -| Whether to disable tqdm progress bars - | inner_norm | bool | True -| Whether to apply normalization to embeddings +| Whether to apply normalization to embeddings, see <<_inner_normalisation, inner normalisation>> | init_bound | Optional[float] | None -| Initial bound for embeddings (if any) +| The value for the range [-bound; bound] of the uniform distribution used to initialize the embeddings. Xavier's initialization is used if None. |==== diff --git a/graphdatascience/graph/base_graph_proc_runner.py b/graphdatascience/graph/base_graph_proc_runner.py index 280e2a16b..6d5531580 100644 --- a/graphdatascience/graph/base_graph_proc_runner.py +++ b/graphdatascience/graph/base_graph_proc_runner.py @@ -517,8 +517,7 @@ def writeRelationship( ).squeeze() @multimethod - def removeNodeProperties(self) -> None: - ... + def removeNodeProperties(self) -> None: ... @removeNodeProperties.register @graph_type_check diff --git a/graphdatascience/session/aura_api_responses.py b/graphdatascience/session/aura_api_responses.py index b54b7d889..cd533a192 100644 --- a/graphdatascience/session/aura_api_responses.py +++ b/graphdatascience/session/aura_api_responses.py @@ -164,6 +164,7 @@ def from_json(cls, json: Dict[str, Any]) -> TenantDetails: # datetime.fromisoformat only works with Python version > 3.9 class TimeParser: + @staticmethod def fromisoformat(date: str) -> datetime: if sys.version_info >= (3, 11): diff --git a/graphdatascience/tests/unit/test_aura_api.py b/graphdatascience/tests/unit/test_aura_api.py index 88a0bf773..5b87816ca 100644 --- a/graphdatascience/tests/unit/test_aura_api.py +++ b/graphdatascience/tests/unit/test_aura_api.py @@ -53,6 +53,7 @@ def test_create_session(requests_mock: Mocker) -> None: def test_list_session(requests_mock: Mocker) -> None: + api = AuraApi(client_id="", client_secret="", tenant_id="some-tenant") mock_auth_token(requests_mock) From 350c4dce6cb4037b92490ed10e3d76c4b70e6053 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Wed, 12 Jun 2024 13:18:11 +0100 Subject: [PATCH 07/20] Add more information about splitting and p_norm usage --- .../pages/python-runtime/knowledge-graph-embeddings.adoc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 51127addb..bc6a70f7f 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -125,6 +125,7 @@ All scheduler parameters except `optimizer` can be passed as `lr_scheduler_kwarg In the original `TransE` paperfootnote:[Bordes, 
Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).]
 in `Algorithm 1`, line 5, the entity embeddings are normalized to have `Lp` norm of 1.
+The value of `p` is set by the `p_norm` parameter.
 For some datasets, this normalization might not be beneficial.
 To avoid this normalization, set `inner_norm` to `False`.
@@ -177,7 +178,10 @@ gds.model.train(G,
 | split_ratios
 | dict[str, float]
 | {TRAIN=0.8, TEST=0.2}
-| Ratios for splitting the dataset into training and test sets
+| Ratios for splitting the dataset into training and test sets.
+When sum of values is less than 1, the remaining data is used for validation.
+Validation size can be set explicitly with the key `VALID`.
+When all three keys are present, the sum of values must be equal to 1.
 
 | scoring_function
 | str
 | "transe"
@@ -267,7 +271,7 @@ gds.model.train(G,
 | inner_norm
 | bool
 | True
-| Whether to apply normalization to embeddings, see <<_inner_normalisation, inner normalisation>>
+| Whether to apply normalization to embeddings, see <<_inner_normalisation, inner normalization>>
 
 | init_bound
 | Optional[float]
 | None

From a36669344e33e595eac1895120a6e862dcca0aa6 Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Wed, 12 Jun 2024 13:44:35 +0100
Subject: [PATCH 08/20] Add comment about KGE into algorithms page

---
 doc/modules/ROOT/pages/algorithms.adoc                  | 9 +++++++++
 doc/modules/ROOT/pages/python-runtime/introduction.adoc | 7 -------
 2 files changed, 9 insertions(+), 7 deletions(-)
 delete mode 100644 doc/modules/ROOT/pages/python-runtime/introduction.adoc

diff --git a/doc/modules/ROOT/pages/algorithms.adoc b/doc/modules/ROOT/pages/algorithms.adoc
index 670bca25f..11afa6ca2 100644
--- a/doc/modules/ROOT/pages/algorithms.adoc
+++ b/doc/modules/ROOT/pages/algorithms.adoc
@@ -72,6 +72,15 @@ assert fastrp_result["nodePropertiesWritten"] == G.node_count()
 Some algorithms deviate from the standard syntactic structure.
 We describe how to use them in the Python client in the sections below.
 
+// // Supposed to be uncommented when the algorithms are available.
+// [NOTE]
+// ====
+// Some algorithms are available for xref:gds-session.adoc[GDS Sessions] only.
+// See the list:
+//
+// include::ROOT:partial$python-runtime-algorithms.adoc[]
+// ====
+
 [[algorithms-execution-mode]]
 == Execution modes
diff --git a/doc/modules/ROOT/pages/python-runtime/introduction.adoc b/doc/modules/ROOT/pages/python-runtime/introduction.adoc
deleted file mode 100644
index 177052b96..000000000
--- a/doc/modules/ROOT/pages/python-runtime/introduction.adoc
+++ /dev/null
@@ -1,7 +0,0 @@
-= Introduction
-
-Python runtime works with GDS sessions.
- -The list of available algorithms: - -include::ROOT:partial$python-runtime-algorithms.adoc[] \ No newline at end of file From 3e6c966c5b6a0023a4733b9f577aec887881c912 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Wed, 12 Jun 2024 13:47:23 +0100 Subject: [PATCH 09/20] Remove explicit page from nav --- doc/modules/ROOT/content-nav.adoc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/doc/modules/ROOT/content-nav.adoc b/doc/modules/ROOT/content-nav.adoc index 4a8a3917a..134634114 100644 --- a/doc/modules/ROOT/content-nav.adoc +++ b/doc/modules/ROOT/content-nav.adoc @@ -4,11 +4,6 @@ * xref:graph-object.adoc[] * xref:algorithms.adoc[] * xref:pipelines.adoc[] -* xref:python-runtime/introduction.adoc[Python Runtime Algorithms] -+ --- -include::partial$python-runtime-algorithms.adoc[] --- * xref:model-object.adoc[] * xref:common-datasets.adoc[] * xref:rel-embedding-models.adoc[] From 880ab30cdae91b2fb6c92d4624743ee8249271c4 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 17 Jun 2024 11:59:44 +0100 Subject: [PATCH 10/20] no run for code pieces --- .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index bc6a70f7f..2ee5f9126 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -142,7 +142,7 @@ When `filtered_metrics` is set to `True`, the ranking is done among only the tri [[algorithms-embeddings-kge-syntax]] == Syntax -[source,python] +[source, python, role=no-test] ---- gds.model.train(G, num_epochs = 10, From 94d9a0c56fdd89691d95c4d41fc1945bd38ef563 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 24 Jun 2024 11:07:56 +0100 Subject: [PATCH 11/20] Fix wording in doc --- .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 2ee5f9126..86590b7a9 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -13,7 +13,7 @@ This chapter provides an overview of the available shallow embedding algorithms A knowledge graph is a directed, multi-relational graph. It consists of triples `(head, relation, tail)` where `head` and `tail` are entities and `relation` is the relationship between them. -Shallow models of KGE represented here are _transductive_ algorithms for computing node and relationship embeddings (low-dimensional representations) for knowledge graphs. +Shallow models of KGE represented here are _transductive_ algorithms for computing node and relationship type embeddings (low-dimensional representations) for knowledge graphs. Shallow means that matrix lookups represent the entity and relation encoders. The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema. 
From 7148e70f07c2301277a7e216a8f8b7b51b19050f Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 24 Jun 2024 11:41:57 +0100 Subject: [PATCH 12/20] Update doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc Co-authored-by: Adam Schill Collberg <53664324+adamnsch@users.noreply.github.com> --- .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 86590b7a9..224b1921c 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -15,7 +15,6 @@ It consists of triples `(head, relation, tail)` where `head` and `tail` are enti Shallow models of KGE represented here are _transductive_ algorithms for computing node and relationship type embeddings (low-dimensional representations) for knowledge graphs. Shallow means that matrix lookups represent the entity and relation encoders. -The algorithm is inductive, meaning that it can be trained on one graph and then applied to this exact graph or another graph with the same schema. A KGE model can return the score for the triple which can be used to predict the missing tail entity for a given head and relationship type. The embeddings can be used to perform various tasks, such as link prediction, entity classification, and entity clustering. From e24f53b1d5264049b84fd3f587a56387b9b4aeee Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 24 Jun 2024 12:26:16 +0100 Subject: [PATCH 13/20] Update doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc Co-authored-by: Adam Schill Collberg <53664324+adamnsch@users.noreply.github.com> --- .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 224b1921c..ecd2e9cb8 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -35,7 +35,7 @@ To compute the score of the triple `(head, relation, tail)`, the scoring functio === `TransE` `TransE` is a translational distance interaction model. -It is a translation-based model that represents the relationship between head and tail entities as a vector translation in the embedding space. +It represents the relationship between head and tail entities as a vector translation in the embedding space. This method is effective for modeling anti-symmetric, inversion, and composition relations. 
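+
+Expressed in code, the scoring idea can be sketched as follows (an illustrative rendition only, assuming PyTorch tensors for `h`, `r`, and `t`; `p_norm` is the training parameter described below):
+
+[source, python, role=no-test]
+----
+import torch
+
+def transe_score(h, r, t, p_norm=1.0):
+    # A triple is plausible when h + r lies close to t,
+    # so the negated translational distance is used as the score.
+    return -torch.norm(h + r - t, p=p_norm, dim=-1)
+----
+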
The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t`: From e70551eef1ccb076f825391be478bc24ca766a23 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 24 Jun 2024 12:32:22 +0100 Subject: [PATCH 14/20] Update doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc Co-authored-by: Adam Schill Collberg <53664324+adamnsch@users.noreply.github.com> --- .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index ecd2e9cb8..44d4231bb 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -74,7 +74,7 @@ All corrupted triplets, which are generated by replacing the head or tail entity Some corrupted triplets are false and some are true. The number of corrupted triplets for each true triplet is set by the `negative_sampling_size` parameter. -Knowledge graph embedding models are trained under the sLCWA assumption in current implementation. +Knowledge graph embedding models are trained under the sLCWA assumption in the GDS implementation. === Loss function From ddf853c39bb19d60c261bc8435e4ae98aadfac56 Mon Sep 17 00:00:00 2001 From: Olga Razvenskaia Date: Mon, 24 Jun 2024 12:33:14 +0100 Subject: [PATCH 15/20] Add some doc changes and reformulations --- .../pages/python-runtime/knowledge-graph-embeddings.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc index 44d4231bb..304b17ac4 100644 --- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc +++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc @@ -24,7 +24,7 @@ The embeddings can be used to perform various tasks, such as link prediction, en == Scoring functions For KGE algorithms, the scoring function is used to compute the score of a triple `(head, relation, tail)`. -This score is used for computing loss during training and during prediction to rank the tail entities for a given head and relation. +This score is used for computing loss during training and during prediction to rank the tail entities for a given head and relationship type. The scoring function is a function of the embeddings of the head, relation, and tail entities. @@ -46,7 +46,7 @@ image::python-runtime/transe-formula.svg[width=280] === `DistMult` `DistMult` is a semantic matching interaction model. -This method assumes that the relation between head and tail entities is a dot-product multiplication in the embedding space. +This method scores the triple by computing the dot-product multiplication of the embeddings of the head, relationship type, and tail entities. This method works for modelling symmetric relations. The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t`: @@ -55,6 +55,7 @@ image::python-runtime/distmult-formula.svg[width=400] == Considerations + To effectively train KGE models, several considerations need to be taken into account, including the choice of loss function, sampling methods, and optimization strategies. 

@@ -78,6 +79,7 @@

=== Loss function

+
The loss function is crucial for guiding the training process of KGE models.
It determines how the difference between predicted and actual values is calculated and minimized.
There are several loss functions that can be used for training KGE models; see below for more details.

From e69b611a3016b49837a98590644d8b5a4a6a8512 Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Mon, 24 Jun 2024 12:38:34 +0100
Subject: [PATCH 16/20] Apply suggestions from code review

Co-authored-by: Adam Schill Collberg
Co-authored-by: Adam Schill Collberg <53664324+adamnsch@users.noreply.github.com>
---
 .../knowledge-graph-embeddings.adoc          | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
index 304b17ac4..ded2b8cf8 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -102,7 +102,7 @@ Negative Sampling Self-Adversarial Lossfootnote:[Sun, Zhiqing, et al. "Rotate: K

=== Optimizer

-Any pytorch optimizer can be used for training the model.
+Any PyTorch optimizer can be used for training the model.
To use a non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter.
All optimizer parameters except `params` can be passed as `optimizer_kwargs`.

@@ -117,7 +117,7 @@ With or without node type awareness, the negative samples are sampled uniformly

=== Learning rate scheduler

-Any pytorch learning rate scheduler can be used for training the model.
+Any PyTorch learning rate scheduler can be used for training the model.
To use a non-default learning rate scheduler, specify the scheduler class name as a string in the `lr_scheduler` parameter.
All scheduler parameters except `optimizer` can be passed as `lr_scheduler_kwargs`.

@@ -133,8 +133,8 @@ To avoid this normalization, set `inner_norm` to `False`.

=== Filtered metrics

-When we evaluate (compute metrics) the model, we compute scores for all possible triples with the same head or tail and relationship type as the test triple.
-Ranking the test triple among other triples is used for computing metrics, such as Mean Rank, Mean Reciprocal Rank, and Hit@k.
+When we evaluate (compute metrics of) the model on the test set, we compute scores for all possible triples with the same head or tail and relationship type as the test triple.
+Ranking the test triple among other triples is used for computing metrics, such as Mean Rank, Mean Reciprocal Rank, and Hits@k.

When `filtered_metrics` is set to `False`, the ranking is done among all possible triples.

@@ -180,14 +180,14 @@ gds.model.train(G,
| dict[str, float]
| {TRAIN=0.8, TEST=0.2}
| Ratios for splitting the dataset into training and test sets.
-When sum of values is less than 1, the remaining data is used for validation.
-Validation size can be set explicitly with the key `VALID`.
-When all three keys are present, the sum of values must be equal to 1.
+When the sum of the ratios is less than 1.0, the remaining examples are used for validation.
+The validation set ratio can be set explicitly with the key `VALID`.
+When all three keys are present, the sum of values must be equal to 1.0.
| scoring_function
| str
| "transe"
-| Function used to score embeddings
+| Function used to score embeddings of triples

| p_norm
| float
@@ -247,7 +247,7 @@
| k_value
| int
| 10
-| Value of k used in evaluation metrics
+| Value of k used in Hits@k evaluation metric

| do_validation
| bool
@@ -277,7 +277,7 @@
| init_bound
| Optional[float]
| None
-| The value for the range [-bound; bound] of the uniform distribution used to initialize the embeddings. Xavier's initialization is used if None.
+| The value for the range [-init_bound; init_bound] of the uniform distribution used to initialize the embeddings. Xavier initialization is used if None.
|====

From 5584ff8d2b95a1d71aa7886016f52a75b2477da6 Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Mon, 24 Jun 2024 12:39:20 +0100
Subject: [PATCH 17/20] Add API alignment, removed results chapter

---
 .../python-runtime/knowledge-graph-embeddings.adoc | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
index ded2b8cf8..94b7c258f 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -145,7 +145,7 @@ When `filtered_metrics` is set to `True`, the ranking is done among only the tri

[source, python, role=no-test]
----
-gds.model.train(G,
+gds.kge.model.train(G,
 num_epochs = 10,
 embedding_dimension = 100,
)
----
@@ -281,17 +281,6 @@ When all three keys are present, the sum of values must be equal to 1.0.

-.Results
-[opts="header",cols="2m,1,6"]
-|===
-| Name | Type | Description
-| modelInfo | Map | Details of the trained model.
-| configuration | Map | The configuration used to run the procedure.
-| trainMillis | Integer | Milliseconds to train the model.
-|===
-
-
[[algorithms-embeddings-kge-examples]]
== Examples
TODO
\ No newline at end of file

From c710e2367ba2b878bba1d039b72bda94b908b1d6 Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Mon, 24 Jun 2024 12:44:58 +0100
Subject: [PATCH 18/20] Change optimizer description

---
 .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
index 94b7c258f..3e23b2976 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -102,7 +102,8 @@

=== Optimizer

-Any PyTorch optimizer can be used for training the model.
+Several optimizers are available, such as `Adam`, `SGD`, and `Adagrad`.
+Their parameters are aligned with the corresponding PyTorch optimizer parameters.
To use a non-default optimizer, specify the optimizer class name as a string in the `optimizer` parameter.
All optimizer parameters except `params` can be passed as `optimizer_kwargs`.
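
A minimal sketch of how the training options touched by PATCH 16-18 compose into a single call, for illustration only and not part of the patch series. It assumes the `gds.kge.model.train` entry point from PATCH 17, a previously loaded graph object `G`, and that the `SGD` and `LinearLR` keyword arguments follow the corresponding PyTorch signatures, as PATCH 18 states.

[source, python, role=no-test]
----
# Sketch: training with a non-default optimizer and learning rate scheduler.
# `G` is assumed to be a graph object loaded earlier in the session.
gds.kge.model.train(G,
    num_epochs=100,
    embedding_dimension=256,
    scoring_function="distmult",
    optimizer="SGD",                # PyTorch optimizer class name, passed as a string
    optimizer_kwargs={"lr": 0.05},  # forwarded to the optimizer; `params` is set internally
    lr_scheduler="LinearLR",        # PyTorch scheduler class name, passed as a string
    lr_scheduler_kwargs={"start_factor": 1.0, "end_factor": 0.1, "total_iters": 100},
)
----
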
From de258a089af638ff42f148350930d3e1b872c177 Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Mon, 24 Jun 2024 13:12:06 +0100
Subject: [PATCH 19/20] Add Xavier init note

---
 .../ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
index 3e23b2976..671bb2ccf 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
@@ -278,7 +278,9 @@ When all three keys are present, the sum of values must be equal to 1.0.
| init_bound
| Optional[float]
| None
-| The value for the range [-init_bound; init_bound] of the uniform distribution used to initialize the embeddings. Xavier initialization is used if None.
+| The value for the range [-init_bound; init_bound] of the uniform distribution used to initialize the embeddings.
+Xavier initializationfootnote:[Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of training deep feedforward neural networks." Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, PMLR 9:249-256 (2010).]
+is used if None.
|====

From 26d81c3ad6b99d8b0fbac89c3d54d8aebc7ed68d Mon Sep 17 00:00:00 2001
From: Olga Razvenskaia
Date: Tue, 25 Jun 2024 11:57:01 +0100
Subject: [PATCH 20/20] Remove "runtime" word

---
 .../kge}/delta-value.svg                           | 0
 .../kge}/distmult-formula.svg                      | 0
 .../kge}/mrl.svg                                   | 0
 .../kge}/transe-formula.svg                        | 0
 doc/modules/ROOT/pages/algorithms.adoc             | 2 +-
 .../knowledge-graph-embeddings.adoc                | 10 +++++-----
 .../ROOT/partials/python-runtime-algorithms.adoc   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
 rename doc/modules/ROOT/images/{python-runtime => gds-session-algorithms/kge}/delta-value.svg (100%)
 rename doc/modules/ROOT/images/{python-runtime => gds-session-algorithms/kge}/distmult-formula.svg (100%)
 rename doc/modules/ROOT/images/{python-runtime => gds-session-algorithms/kge}/mrl.svg (100%)
 rename doc/modules/ROOT/images/{python-runtime => gds-session-algorithms/kge}/transe-formula.svg (100%)
 rename doc/modules/ROOT/pages/{python-runtime => gds-session-algorithms}/knowledge-graph-embeddings.adoc (97%)

diff --git a/doc/modules/ROOT/images/python-runtime/delta-value.svg b/doc/modules/ROOT/images/gds-session-algorithms/kge/delta-value.svg
similarity index 100%
rename from doc/modules/ROOT/images/python-runtime/delta-value.svg
rename to doc/modules/ROOT/images/gds-session-algorithms/kge/delta-value.svg
diff --git a/doc/modules/ROOT/images/python-runtime/distmult-formula.svg b/doc/modules/ROOT/images/gds-session-algorithms/kge/distmult-formula.svg
similarity index 100%
rename from doc/modules/ROOT/images/python-runtime/distmult-formula.svg
rename to doc/modules/ROOT/images/gds-session-algorithms/kge/distmult-formula.svg
diff --git a/doc/modules/ROOT/images/python-runtime/mrl.svg b/doc/modules/ROOT/images/gds-session-algorithms/kge/mrl.svg
similarity index 100%
rename from doc/modules/ROOT/images/python-runtime/mrl.svg
rename to doc/modules/ROOT/images/gds-session-algorithms/kge/mrl.svg
diff --git a/doc/modules/ROOT/images/python-runtime/transe-formula.svg b/doc/modules/ROOT/images/gds-session-algorithms/kge/transe-formula.svg
similarity index 100%
rename from doc/modules/ROOT/images/python-runtime/transe-formula.svg
rename to doc/modules/ROOT/images/gds-session-algorithms/kge/transe-formula.svg
diff --git a/doc/modules/ROOT/pages/algorithms.adoc b/doc/modules/ROOT/pages/algorithms.adoc
index 11afa6ca2..061ecbb85 100644
--- a/doc/modules/ROOT/pages/algorithms.adoc
+++ b/doc/modules/ROOT/pages/algorithms.adoc
@@ -78,7 +78,7 @@ We describe how to use them in the Python client in the sections below.

// There are some algorithms which are available for xref:gds-session.adoc[GDS Sessions] only.
// See the list:
//
-// include::ROOT:partial$python-runtime-algorithms.adoc[]
+// include::ROOT:partial$gds-session-algorithms-algorithms.adoc[]
//
// ====

diff --git a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc b/doc/modules/ROOT/pages/gds-session-algorithms/knowledge-graph-embeddings.adoc
similarity index 97%
rename from doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
rename to doc/modules/ROOT/pages/gds-session-algorithms/knowledge-graph-embeddings.adoc
index 671bb2ccf..476d2f486 100644
--- a/doc/modules/ROOT/pages/python-runtime/knowledge-graph-embeddings.adoc
+++ b/doc/modules/ROOT/pages/gds-session-algorithms/knowledge-graph-embeddings.adoc
@@ -3,7 +3,7 @@
Knowledge Graph Embeddings (KGE) refer to a family of algorithms designed to learn low-dimensional representations of entities and relations within a knowledge graph.
These embeddings are utilized for tasks such as link prediction, entity classification, and entity clustering.

-This chapter provides an overview of the available shallow embedding algorithms in the GDS Python runtime, such as `TransE` and `DistMult`.
+This chapter provides an overview of the shallow embedding algorithms available with GDS Sessions, such as `TransE` and `DistMult`.

* `TransE`
** Bordes, Antoine, et al. "Translating embeddings for modeling multi-relational data." Advances in neural information processing systems 26 (2013).
@@ -40,7 +40,7 @@ This method is effective for modeling anti-symmetric, inversion, and composition

The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t` is:

-image::python-runtime/transe-formula.svg[width=280]
+image::gds-session-algorithms/kge/transe-formula.svg[width=280]

=== `DistMult`

@@ -51,7 +51,7 @@ This method works for modeling symmetric relations.

The formula to compute the score of a triple `(head, relation, tail)` using the embeddings `h`, `r`, and `t` is:

-image::python-runtime/distmult-formula.svg[width=400]
+image::gds-session-algorithms/kge/distmult-formula.svg[width=400]

== Considerations

@@ -90,8 +90,8 @@ There are several loss functions that can be used for training KGE models; see b
Margin ranking loss is a pairwise loss function based on the difference between the scores of a positive triple and a negative triple.
When the negative sampling size is greater than one, the loss is computed for the positive triple against each of its negative triples, and the average is used.

-image::python-runtime/mrl.svg[width=300]
-image::python-runtime/delta-value.svg[width=400]
+image::gds-session-algorithms/kge/mrl.svg[width=300]
+image::gds-session-algorithms/kge/delta-value.svg[width=400]

==== Negative Sampling Self-Adversarial Loss

diff --git a/doc/modules/ROOT/partials/python-runtime-algorithms.adoc b/doc/modules/ROOT/partials/python-runtime-algorithms.adoc
index 84ce632f8..48f39c094 100644
--- a/doc/modules/ROOT/partials/python-runtime-algorithms.adoc
+++ b/doc/modules/ROOT/partials/python-runtime-algorithms.adoc
@@ -1 +1 @@
-* xref:python-runtime/knowledge-graph-embeddings.adoc[Knowledge Graph Embeddings]
\ No newline at end of file
+* xref:gds-session-algorithms/knowledge-graph-embeddings.adoc[Knowledge Graph Embeddings]
\ No newline at end of file
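
To close, a minimal sketch of the evaluation options described in PATCH 16: an explicit three-way split via `split_ratios` together with filtered ranking metrics. It is illustrative rather than part of the patches; the `gds.kge.model.train` entry point follows PATCH 17, `G` is assumed to be a previously loaded graph object, and the dictionary keys follow the `split_ratios` table entry above.

[source, python, role=no-test]
----
# Sketch: explicit train/validation/test split with filtered metrics.
# `G` is assumed to be a previously loaded graph object.
gds.kge.model.train(G,
    num_epochs=100,
    embedding_dimension=256,
    split_ratios={"TRAIN": 0.8, "VALID": 0.1, "TEST": 0.1},  # ratios must sum to 1.0
    filtered_metrics=True,  # rank each test triple only against triples not known to be true
    k_value=10,             # the k in the Hits@k metric
)
----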