In [78]:
# Cosine semantic similarity of 'ontology' definitions

In [79]:
import torch

def format_pytorch_version(version):
    """Return the release part of a PyTorch version string.

    Everything from the first '+' onward (the build tag, e.g. the '+cu113'
    in '1.12.1+cu113') is dropped. A string without '+' is returned unchanged.
    """
    release, _plus, _build_tag = version.partition('+')
    return release

def format_cuda_version(version):
    """Turn a CUDA version string into the tag used in wheel-index URLs.

    Args:
        version: CUDA version such as '11.3', or None. ``torch.version.cuda``
            is None on CPU-only PyTorch builds.

    Returns:
        'cu' followed by the version with the dots removed (e.g. 'cu113'),
        or 'cpu' when ``version`` is None.
    """
    if version is None:
        # CPU-only builds report no CUDA version; calling .replace on None
        # would raise AttributeError. The wheel index names that variant 'cpu'.
        return 'cpu'
    return 'cu' + version.replace('.', '')

# Raw version strings reported by the installed PyTorch build.
TORCH_version = torch.__version__
CUDA_version = torch.version.cuda

# Normalized tags that get interpolated into the wheel-index URLs of the
# install commands.
TORCH = format_pytorch_version(TORCH_version)
CUDA = format_cuda_version(CUDA_version)

In [81]:
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.12.1+cu113.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.12.1+cu113.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.12.1+cu113.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://pytorch-geometric.com/whl/torch-1.12.1+cu113.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [82]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [83]:
pip install sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [84]:
# Check that torch_geometric is importable and display the installed version
# as the cell output.
import torch_geometric

torch_geometric.__version__


'2.1.0'

In [85]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={'axes.facecolor':'dimgrey', 'grid.color':'lightgrey'})
import numpy as np
import pandas as pd
import networkx as nx
import torch.nn.functional as F
import torch.nn as nn
import torch_scatter
from sentence_transformers import SentenceTransformer, util
from torch_geometric.data import Data 
from torch_geometric.utils import to_undirected

In [None]:
# Model Selection and Initialization

In [87]:
# Load the pretrained 'all-MiniLM-L6-v2' checkpoint; the similarity cells in
# this notebook use this single model to embed sentences.
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

In [None]:
# Semantic similarity between two sentences

In [88]:
sentence1 = "An explicit specification of a conceptualization."
sentence2 = "A systematic account of existence."

# Embed each sentence on its own; convert_to_tensor=True asks encode() for
# tensors instead of NumPy arrays.
embedding1, embedding2 = [
    model.encode(text, convert_to_tensor=True)
    for text in (sentence1, sentence2)
]

# Cosine similarity between the two embeddings; .item() below pulls the
# single score out as a plain Python number.
cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)

print("Sentence 1:", sentence1)
print("Sentence 2:", sentence2)
print("Similarity score:", cosine_scores.item())

Sentence 1: An explicit specification of a conceptualization.
Sentence 2: A systematic account of existence.
Similarity score: 0.4527572989463806


In [None]:
# Semantic similarity between two lists of sentences

In [90]:
sentences1 = [
    "An explicit specification of a conceptualization.",
    "Set of representational primitives with which to model a domain of knowledge or discourse.",
]
sentences2 = [
    "A formal naming and definition of the types, properties, and interrelationships of the entities.",
    "A systematic account of existence.",
]

# Embed both lists in one encode() call each.
embedding1 = model.encode(sentences1, convert_to_tensor=True)
embedding2 = model.encode(sentences2, convert_to_tensor=True)

# cosine_scores[i][j] holds the score for the pair
# (sentences1[i], sentences2[j]); print every pair.
cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)
for i, first in enumerate(sentences1):
    for j, second in enumerate(sentences2):
        print("Sentence 1:", first)
        print("Sentence 2:", second)
        print("Similarity Score:", cosine_scores[i][j].item())
        print()

Sentence 1: An explicit specification of a conceptualization.
Sentence 2: A formal naming and definition of the types, properties, and interrelationships of the entities.
Similarity Score: 0.5923953652381897

Sentence 1: An explicit specification of a conceptualization.
Sentence 2: A systematic account of existence.
Similarity Score: 0.4527572989463806

Sentence 1: Set of representational primitives with which to model a domain of knowledge or discourse.
Sentence 2: A formal naming and definition of the types, properties, and interrelationships of the entities.
Similarity Score: 0.4082657992839813

Sentence 1: Set of representational primitives with which to model a domain of knowledge or discourse.
Sentence 2: A systematic account of existence.
Similarity Score: 0.2979518175125122



In [None]:
# Retrieve Top K most similar sentences from a corpus given a sentence

In [91]:
corpus = ["A formal naming and definition of the types, properties, and interrelationships of the entities.",
          "A systematic account of existence.",
          "A specification of a conceptualization.",
          "It studies concepts such as existence, being, becoming and reality.",
          "It represents a domain of discourse as a common ground for encoding content meaning and user interests.",
          "The branch of philosophy that deals with the nature of existence.",
          "Set of concepts (terms) and the relationships among them as representing the consensual knowledge of a specific domain.",
          "A “formal specification of a shared conceptualization”." ,
          "Set of representational primitives with which to model a domain of knowledge or discourse."
          ]

# encode corpus to get corpus embeddings
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
sentence = "An account of existence."

# encode the query sentence to get its embedding
sentence_embedding = model.encode(sentence, convert_to_tensor=True)

# number of results to return
top_k=2

# cosine similarity of the query sentence against every corpus entry;
# [0] selects the row belonging to the single query
cos_scores = util.pytorch_cos_sim(sentence_embedding, corpus_embeddings)[0]

# Pick the highest-scoring entries with torch.topk, which returns them in
# descending score order and works on whatever device the embeddings are on.
# np.argpartition would need a NumPy conversion of the tensor, which fails for
# CUDA tensors. k is clamped so a top_k larger than the corpus does not raise.
top_scores, top_indices = torch.topk(cos_scores, k=min(top_k, len(corpus)))
# Keep top_results as a NumPy array of corpus indices, as before.
top_results = top_indices.cpu().numpy()
print("Sentence:", sentence, "\n")
print("Top", top_k, "most similar sentences in corpus:")
for idx, score in zip(top_results, top_scores.tolist()):
    print(corpus[idx], "(Score: %.4f)" % score)

Sentence: An account of existence. 

Top 2 most similar sentences in corpus:
A systematic account of existence. (Score: 0.7924)
The branch of philosophy that deals with the nature of existence. (Score: 0.5599)
