# Direct GDA scoring

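This notebook computes direct GDA scores: the cosine similarity between every MGI entity embedding and every OMIM entity embedding is calculated directly from the two embedding vectors, and the scored pairs are saved in descending order of similarity.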

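Dependencies:
numpy==1.26.4
scipy==1.13.1
tqdm (imported for the progress bar; no version is pinned here)
google.colab (imported below; available inside the Colab runtime)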

In [None]:
#imports needed

import numpy as np
from scipy.spatial.distance import cosine
from google.colab import drive
from tqdm import tqdm

In [None]:
#path of the embeddings text file that load_embeddings() reads; edit it to point at your own file
#NOTE(review): the path assumes Google Drive is already mounted at /content/drive - no mount call is visible in this file
embedding_file_path = '/content/drive/My Drive/colab_output/embedding_file_example.txt'

def load_embeddings(filepath):
    """Parse a text file of entity embeddings into a dict of NumPy vectors.

    Only lines that begin with ``http://mowl.borg/`` are read; every other
    line (section headers such as ``Entity Embeddings:``, blank lines, IRIs
    from other namespaces) is ignored. An accepted line is split on
    whitespace: the first token is the entity IRI, the remaining tokens are
    the vector components.

    Args:
        filepath: Path of the embedding text file (read as UTF-8).

    Returns:
        dict mapping the last ``/``-separated segment of each IRI (for
        example ``MGI_123``) to a 1-D float ``numpy.ndarray``. If the same
        entity appears more than once, the last occurrence wins.

    Lines whose components cannot be parsed as floats are reported with
    ``print`` and skipped; lines with an IRI but no components are skipped
    silently.
    """
    iri_prefix = "http://mowl.borg/"
    embeddings = {}

    # Explicit encoding so parsing does not depend on the platform default.
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            # The prefix test alone also rejects the "Entity Embeddings:"
            # header, so no separate header check is needed.
            if not line.startswith(iri_prefix):
                continue

            # split() without arguments already ignores surrounding whitespace.
            parts = line.split()
            if len(parts) < 2:
                continue

            # Keep the try body to the only statement that can raise ValueError.
            try:
                vector = np.array([float(x) for x in parts[1:]])
            except ValueError:
                print(f"Skipping line due to conversion error: {line}")
                continue

            entity_id = parts[0].split('/')[-1]
            embeddings[entity_id] = vector

    return embeddings


#read the embedding file into an {entity_id: vector} dictionary
embeddings = load_embeddings(embedding_file_path)


def _select_by_prefix(vectors, prefix):
    """Return the entries of `vectors` whose key starts with `prefix`, in their original order."""
    return {name: vec for name, vec in vectors.items() if name.startswith(prefix)}


#separate the entities by identifier prefix: MGI_* on one side, OMIM_* on the other
mgi_embeddings = _select_by_prefix(embeddings, "MGI_")
omim_embeddings = _select_by_prefix(embeddings, "OMIM_")

#output file for scores
output_cosine_similarity_filepath = '/content/drive/My Drive/colab_output/scores_file_name.txt'

#cosine similarity between MGI and OMIM embeddings directly
print("Calculating cosine similarity between MGI and OMIM embeddings")

#one (mgi_id, omim_id, cosine_similarity) tuple per MGI-OMIM pair
cosine_similarity_results = []

#with either side empty there is nothing to score (and np.vstack rejects an empty list)
if mgi_embeddings and omim_embeddings:
    omim_ids = list(omim_embeddings)

    #stack the OMIM vectors once so that each MGI vector is scored against all of them with a
    #single matrix-vector product instead of one scipy call per pair
    omim_matrix = np.vstack([omim_embeddings[omim_id] for omim_id in omim_ids])
    omim_squared_norms = np.sum(omim_matrix * omim_matrix, axis=1)

    #iterating through MGI embeddings and calculate the cosine similarity with all OMIM embeddings
    for mgi_id, mgi_vector in tqdm(mgi_embeddings.items(), desc="Calculating MGI to OMIM cosine similarity"):
        #cos(u, v) = u.v / sqrt(|u|^2 * |v|^2); a zero-norm vector produces NaN (undefined similarity)
        similarities = (omim_matrix @ mgi_vector) / np.sqrt(omim_squared_norms * (mgi_vector @ mgi_vector))
        #rounding error can push a value marginally outside [-1, 1]; NaN passes through clip unchanged
        similarities = np.clip(similarities, -1.0, 1.0)

        cosine_similarity_results.extend(
            (mgi_id, omim_id, cosine_similarity)
            for omim_id, cosine_similarity in zip(omim_ids, similarities.tolist())
        )

#sorting in descending order; NaN compares false against every number, which would leave the
#order unpredictable, so undefined scores are ranked after every real score
cosine_similarity_results.sort(
    key=lambda result: -np.inf if np.isnan(result[2]) else result[2],
    reverse=True,
)

#write a tab-separated table: one header row, then one row per scored pair in list order
with open(output_cosine_similarity_filepath, 'w') as out_handle:
    out_handle.write("MGI Entity\tOMIM Entity\tCosine Similarity\n")
    #scores are written with four decimal places
    out_handle.writelines(
        f"{mgi_name}\t{omim_name}\t{score:.4f}\n"
        for mgi_name, omim_name, score in cosine_similarity_results
    )

print(f"Cosine similarities for MGI-OMIM entity pairs saved to {output_cosine_similarity_filepath}.")
