In [1]:
import csv
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

from shared.plot import plot_geodesic, plot_hierarchy, plot_train_embed, get_dict_data
from shared.io import read_data, read_ref
from train import init_torch_objects, train

OUT_DIMENSIONS = 10 # 50
NEG_SAMPLES = 10 # 10
EPOCH = 501
DEVICE = "cuda:0" # or "cpu"
torch.set_default_dtype(torch.float64)

In [2]:
# Plot geodesic comparison between Poincaré and Euclidean
# plot_geodesic()
import torch
print(torch.cuda.is_available())

True


In [3]:
# Load edge data
data, weights, objects, neighbors, diff_summed, num_relations = read_data(Path("data","opehr_concepts_11454.csv"))

# load concept reference 
ref = read_ref(Path('data','ref.csv'))

# define fixed index clinical finding
clinical_finding_concept_id = 441840
fixed_index = objects.index(clinical_finding_concept_id)

# initialize torch objects for the training loop
model, optimizer, loss_func = init_torch_objects(objects, OUT_DIMENSIONS, fixed_index)

if "cuda:0" == DEVICE:
    model = model.to(DEVICE)

# ToDo: implement function to load embedding and continue training

# ensure that ref contains all concepts
dict_data = dict(enumerate(objects))
for key, value in dict_data.items():
    try:
        dict_data[key] = ref.loc[ref['concept_id'] == value].concept_name.values[0]
    except Exception as e:
        print(f"Error at Key={key}, Value={value}, Error={e}")


Processing dataset...
Edges: 54803
Relations: 54803
Nodes: 26909


In [None]:
# plot_hierarchy(data, objects, ref, True)

In [None]:
train(data=data, weights=weights, objects=objects, neighbors=neighbors,
      diff_summed=diff_summed, num_relations=num_relations,
      model=model, optimizer=optimizer, loss_func=loss_func,
      out_dimensions=OUT_DIMENSIONS, n_neg_samples=NEG_SAMPLES, n_epochs=EPOCH,
      n_burn_in=10, device=DEVICE)

Epoch: 0
Evaluating mean rank:


100%|██████████| 26909/26909 [00:28<00:00, 939.08it/s]



Mean rank: 8302.7630604164, loss: 2.396066915368226
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Evaluating mean rank:


100%|██████████| 26909/26909 [00:26<00:00, 1028.40it/s]



Mean rank: 2888.779190920205, loss: 23.73981120144328
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1046.37it/s]



Mean rank: 1314.0828604273488, loss: 16.07748848698065
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1040.25it/s]



Mean rank: 1539.5841103589219, loss: 8.453479787192554
Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1058.69it/s]



Mean rank: 1588.567943360765, loss: 6.394468025050422
Epoch: 41
Epoch: 42
Epoch: 43
Epoch: 44
Epoch: 45
Epoch: 46
Epoch: 47
Epoch: 48
Epoch: 49
Epoch: 50
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1046.35it/s]



Mean rank: 1610.3134682407897, loss: 5.62433228444947
Epoch: 51
Epoch: 52
Epoch: 53
Epoch: 54
Epoch: 55
Epoch: 56
Epoch: 57
Epoch: 58
Epoch: 59
Epoch: 60
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1048.35it/s]



Mean rank: 1631.5567760889003, loss: 5.233926577606724
Epoch: 61
Epoch: 62
Epoch: 63
Epoch: 64
Epoch: 65
Epoch: 66
Epoch: 67
Epoch: 68
Epoch: 69
Epoch: 70
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1044.27it/s]



Mean rank: 1653.320000729887, loss: 4.996750393648495
Epoch: 71
Epoch: 72
Epoch: 73
Epoch: 74
Epoch: 75
Epoch: 76
Epoch: 77
Epoch: 78
Epoch: 79
Epoch: 80
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1052.53it/s]



Mean rank: 1676.8895498421618, loss: 4.835539766332836
Epoch: 81
Epoch: 82
Epoch: 83
Epoch: 84
Epoch: 85
Epoch: 86
Epoch: 87
Epoch: 88
Epoch: 89
Epoch: 90
Evaluating mean rank:


100%|██████████| 26909/26909 [00:25<00:00, 1047.01it/s]



Mean rank: 1699.5647865992737, loss: 4.724732951133751
Epoch: 91
Epoch: 92
Epoch: 93
Epoch: 94
Epoch: 95


In [4]:
dict_data = get_dict_data(objects, ref, dict_type="name")
model = torch.load("output/poincare_model_dim_3.pt")
coordinates = model["state_dict"]["embedding.weight"].numpy()
# print(model.state_dict()['embedding.weight'])
# coordinates = model.embedding.weight
print(coordinates)
#######################################################
# some experiment with 3d plotting in TF projector  
x_np = coordinates # .detach().numpy()
x_df = pd.DataFrame(x_np)
x_df.to_csv(Path('output','tf_proj_vec.tsv'), sep="\t", index=False, header=False)

df = pd.Series(dict_data)
df.to_string()
print(df)
df.to_csv(Path('output','tf_proj_lab.tsv'), sep="\t", index=False, header=False,
          quoting=csv.QUOTE_NONNUMERIC)
# df["index"].map(dictData)
###########################

# print(len(objects))
#print(data)

plt.figure()
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.axis('off')

data, weights, objects, neighbors, diff_summed, num_relations = read_data("data/opehr_concepts.csv")

# add some jitter to better see labels
jitter = 0.02
jitter_x = np.random.uniform(low=-jitter, high = jitter, size=(coordinates.shape[0], ))
jitter_y = np.random.uniform(low=-jitter, high = jitter, size=(coordinates.shape[0], ))

for x in range(coordinates.shape[0]):
    plt.annotate(dict_data[x], (coordinates[x,0].detach().numpy()+jitter_x[x],
                               coordinates[x,1].detach().numpy()+jitter_y[x]), fontsize=4)
    # plt.annotate(dictData[x], (coordinates[x,0]*100, coordinates[x,1]*100),
    #              bbox={"fc":"white", "alpha":0.9}, fontsize=4)

# Plot edges of original hierarchy
for i in range(data.shape[0]):
    x_values = [coordinates[data[i][0], 0].detach().numpy(), coordinates[data[i][1], 0].detach().numpy()]
    y_values = [coordinates[data[i][0], 1].detach().numpy(), coordinates[data[i][1], 1].detach().numpy()]
#
#     x_val = [coordinates[data[x][0],0].detach().numpy(), coordinates[data[x][1],1].detach().numpy()]
#     y_val = [coordinates[data[x][0],0].detach().numpy(), coordinates[data[x][1],1].detach().numpy()]
    plt.plot(x_values, y_values, color="black", linewidth=0.2)

plt.savefig(Path("output", "hierarchy_embed.png"), dpi=300, facecolor="white")
plt.show()

dictData[1]: 37017430


FileNotFoundError: [Errno 2] No such file or directory: 'output/poincare_model_dim_3.pt'

In [None]:
from shared.io import write_tensorflow_projector_data

model_path = 'output/poincare_model_dim_10_epoch_100.pt'
ref_csv_path = 'data/ref.csv'

write_tensorflow_projector_data(model_path, ref_csv_path)

In [None]:
import torch

# Load the tensor from the .pt file
file_path = "D:/git/omop-poincare/output/embedding.pt"
tensor = torch.load(file_path, map_location=torch.device("cpu"))

# Print the contents of the tensor
print(tensor)


In [7]:
import torch

# Check if CUDA is available
cuda_available = torch.cuda.is_available()

# Print the result
if cuda_available:
    print("CUDA is available.")
else:
    print("CUDA is not available.")



CUDA is not available.


In [7]:
from shared.io import convert_embedding_for_plp

convert_embedding_for_plp("output/poincare_model_dim_10_epoch_500.pt", "output/embedding_500.pt")


FileNotFoundError: [Errno 2] No such file or directory: 'output/poincare_model_dim_10_epoch_500.pt'