In [None]:
%matplotlib inline
from graphdatascience import GraphDataScience
from neomodel import config
import os
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns
import pandas as pd
from src.models.simple_graph import *
import numpy as np
from mpl_toolkits.mplot3d import Axes3D


# database = input("Database name: ")
# Read the Neo4j connection settings and the input CSV location from
# data/config.json. Every key listed below is used further down (the
# connection URL and the CSV path), so a missing file or key is reported
# here instead of surfacing later as an unrelated NameError -- or, worse,
# silently reusing stale values left over in the kernel.
CONFIG_PATH = "data/config.json"
REQUIRED_CONFIG_KEYS = ("csv_path", "username", "password", "host", "database", "port")

if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(f"Configuration file not found: {CONFIG_PATH}")

with open(CONFIG_PATH) as config_file:
    config_data = json.load(config_file)

missing_keys = [key for key in REQUIRED_CONFIG_KEYS if key not in config_data]
if missing_keys:
    raise KeyError(
        f"{CONFIG_PATH} is missing required key(s): {', '.join(missing_keys)}"
    )

csv_path = config_data["csv_path"]
username = config_data["username"]
password = config_data["password"]
host = config_data["host"]
database = config_data["database"]
port = config_data["port"]

# Point neomodel at the configured database.
config.DATABASE_URL = f"bolt://{username}:{password}@{host}:{port}/{database}"

In [None]:
# Build the feature matrix X (one embedding per visit) and the label list y
# (length-of-stay flag) from the input CSV, looking each visit up in Neo4j.
import csv  # stdlib; imported here so this cell works on its own

X = []
y = []

# First pass: count the lines so the progress bar can show a total.
with open(csv_path) as mimic_data:
    num_lines = sum(1 for _ in mimic_data)

# Second pass: parse with the csv module so that quoted fields containing
# commas do not shift the column positions (a plain split(",") would).
with open(csv_path, newline="") as mimic_data:
    reader = csv.reader(mimic_data)
    next(reader, None)  # skip the header row

    for entry in tqdm(reader, total=max(num_lines - 1, 0), desc="Load data from input file..."):
        # csv.reader yields an empty list for a blank line; nothing to load.
        if not entry:
            continue

        # Get visit ID (column 0) and LOS label (column 5)
        visit_id = entry[0].strip()
        los_over = entry[5].strip()

        # Get visit node from Neo4j so we can get the embedding.
        # NOTE: this issues one lookup per CSV row.
        visit = Visit.nodes.get(visit_id=visit_id)

        # Add embedding and label to lists
        X.append(visit.embedding)
        y.append(los_over)

In [None]:
# Project the visit embeddings to 2-D with t-SNE and collect the result,
# together with the labels, in a DataFrame for plotting.

# Fixed seed so that re-running the notebook reproduces the same layout.
TSNE_RANDOM_STATE = 42

# The iteration count is left at scikit-learn's default (1000, the value that
# used to be passed explicitly): the `n_iter` keyword was renamed to
# `max_iter` in scikit-learn 1.5, so omitting it works on either side of the
# rename without changing the result.
# NOTE: scikit-learn requires perplexity to be smaller than the number of
# samples, so this cell needs more than 100 visits in X.
tsne = TSNE(
    n_components=2,
    verbose=1,
    perplexity=100,
    learning_rate="auto",
    init="pca",
    random_state=TSNE_RANDOM_STATE,
)
tsne_results = tsne.fit_transform(np.array(X))

# One row per visit: the two t-SNE coordinates plus the label used as hue.
df_subset = pd.DataFrame({
    "tsne-2d-one": tsne_results[:, 0],
    "tsne-2d-two": tsne_results[:, 1],
    "los6": y,
})

In [None]:
# Scatter plot of the 2-D t-SNE coordinates, coloured by the los6 label.
fig, ax = plt.subplots(figsize=(16, 10))

# Size the palette from the data so the number of colours always matches the
# number of distinct labels (identical to the former fixed 2 for a binary label).
num_classes = df_subset["los6"].nunique()

sns.scatterplot(
    x="tsne-2d-one", y="tsne-2d-two",
    hue="los6",
    palette=sns.color_palette("hls", num_classes),
    data=df_subset,
    legend="full",
    alpha=0.3,
    ax=ax,
)
ax.set_title("t-SNE projection of visit embeddings, coloured by los6")

# Render the figure without echoing the Axes repr as cell output.
plt.show()