In [24]:
%matplotlib notebook
import random
from pathlib import Path

import matplotlib 
import numpy as np
import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt

### Reading data

In [25]:
# Per-hospital cohort tables, one CSV per source dataset.
cohort_csv_paths = [
    "data/Tongji_375_CN.csv",
    "data/Tongji_110_CN.csv",
    "data/St_Antonius_NL.csv",
    "data/Outcomerea_FR.csv",
    "data/Northwell_US.csv",
]
(
    Tongji_375_data,
    Tongji_110_data,
    St_Antonius_data,
    Outcomerea_FR_data,
    Northwell_US_data,
) = [pd.read_csv(csv_path) for csv_path in cohort_csv_paths]

# Pre-computed k-means tables for the Tongji 375 cohort
# (presumably raw and log1p-transformed features, judging by the file names -- confirm).
cluster_csv_paths = [
    "data/kmeans_tongji375.csv",
    "data/kmeans_log1p_tongji_375.csv",
]
clustered_Tongji_375, clustered_log1p_Tongji_375 = [
    pd.read_csv(csv_path) for csv_path in cluster_csv_paths
]

In [26]:
# Keep a random subset of the Northwell cohort (at most 300 rows).
# NOTE(review): presumably done so this cohort is comparable in size to the
# others in the combined plot -- confirm.
#
# DataFrame.sample is used instead of random.sample over a hard-coded
# range(0, 1038) so that:
#   * the cell does not depend on the CSV having exactly 1038 rows,
#   * the subset is reproducible (fixed random_state), and
#   * re-running the cell does not fail on row labels that were already
#     dropped (the size is capped at the current number of rows).
NORTHWELL_SAMPLE_SIZE = 300
NORTHWELL_SAMPLE_SEED = 42
Northwell_US_data = Northwell_US_data.sample(
    n=min(NORTHWELL_SAMPLE_SIZE, len(Northwell_US_data)),
    random_state=NORTHWELL_SAMPLE_SEED,
)

In [27]:
# Tag every row with the name of the dataset it came from; the combined
# frame built later uses this `type` column to tell the sources apart.
for source_label, source_frame in (
    ("Tongji_375", Tongji_375_data),
    ("Tongji_110", Tongji_110_data),
    ("St_Antonius", St_Antonius_data),
    ("Outcomerea_FR", Outcomerea_FR_data),
    ("Northwell_US", Northwell_US_data),
):
    source_frame['type'] = [source_label] * len(source_frame)

In [28]:
# Stack all five cohorts into one frame.
# ignore_index=True gives the result a fresh 0..n-1 row index directly; a plain
# reset_index() would additionally keep each file's old row labels as a
# meaningless `index` data column.
combined_data_with_NU = pd.concat(
    [
        Tongji_375_data,
        Tongji_110_data,
        St_Antonius_data,
        Outcomerea_FR_data,
        Northwell_US_data,
    ],
    ignore_index=True,
)

### Log transforming

In [29]:
# Replace the three plotted features with log(1 + x) of themselves.
# The columns are overwritten in place, so running this cell a second time
# without rebuilding `combined_data_with_NU` applies the transform twice.
for feature in ('LDH_last', 'hsCRP_last', 'lymphocytes_last'):
    combined_data_with_NU[feature] = np.log1p(combined_data_with_NU[feature])

# Readable label derived from `outcome`: 1 -> 'died', anything else -> 'survived'.
died_mask = combined_data_with_NU['outcome'] == 1
combined_data_with_NU['died'] = np.where(died_mask, 'died', 'survived')

In [30]:
# Same combined frame without the Northwell_US rows.
# drop=True discards the old row labels instead of inserting them as yet
# another leftover index column in the result.
combined_data = (
    combined_data_with_NU
    .loc[combined_data_with_NU['type'] != "Northwell_US"]
    .reset_index(drop=True)
)

In [31]:
# Expose the clustering table's feature columns under the same names the
# hospital frames use, so the 3D plot below can share axis names.
for plot_column, cluster_column in (
    ('LDH_last', 'LDH'),
    ('hsCRP_last', 'hsCRP'),
    ('lymphocytes_last', 'lymph'),
):
    clustered_log1p_Tongji_375[plot_column] = clustered_log1p_Tongji_375[cluster_column]

# Human-readable legend labels: cluster 1 vs. every other cluster id,
# and outcome 1 ("Died") vs. every other outcome value.
in_first_cluster = clustered_log1p_Tongji_375.cluster == 1
clustered_log1p_Tongji_375["Cluster"] = np.where(in_first_cluster, "Cluster 1", "Cluster 2")
patient_died = clustered_log1p_Tongji_375.outcome == 1
clustered_log1p_Tongji_375["Outcome"] = np.where(patient_died, "Died", "Survived")

### Drawing plots

In [32]:
# 3D scatter of the three plotted features for every cohort in
# `combined_data` (all sources except Northwell_US): colour encodes the source
# dataset, marker symbol encodes the raw `outcome` value.
# NOTE(review): the Northwell variant of this plot uses the readable `died`
# label for the symbol; consider doing the same here for a clearer legend.
fig = px.scatter_3d(
    combined_data,
    x='LDH_last',
    y='hsCRP_last',
    z='lymphocytes_last',
    color='type',
    symbol='outcome',
    title="Scatterplot of features for each hospital",
)

# write_html does not create missing folders, so make sure the output
# directory exists first (it may be absent on a fresh checkout).
hospitals_html = Path("3D_plots") / "3d_plot_hospitals.html"
hospitals_html.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(str(hospitals_html))

In [33]:
# Same 3D scatter as the per-hospital plot, but on the frame that still
# contains the Northwell_US rows; marker symbol uses the `died` text label.
fig_NU = px.scatter_3d(
    combined_data_with_NU,
    x='LDH_last',
    y='hsCRP_last',
    z='lymphocytes_last',
    color='type',
    symbol='died',
    title="Scatterplot of features for each hospital (with Northwell US)",
)

# write_html does not create missing folders, so make sure the output
# directory exists first (it may be absent on a fresh checkout).
hospitals_with_nu_html = Path("3D_plots") / "3d_plot_hospitals_with_NU.html"
hospitals_with_nu_html.parent.mkdir(parents=True, exist_ok=True)
fig_NU.write_html(str(hospitals_with_nu_html))

In [34]:
# 3D scatter of the Tongji clustering table: colour encodes the cluster label,
# marker symbol encodes the outcome label.
# NOTE(review): the feature columns are presumably already log1p-transformed
# (the table is loaded from a "log1p" file) -- confirm.
fig_cluster = px.scatter_3d(
    clustered_log1p_Tongji_375,
    x='LDH_last',
    y='hsCRP_last',
    z='lymphocytes_last',
    symbol='Outcome',
    color='Cluster',
    title="Clusterization results for Tongji hospital",
)

# write_html does not create missing folders, so make sure the output
# directory exists first (it may be absent on a fresh checkout).
clusters_html = Path("3D_plots") / "3d_plot_clusters.html"
clusters_html.parent.mkdir(parents=True, exist_ok=True)
fig_cluster.write_html(str(clusters_html))