In [None]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from sklearn import preprocessing

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt



In [None]:
# Load the real cohort first: once its bookkeeping columns are removed, its
# column order becomes the reference layout for every synthetic frame.
data_black_real = (
    pd.read_csv("data/data_real_black.csv", sep=',')
    .drop(columns=['index', 'icustay_id', 'hour'])
)
feature_columns = data_black_real.columns

# GAN / CGAN exports: keep only the reference columns, in the reference order.
data_black_fake_gan = pd.read_csv("data/GAN_400_data_fake_black.csv", sep=',')[feature_columns]
data_black_fake_cgan = pd.read_csv("data/CGAN_generations/3_650/data_fake.csv", sep=',')[feature_columns]

# SMOTE export: drop its extra id / ethnicity columns explicitly (this fails
# loudly if the file layout changes), then align it to the reference layout.
data_black_fake_smote = (
    pd.read_csv("data/SMOTE/data_smote_fake_cat.csv", sep=',')
    .drop(columns=['id', 'icustay_id', 'hour', 'ethnicity_x', 'ethnicity_y'])
)[feature_columns]

In [None]:
# Stack the real rows on top of the CA-GAN synthetic rows (real first) so the
# two sets can be embedded jointly; the plots later split the embedding back
# apart at row data_black_real.shape[0].
data_reduced = np.concatenate((data_black_real, data_black_fake_cgan), axis=0)


In [None]:
# t-SNE estimator shared by every real-vs-synthetic embedding in this notebook.
import inspect

n_components = 2

# scikit-learn 1.5 renamed TSNE's `n_iter` argument to `max_iter` and 1.7
# removed the old name; use whichever keyword the installed version accepts.
_tsne_iter_kwarg = (
    "max_iter"
    if "max_iter" in inspect.signature(TSNE.__init__).parameters
    else "n_iter"
)

# A fixed seed makes the embeddings (and the saved figures) reproducible
# under Restart & Run All; without it every run yields a different layout.
tsne = TSNE(n_components=n_components, random_state=0, **{_tsne_iter_kwarg: 300})

In [None]:

# Joint t-SNE embedding of `data_reduced` (as assigned in the preceding cell:
# real rows followed by CA-GAN rows), wrapped in a DataFrame so the plots can
# slice it positionally with .iloc.
tsne_results_cgan = pd.DataFrame(tsne.fit_transform(data_reduced))

In [None]:
# Two-component PCA estimator, reused (re-fitted) for each synthetic data set below.
n_components = 2
pca = PCA(n_components=n_components)



In [None]:
# Min-max normalisation ahead of PCA (CA-GAN comparison).
#
# The scaler is fitted on the REAL data only and then applied unchanged to the
# synthetic data, so both sets share one coordinate system. Re-fitting it on
# the synthetic frame would stretch that frame to [0, 1] on its own and hide
# any range mismatch between real and generated values.
min_max_scaler = preprocessing.MinMaxScaler()

data_black_real_norm = min_max_scaler.fit_transform(data_black_real)
data_black_fake_cgan_norm = min_max_scaler.transform(data_black_fake_cgan)

In [None]:
# The fit of the methods must be done only using the real sequential data
pca.fit(data_black_real_norm)

# pca_real:       normalised real data projected onto the fitted components
# pca_synth_cgan: normalised CA-GAN synthetic data projected onto the same components
pca_real = pd.DataFrame(pca.transform(data_black_real_norm))
pca_synth_cgan = pd.DataFrame(pca.transform(data_black_fake_cgan_norm))

In [None]:
# GAN: repeat the t-SNE / PCA pipeline for the GAN synthetic data (labelled WGAN-GP in the figures)

In [None]:
# Stack the real rows on top of the GAN synthetic rows (real first) so the two
# sets can be embedded jointly. Note this overwrites the previous `data_reduced`.
data_reduced = np.concatenate((data_black_real, data_black_fake_gan), axis=0)

In [None]:
# Joint t-SNE embedding of `data_reduced` (as assigned in the preceding cell:
# real rows followed by GAN rows), kept as a DataFrame for positional slicing.
tsne_results_gan = pd.DataFrame(tsne.fit_transform(data_reduced))

In [None]:
# Min-max normalisation ahead of PCA (GAN comparison).
from sklearn import preprocessing

# The scaler is fitted on the REAL data only and then applied unchanged to the
# synthetic data, so both sets share one coordinate system. Re-fitting it on
# the synthetic frame would stretch that frame to [0, 1] on its own and hide
# any range mismatch between real and generated values.
min_max_scaler = preprocessing.MinMaxScaler()

data_black_real_norm = min_max_scaler.fit_transform(data_black_real)
data_black_fake_gan_norm = min_max_scaler.transform(data_black_fake_gan)

In [None]:
# The fit of the methods must be done only using the real sequential data
pca.fit(data_black_real_norm)

# pca_real:      normalised real data projected onto the fitted components
#                (reassigns the variable of the same name from the CA-GAN section)
# pca_synth_gan: normalised GAN synthetic data projected onto the same components
pca_real = pd.DataFrame(pca.transform(data_black_real_norm))
pca_synth_gan = pd.DataFrame(pca.transform(data_black_fake_gan_norm))

In [None]:
# SMOTE: repeat the t-SNE / PCA pipeline for the SMOTE synthetic data

In [None]:
# Stack the real rows on top of the SMOTE synthetic rows (real first) so the
# two sets can be embedded jointly. Note this overwrites the previous `data_reduced`.
data_reduced = np.concatenate((data_black_real, data_black_fake_smote), axis=0)

In [None]:
# Joint t-SNE embedding of `data_reduced` (as assigned in the preceding cell:
# real rows followed by SMOTE rows), kept as a DataFrame for positional slicing.
tsne_results_smote = pd.DataFrame(tsne.fit_transform(data_reduced))

In [None]:
# Min-max normalisation ahead of PCA (SMOTE comparison).
from sklearn import preprocessing

# The scaler is fitted on the REAL data only and then applied unchanged to the
# synthetic data, so both sets share one coordinate system. Re-fitting it on
# the synthetic frame would stretch that frame to [0, 1] on its own and hide
# any range mismatch between real and generated values.
min_max_scaler = preprocessing.MinMaxScaler()

data_black_real_norm = min_max_scaler.fit_transform(data_black_real)
data_black_fake_smote_norm = min_max_scaler.transform(data_black_fake_smote)

In [None]:
# The fit of the methods must be done only using the real sequential data
pca.fit(data_black_real_norm)

# pca_real:        normalised real data projected onto the fitted components
#                  (reassigns the variable of the same name from the earlier sections)
# pca_synth_smote: normalised SMOTE synthetic data projected onto the same components
pca_real = pd.DataFrame(pca.transform(data_black_real_norm))
pca_synth_smote = pd.DataFrame(pca.transform(data_black_fake_smote_norm))

In [None]:
# Adapted from:
# https://github.com/archity/synthetic-data-gan/blob/main/timeseries-data/energy-data-synthesize.ipynb
#
# SMOTE: side-by-side PCA and t-SNE scatter plots of real vs synthetic samples.

# The joint t-SNE frame holds the real rows first; this index splits it.
n_real = data_black_real.shape[0]

fig = plt.figure(constrained_layout=True, figsize=(20, 10))
spec = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)

# --- PCA panel: real points first, synthetic points drawn on top ---
ax = fig.add_subplot(spec[0, 0])
ax.set_title('PCA', fontsize=20, pad=10)
ax.scatter(pca_real.iloc[:, 0].values, pca_real.iloc[:, 1].values,
           c='r', alpha=0.08, label='Real Data')
ax.scatter(pca_synth_smote.iloc[:, 0], pca_synth_smote.iloc[:, 1],
           c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1.5, 1.5)

# --- t-SNE panel: synthetic points first, real points drawn on top ---
ax2 = fig.add_subplot(spec[0, 1])
ax2.set_title('T-SNE', fontsize=20, pad=10)
ax2.scatter(tsne_results_smote.iloc[n_real:, 0], tsne_results_smote.iloc[n_real:, 1],
            c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax2.scatter(tsne_results_smote.iloc[:n_real, 0].values, tsne_results_smote.iloc[:n_real, 1].values,
            c='r', alpha=0.1, label='Real Data')

# The points are nearly transparent, so force the legend markers to be opaque.
for axis in (ax, ax2):
    leg = axis.legend()
    # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old name
    # removed in 3.9); support both so the cell runs on either side.
    for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
        lh.set_alpha(1)

fig.savefig('plot/tsne-smote.png', dpi=200)

In [None]:
# Adapted from:
# https://github.com/archity/synthetic-data-gan/blob/main/timeseries-data/energy-data-synthesize.ipynb
#
# GAN: side-by-side PCA and t-SNE scatter plots of real vs synthetic samples.

# The joint t-SNE frame holds the real rows first; this index splits it.
n_real = data_black_real.shape[0]

fig = plt.figure(constrained_layout=True, figsize=(20, 10))
spec = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)

# --- PCA panel: real points first, synthetic points drawn on top ---
ax = fig.add_subplot(spec[0, 0])
ax.set_title('PCA', fontsize=20, pad=10)
ax.scatter(pca_real.iloc[:, 0].values, pca_real.iloc[:, 1].values,
           c='r', alpha=0.08, label='Real Data')
ax.scatter(pca_synth_gan.iloc[:, 0], pca_synth_gan.iloc[:, 1],
           c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1.5, 1.5)

# --- t-SNE panel: synthetic points first, real points drawn on top ---
ax2 = fig.add_subplot(spec[0, 1])
ax2.set_title('T-SNE', fontsize=20, pad=10)
ax2.scatter(tsne_results_gan.iloc[n_real:, 0], tsne_results_gan.iloc[n_real:, 1],
            c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax2.scatter(tsne_results_gan.iloc[:n_real, 0].values, tsne_results_gan.iloc[:n_real, 1].values,
            c='r', alpha=0.1, label='Real Data')

# The points are nearly transparent, so force the legend markers to be opaque.
for axis in (ax, ax2):
    leg = axis.legend()
    # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old name
    # removed in 3.9); support both so the cell runs on either side.
    for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
        lh.set_alpha(1)

fig.savefig('plot/tsne-gan.png', dpi=200)

In [None]:
# Adapted from:
# https://github.com/archity/synthetic-data-gan/blob/main/timeseries-data/energy-data-synthesize.ipynb
#
# CGAN: side-by-side PCA and t-SNE scatter plots of real vs synthetic samples.

# The joint t-SNE frame holds the real rows first; this index splits it.
n_real = data_black_real.shape[0]

fig = plt.figure(constrained_layout=True, figsize=(20, 10))
spec = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)

# --- PCA panel: real points first, synthetic points drawn on top ---
ax = fig.add_subplot(spec[0, 0])
ax.set_title('PCA', fontsize=20, pad=10)
ax.scatter(pca_real.iloc[:, 0].values, pca_real.iloc[:, 1].values,
           c='r', alpha=0.08, label='Real Data')
ax.scatter(pca_synth_cgan.iloc[:, 0], pca_synth_cgan.iloc[:, 1],
           c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1.5, 1.5)

# --- t-SNE panel: synthetic points first, real points drawn on top ---
ax2 = fig.add_subplot(spec[0, 1])
ax2.set_title('T-SNE', fontsize=20, pad=10)
ax2.scatter(tsne_results_cgan.iloc[n_real:, 0], tsne_results_cgan.iloc[n_real:, 1],
            c='deepskyblue', alpha=0.03, label='Synthetic Data')
ax2.scatter(tsne_results_cgan.iloc[:n_real, 0].values, tsne_results_cgan.iloc[:n_real, 1].values,
            c='r', alpha=0.1, label='Real Data')

# PCA legend entries are listed in reverse plotting order; the t-SNE legend
# keeps the default order.
handles, labels = ax.get_legend_handles_labels()
legends = [ax.legend(handles[::-1], labels[::-1]), ax2.legend()]

# The points are nearly transparent, so force the legend markers to be opaque.
for leg in legends:
    # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old name
    # removed in 3.9); support both so the cell runs on either side.
    for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
        lh.set_alpha(1)

fig.savefig('plot/tsne-cgan.png', dpi=200)

In [None]:
# t-SNE together: one panel per generator (SMOTE, WGAN-GP, CA-GAN), each
# showing the joint embedding of real vs synthetic samples.

alpha_red = 0.08
alpha_blue = 0.03

# Each joint t-SNE frame holds the real rows first; this index splits it.
n_real = data_black_real.shape[0]

fig = plt.figure(constrained_layout=True, figsize=(30, 8))
spec = gridspec.GridSpec(ncols=3, nrows=1, figure=fig)

# (panel title, extra title kwargs, joint t-SNE embedding)
panels = [
    ('SMOTE', {}, tsne_results_smote),
    ('WGAN-GP', {}, tsne_results_gan),
    ('CA-GAN', {'fontweight': "bold"}, tsne_results_cgan),
]

for col, (title, title_kwargs, tsne_xy) in enumerate(panels):
    ax = fig.add_subplot(spec[0, col])
    ax.set_title(title, fontsize=34, pad=10, **title_kwargs)

    # Synthetic points first so the real points are drawn on top.
    ax.scatter(tsne_xy.iloc[n_real:, 0], tsne_xy.iloc[n_real:, 1],
               c='deepskyblue', alpha=alpha_blue, label='Synthetic Data')
    ax.scatter(tsne_xy.iloc[:n_real, 0].values, tsne_xy.iloc[:n_real, 1].values,
               c='r', alpha=alpha_red, label='Real Data')

    # t-SNE axes carry no interpretable units: hide ticks and tick labels.
    ax.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)

    if col == 1:
        # Single shared legend, centred below the middle panel, with the
        # entries listed in reverse plotting order (real first).
        handles, labels = ax.get_legend_handles_labels()
        leg = ax.legend(handles[::-1], labels[::-1], loc='upper center',
                        bbox_to_anchor=(0.5, -0.05), fontsize=24, ncol=2, markerscale=5)
        # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old
        # name removed in 3.9); support both so the cell runs on either side.
        for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
            lh.set_alpha(1)

fig.savefig('plot/2d-viz/tsne-all-small.png', dpi=200)

In [None]:
# PCA together: one panel per generator (SMOTE, WGAN-GP, CA-GAN), each showing
# the real data and one synthetic data set in the same PCA space.

fig = plt.figure(constrained_layout=True, figsize=(30, 8))
spec = gridspec.GridSpec(ncols=3, nrows=1, figure=fig)

# (panel title, extra title kwargs, synthetic PCA projection)
panels = [
    ('SMOTE', {}, pca_synth_smote),
    ('WGAN-GP', {}, pca_synth_gan),
    ('CA-GAN', {'fontweight': "bold"}, pca_synth_cgan),
]

for col, (title, title_kwargs, pca_synth) in enumerate(panels):
    ax = fig.add_subplot(spec[0, col])
    ax.set_title(title, fontsize=34, pad=10, **title_kwargs)

    # Real points first, synthetic points drawn on top.
    ax.scatter(pca_real.iloc[:, 0].values, pca_real.iloc[:, 1].values,
               c='r', alpha=0.3, label='Real Data')
    ax.scatter(pca_synth.iloc[:, 0], pca_synth.iloc[:, 1],
               c='deepskyblue', alpha=0.05, label='Synthetic Data')

    # Same fixed window on every panel so the three panels are comparable.
    ax.set_xlim(-1, 1.5)
    ax.set_ylim(-1, 1.5)
    ax.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)

    if col == 1:
        # Single shared legend, centred below the middle panel.
        leg = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                        fontsize=24, ncol=2, markerscale=5)
        # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old
        # name removed in 3.9); support both so the cell runs on either side.
        for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
            lh.set_alpha(1)

fig.savefig('plot/2d-viz/pca-all-small.png', dpi=200)

In [None]:
# PCA + t-SNE together: a 2 x 3 grid (top row t-SNE, bottom row PCA) with one
# column per generator and a narrow leading column holding the row label.

fig = plt.figure(layout="constrained", figsize=(30, 15))
subfigs = fig.subfigures(2, 1, wspace=0.07, height_ratios=[1., 1.])

alpha_red = 0.08
alpha_blue = 0.03

# Each joint t-SNE frame holds the real rows first; this index splits it.
n_real = data_black_real.shape[0]

################# t-SNE row ##################################################
spec = gridspec.GridSpec(ncols=4, nrows=1, figure=subfigs[0], width_ratios=[.1, 1, 1, 1])

# Row label in the narrow first column.
ax0 = subfigs[0].add_subplot(spec[0, 0])
ax0.set_axis_off()
ax0.text(.5, .5, 't-SNE', rotation=90, ha='center', va='center', fontsize=30)

# (column title, extra title kwargs, joint t-SNE embedding); only this top row
# carries the column titles.
tsne_panels = [
    ('SMOTE', {}, tsne_results_smote),
    ('WGAN-GP*', {}, tsne_results_gan),
    ('CA-GAN', {'fontweight': "bold"}, tsne_results_cgan),
]

for col, (title, title_kwargs, tsne_xy) in enumerate(tsne_panels, start=1):
    ax = subfigs[0].add_subplot(spec[0, col])
    ax.set_title(title, fontsize=34, pad=10, **title_kwargs)

    # Synthetic points first so the real points are drawn on top.
    ax.scatter(tsne_xy.iloc[n_real:, 0], tsne_xy.iloc[n_real:, 1],
               c='deepskyblue', alpha=alpha_blue, label='Synthetic Data')
    ax.scatter(tsne_xy.iloc[:n_real, 0].values, tsne_xy.iloc[:n_real, 1].values,
               c='r', alpha=alpha_red, label='Real Data')
    ax.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)

################# PCA row ####################################################
spec = gridspec.GridSpec(ncols=4, nrows=1, figure=subfigs[1], width_ratios=[.1, 1, 1, 1])

# Row label in the narrow first column.
ax0 = subfigs[1].add_subplot(spec[0, 0])
ax0.set_axis_off()
ax0.text(.5, .5, 'PCA', rotation=90, ha='center', va='center', fontsize=30)

for col, pca_synth in enumerate((pca_synth_smote, pca_synth_gan, pca_synth_cgan), start=1):
    ax = subfigs[1].add_subplot(spec[0, col])

    # Real points first, synthetic points drawn on top.
    ax.scatter(pca_real.iloc[:, 0].values, pca_real.iloc[:, 1].values,
               c='r', alpha=0.3, label='Real Data')
    ax.scatter(pca_synth.iloc[:, 0], pca_synth.iloc[:, 1],
               c='deepskyblue', alpha=0.05, label='Synthetic Data')

    # Same fixed window on every panel so the three panels are comparable.
    ax.set_xlim(-1, 1.5)
    ax.set_ylim(-1, 1.5)
    ax.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)

    if col == 2:
        # Single legend for the whole figure, centred below the middle PCA panel.
        leg = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                        fontsize=24, ncol=2, markerscale=5)
        # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old
        # name removed in 3.9); support both so the cell runs on either side.
        for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
            lh.set_alpha(1)

fig.savefig('plot/2d-viz/all-asterisk.png', dpi=200)

In [None]:
# Display the hyper-parameters of the shared t-SNE estimator, for the record.
tsne.get_params()

In [None]:
import umap

# UMAP reducer with default settings, used for the 2-D real-vs-synthetic plots.
# Seeded so the embedding is reproducible across runs, consistent with the
# seeded 3-D reducer (`umap_3d`) defined further down in this notebook.
# NOTE(review): a fixed seed may make UMAP run single-threaded — confirm the
# extra runtime is acceptable.
reducer = umap.UMAP(random_state=0)

In [None]:
# Fit UMAP on the real data only and keep the resulting embedding; the
# synthetic sets are projected with the fitted reducer in the cells below.
embedding = reducer.fit_transform(data_black_real)
# Sanity check: display the shape of the embedding.
embedding.shape

In [None]:
# Quick look at the first two UMAP coordinates of the real data.
plt.scatter(embedding[:,0], embedding[:,1])

In [None]:
# Project the CA-GAN synthetic data with the reducer fitted on the real data.
embedding_cgan = reducer.transform(data_black_fake_cgan)

In [None]:
# Project the GAN synthetic data with the reducer fitted on the real data.
embedding_gan = reducer.transform(data_black_fake_gan)

In [None]:
# Project the SMOTE synthetic data with the reducer fitted on the real data.
embedding_smote = reducer.transform(data_black_fake_smote)

In [None]:
# Quick overlay: CA-GAN synthetic points first, real points drawn on top
# (default colours, no legend).
plt.scatter(embedding_cgan[:,0], embedding_cgan[:,1])
plt.scatter(embedding[:,0], embedding[:,1])

In [None]:
# UMAP together: one panel per generator (SMOTE, WGAN-GP, CA-GAN), each showing
# the synthetic projection against the UMAP embedding of the real data.

fig = plt.figure(constrained_layout=True, figsize=(30, 10))
spec = gridspec.GridSpec(ncols=3, nrows=1, figure=fig)

# (panel title, extra title kwargs, synthetic UMAP projection)
panels = [
    ('SMOTE', {}, embedding_smote),
    ('WGAN-GP', {}, embedding_gan),
    ('CA-GAN', {'fontweight': "bold"}, embedding_cgan),
]

for col, (title, title_kwargs, embedding_synth) in enumerate(panels):
    ax = fig.add_subplot(spec[0, col])
    ax.set_title(title, fontsize=34, pad=10, **title_kwargs)

    # Synthetic points first so the real points are drawn on top.
    ax.scatter(embedding_synth[:, 0], embedding_synth[:, 1],
               s=10, c='deepskyblue', alpha=0.5, label='Synthetic Data')
    ax.scatter(embedding[:, 0], embedding[:, 1],
               s=10, c='r', alpha=0.1, label='Real Data')

    if col == 1:
        # Single shared legend, centred below the middle panel, with the
        # entries listed in reverse plotting order (real first).
        handles, labels = ax.get_legend_handles_labels()
        leg = ax.legend(handles[::-1], labels[::-1], loc='upper center',
                        bbox_to_anchor=(0.5, -0.05), fontsize=24, ncol=2, markerscale=5)
        # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old
        # name removed in 3.9); support both so the cell runs on either side.
        for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
            lh.set_alpha(1)

fig.savefig('plot/umap-all.png', dpi=200)

In [None]:
# UMAP new version: same three panels as the plain UMAP figure, plus a narrow
# leading column with the method label, hidden ticks and the 'WGAN-GP*' title.

fig = plt.figure(constrained_layout=True, figsize=(30, 10))
spec = gridspec.GridSpec(ncols=4, nrows=1, figure=fig, width_ratios=[.1, 1, 1, 1])

# Method label in the narrow first column.
ax0 = fig.add_subplot(spec[0, 0])
ax0.set_axis_off()
ax0.text(.5, .5, 'UMAP', rotation=90, ha='center', va='center', fontsize=30)

# (panel title, extra title kwargs, synthetic UMAP projection)
panels = [
    ('SMOTE', {}, embedding_smote),
    ('WGAN-GP*', {}, embedding_gan),
    ('CA-GAN', {'fontweight': "bold"}, embedding_cgan),
]

for col, (title, title_kwargs, embedding_synth) in enumerate(panels, start=1):
    ax = fig.add_subplot(spec[0, col])
    ax.set_title(title, fontsize=34, pad=10, **title_kwargs)

    # Synthetic points first so the real points are drawn on top.
    ax.scatter(embedding_synth[:, 0], embedding_synth[:, 1],
               s=10, c='deepskyblue', alpha=0.5, label='Synthetic Data')
    ax.scatter(embedding[:, 0], embedding[:, 1],
               s=10, c='r', alpha=0.1, label='Real Data')

    # UMAP axes carry no interpretable units: hide ticks and tick labels.
    ax.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)

    if col == 2:
        # Single shared legend, centred below the middle panel, with the
        # entries listed in reverse plotting order (real first).
        handles, labels = ax.get_legend_handles_labels()
        leg = ax.legend(handles[::-1], labels[::-1], loc='upper center',
                        bbox_to_anchor=(0.5, -0.05), fontsize=24, ncol=2, markerscale=5)
        # Matplotlib 3.7 renamed `legendHandles` to `legend_handles` (old
        # name removed in 3.9); support both so the cell runs on either side.
        for lh in (leg.legend_handles if hasattr(leg, 'legend_handles') else leg.legendHandles):
            lh.set_alpha(1)

fig.savefig('plot/2d-viz/umap-all-asterisk.png', dpi=200)

In [None]:





# Three-component UMAP with random initialisation and a fixed seed.
umap_3d = umap.UMAP(n_components=3, init='random', random_state=0)

# Fit on the real data only, then project each synthetic data set with the
# fitted model so all four embeddings share the same 3-D space.
embedding_3d = umap_3d.fit_transform(data_black_real)
embedding_3d_cgan = umap_3d.transform(data_black_fake_cgan)
embedding_3d_gan = umap_3d.transform(data_black_fake_gan)
embedding_3d_smote = umap_3d.transform(data_black_fake_smote)



In [None]:
import plotly.express as px

# Interactive 3-D UMAP views: the real data, then the CA-GAN synthetic data.
# The embeddings are plain arrays, so x/y/z select columns 0, 1 and 2.
fig_3d_real = px.scatter_3d(
    embedding_3d, x=0, y=1, z=2
).update_traces(marker_size=1)

# `color=` expects a column name, so `color='r'` raises a ValueError on an
# array with columns 0..2; a fixed marker colour is set through
# `color_discrete_sequence` instead.
fig_3d_cgan = px.scatter_3d(
    embedding_3d_cgan, x=0, y=1, z=2, color_discrete_sequence=['red']
).update_traces(marker_size=1)

# Only the last expression of a cell is rendered automatically, so show both
# figures explicitly.
fig_3d_real.show()
fig_3d_cgan.show()
