In [None]:
""" functions to regress y (labels) based on z (latent space) """
import os
import glob
import numpy as np
import pickle
import h5py
import pandas as pd
import sys
import tensorflow as tf
import PIL.Image
import datetime
import glob

import src.misc as misc
import src.tl_gan.feature_axis as feature_axis
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
""" get y and z from pre-generated files """
# Input directory with the pre-generated GAN samples, and the directory that
# the regression cell further down writes its feature directions into.
path_gan_sample_img = './asset_results/pggan_x_ray_integrated_norm_sample_jpg/'
path_feature_direction = './asset_results/pg_gan_x_ray_integrated_norm_feature_direction_5/'
#path_celeba_att = './data/raw/celebA_annotation/list_attr_celeba.txt'

# HDF5 files holding the labels (dataset key 'y') and latent vectors (key 'z').
filename_sample_y, filename_sample_z = 'sample_y.h5', 'sample_z.h5'
pathfile_y, pathfile_z = (os.path.join(path_gan_sample_img, filename)
                          for filename in (filename_sample_y, filename_sample_z))

# Slice with [:] to copy each dataset into memory before its file is closed.
with h5py.File(pathfile_y, 'r') as h5_file:
    y = h5_file['y'][:]
with h5py.File(pathfile_z, 'r') as h5_file:
    z = h5_file['z'][:]

In [None]:
# Latent vectors for the "normal" images: same sample directory as y/z,
# stored under the dataset key 'z' of normal_z.h5.
filename_normal_z = 'normal_z.h5'
pathfile_normal_z = os.path.join(path_gan_sample_img, filename_normal_z)
with h5py.File(pathfile_normal_z, 'r') as h5_normal:
    # [:] materialises the dataset so it outlives the file handle.
    z_normal = h5_normal['z'][:]

In [None]:
def one_hot(a, num_classes):
    """Return one-hot encoded rows for the integer class indices in `a`.

    Parameters
    ----------
    a : array-like of int
        Class indices. Any shape is accepted (lists, tuples and ndarrays alike);
        the input is flattened before encoding. Negative indices follow normal
        NumPy indexing and count from the last class.
    num_classes : int
        Width of the encoding, i.e. the size of the identity matrix indexed.

    Returns
    -------
    numpy.ndarray
        Float array with one row per index, shape (a.size, num_classes), passed
        through np.squeeze. Length-1 axes are therefore dropped: a single
        index yields a 1-D vector of length num_classes.

    Raises
    ------
    IndexError
        If an index is outside [-num_classes, num_classes) or is not an integer.
    """
    # np.asarray lets plain Python sequences through; an ndarray is used as-is.
    indices = np.asarray(a).reshape(-1)
    return np.squeeze(np.eye(num_classes)[indices])
# Class names, in label order. The labels in y are shifted by -1 below, so
# label 1 maps to column 0 ('01Nodule').
# 5-class variant that also keeps the normal class (labels used without the -1 shift):
#y_name = ['00Normal', '01Nodule', '03Consolidation', '04InterstitialOpacity','10PleuralEffusion']
y_name = ['01Nodule', '03Consolidation', '04InterstitialOpacity','10PleuralEffusion']

# One-hot encode by indexing rows of a (num_classes x num_classes) identity.
# Sizing the identity by the number of classes (not by len(y)) avoids allocating
# a num_samples x num_classes matrix and still works when there are fewer
# samples than classes.
y_onehot = np.eye(len(y_name), dtype=np.int8)[(y - 1).reshape(-1)]

# Regression: latent space z to predict features y

In [None]:
##
""" regression: use latent space z to predict features y """
method ='linear'
# One direction per column of y_onehot; the later cells unpack the result's
# shape as (len_z, len_y).
feature_slope = feature_axis.find_feature_axis(z, y_onehot, method=method)

# Optionally normalize the feature vectors before saving them.
yn_normalize_feature_direction = True
if yn_normalize_feature_direction:
    feature_direction = feature_axis.normalize_feature_axis(feature_slope)
else:
    feature_direction = feature_slope

# Save the regression result to disk. makedirs(exist_ok=True) also creates
# missing parent directories and does not fail if the directory already exists
# (the previous exists()/mkdir() pair could race and required the parent to exist).
os.makedirs(path_feature_direction, exist_ok=True)

# File name encodes the regression method and a timestamp from misc.gen_time_str().
pathfile_feature_direction = os.path.join(path_feature_direction, 'feature_direction_{}_{}.pkl'.format(method, misc.gen_time_str()))
dict_to_save = {'direction': feature_direction, 'name': y_name}
with open(pathfile_feature_direction, 'wb') as f:
    pickle.dump(dict_to_save, f)


In [None]:
""" disentangle correlated feature axis """
# Locate the saved feature directions. glob.glob returns matches in arbitrary
# order, so taking [-1] does not reliably give the latest file; pick the most
# recently modified one explicitly instead.
list_pathfile_feature_direction = glob.glob(os.path.join(path_feature_direction, 'feature_direction_*.pkl'))
if not list_pathfile_feature_direction:
    raise FileNotFoundError(
        'no feature_direction_*.pkl file found in {}'.format(path_feature_direction))
pathfile_feature_direction = max(list_pathfile_feature_direction, key=os.path.getmtime)

# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(pathfile_feature_direction, 'rb') as f:
    feature_direction_name = pickle.load(f)

# The pickle stores a dict with the direction matrix and the matching feature names.
feature_direction = feature_direction_name['direction']
feature_name = np.array(feature_direction_name['name'])

# Rows index latent dimensions, columns index features.
len_z, len_y = feature_direction.shape

# Use every feature axis as a base; idx_target=None leaves the choice of
# targets to disentangle_feature_axis_by_idx's default handling.
feature_direction_disentangled = feature_axis.disentangle_feature_axis_by_idx(
    feature_direction, idx_base=range(len_y), idx_target=None)

# Cosine-similarity plot of the disentangled axes, labelled by feature name.
feature_axis.plot_feature_cos_sim(feature_direction_disentangled, feature_name=feature_name)

# Dimension reduction & plotting

In [None]:
import matplotlib.pyplot as plt
import time
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE


X = z
# Zero-based class labels, flattened to 1-D. The plotting cell masks rows with
# `Y == i`, which only works for a 1-D label vector; a column-shaped y would
# otherwise produce a 2-D mask (and a column-vector warning from LDA).
Y = (y - 1).reshape(-1)
target_names = y_name

# Unsupervised projection onto the first two principal components.
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Supervised projection using the class labels.
lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, Y).transform(X)

# t-SNE is stochastic; a fixed seed keeps the embedding reproducible across
# kernel restarts.
tsne = TSNE(n_components=2, random_state=0)
X_r3 = tsne.fit_transform(X)

In [None]:
# Report the share of variance captured by each retained PCA component.
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

colors = ['navy', 'turquoise', 'darkorange', 'pink']
lw = 2

# One figure per embedding: (2-D coordinates, figure title, extra scatter styling).
embedding_panels = [
    (X_r, 'PCA', dict(alpha=.8, lw=lw)),
    (X_r2, 'LDA', dict(alpha=.5)),
    (X_r3, 'T-SNE', dict(alpha=.5)),
]

for coords, panel_title, scatter_style in embedding_panels:
    plt.figure()
    # Class index i corresponds to target_names[i]; zip stops at the shorter
    # of the colour list and the name list.
    for class_idx, (color, target_name) in enumerate(zip(colors, target_names)):
        in_class = Y == class_idx
        plt.scatter(coords[in_class, 0], coords[in_class, 1], color=color,
                    label=target_name, **scatter_style)
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.title(panel_title)

plt.show()

# T-SNE plot

In [None]:
import time
from sklearn.manifold import TSNE

n_sne = z.shape[0]

# `df` and `feat_cols` are not created by any earlier cell, so build them here:
# one row per sample, one column per latent dimension, plus the class name
# that the chart cell colours by.
z_flat = z.reshape(n_sne, -1)
feat_cols = ['z{}'.format(i_dim) for i_dim in range(z_flat.shape[1])]
df = pd.DataFrame(z_flat, columns=feat_cols)
# Labels are shifted by -1 so that label 1 maps to y_name[0], matching y_onehot.
df['class'] = np.array(y_name)[(y - 1).reshape(-1)]

time_start = time.time()
# Fixed seed so the stochastic embedding is reproducible between runs.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300, random_state=0)
tsne_results = tsne.fit_transform(df.loc[:,feat_cols].values)

print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
# Copy so the embedding columns are added without mutating df.
df_tsne = df.copy()
df_tsne['x-tsne'] = tsne_results[:,0]
df_tsne['y-tsne'] = tsne_results[:,1]


In [None]:
# Scatter plot of the 2-D t-SNE embedding, one colour per class.
# Drawn with matplotlib (already imported by this notebook): the ggplot names
# used previously (ggplot, aes, geom_point, ...) were never imported.
fig, ax = plt.subplots()
for class_name, class_points in df_tsne.groupby('class'):
    ax.scatter(class_points['x-tsne'], class_points['y-tsne'],
               s=70, alpha=0.5, label=class_name)
ax.set_title("tSNE dimensions colored by class")
ax.set_xlabel('x-tsne')
ax.set_ylabel('y-tsne')
# Same fixed viewport as before; points outside [-15, 15] are clipped from view.
ax.set_xlim(-15, 15)
ax.set_ylim(-15, 15)
ax.legend(title='class', loc='best')
chart = fig
plt.show()

# Test discovered features

In [None]:
""" test_discovered features """
# path to model code and weight
path_pg_gan_code = './src/model/pggan/'
path_model = './asset_model/x-ray_integrated_20190218_network-snapshot-014000.pkl'
# The model pickle is loaded in a later cell; the PGGAN source directory is put
# on sys.path here. Guarded so that re-running this cell does not keep adding
# duplicate entries.
if path_pg_gan_code not in sys.path:
    sys.path.append(path_pg_gan_code)

# Output directory for images generated while exploring the feature axes.
path_gan_explore = './asset_results/pggan_x_ray_integrated_norm_axis_explore/'

In [None]:
""" play with the latent space """
# Create the TensorFlow session before unpickling the networks; it is closed
# explicitly with sess.close() at the end of the notebook.
sess = tf.InteractiveSession()
# NOTE: pickle.load can execute arbitrary code - only load trusted snapshots.
with open(path_model, 'rb') as model_file:
    networks = pickle.load(model_file)
# The snapshot unpacks into three objects; only Gs is used by the cells below.
G, D, Gs = networks

In [None]:
batch_size = 20
# Set to True to write every generated frame to path_gan_explore as a PNG.
yn_save_images = False

# Starting latent, forced to shape (1, latent_dim) so that `[0]` below always
# selects the full latent vector (with a 1-D z_normal[0], `latents_0[0]` used to
# be a single scalar that was broadcast over the whole latent).
latents_c = np.reshape(z_normal[0], (1, -1))
# Interpolation weights from 0 to 1, one row per generated image.
alphas = np.linspace(0, 1, batch_size)[:, None]

# NOTE(review): the loop starts at 1, so feature axis 0 is never explored. That
# fits a 5-class setup whose first axis is '00Normal'; with the 4-class y_name it
# would skip '01Nodule' - confirm which is intended.
for i_feature in range(1, feature_direction.shape[1]):
    #latents_0 = latents_c - feature_direction[:, i_feature][None, :]*2
    latents_0 = latents_c
    latents_1 = latents_c + feature_direction[:, i_feature][None, :]*2

    # Linear interpolation from latents_0 (alpha=0) to latents_1 (alpha=1),
    # shaped like the generator's latent input.
    latents = latents_0[0]*(1-alphas) + latents_1[0]*alphas
    latents = latents.reshape(batch_size, *Gs.input_shapes[0][1:])
    # Generate dummy labels (not used by the official networks).
    labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:])
    # Run the generator to produce a set of images.
    images = Gs.run(latents, labels)
    images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, 255.0).astype(np.uint8)  # [-1,1] => [0,255]
    images = images.transpose(0, 2, 3, 1)  # NCHW => NHWC
    # Drop the single channel axis => (N, H, W); raises if there is more than
    # one channel, and no longer hard-codes a 1024x1024 resolution.
    images = np.squeeze(images, axis=-1)

    time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # Save images as PNG (8-bit grayscale, mode 'L').
    if yn_save_images:
        os.makedirs(path_gan_explore, exist_ok=True)
        for idx in range(images.shape[0]):
            PIL.Image.fromarray(images[idx], 'L').save(
                os.path.join(path_gan_explore,
                             'img_{}_{}_{}.png'.format(time_str, i_feature, idx)))
        #np.save(os.path.join(path_gan_explore, 'img_{}_{}.pkl'.format(time_str, i_feature)), labels)

##
sess.close()