In [None]:
# Inspect the input data

from utils import plot_loss, encoder_files_to_tensors, normalize_params
from utils import sample_files
import time
import glob
import tensorflow as tf
from tensorflow import keras
import yaml
import os
import numpy as np
from datetime import datetime
import argparse
import matplotlib.pyplot as plt
import matplotlib as mpl


In [None]:
# Initialize parameters
# Root directory of the datasets. This is an absolute path; the commented-out
# line below is a relative-path alternative for running elsewhere.
data_dir = '/eos/kiliakis/tomo_data/datasets'
# data_dir = './tomo_data/datasets'

# Data specific
# NOTE(review): IMG_OUTPUT_SIZE and latent_dim are not referenced by any other
# cell visible in this notebook -- confirm they are still needed.
IMG_OUTPUT_SIZE = 128
latent_dim = 7  # 6 + the new VrfSPS

# Keep only a small percentage of the entire dataset
# for faster testing.
# NOTE(review): the value is handed to sample_files() unchanged; whether 0.001
# is interpreted as a fraction or as a percentage depends on that helper --
# confirm against utils.sample_files.
dataset_keep_percent = 0.001

# Training: 338900
# Validation: 59136 (338901 - 398036)
# Testing: 71526 (398037 - 469562)


In [None]:

# Initialize train/ test / validation paths
# Everything below data_dir/ML_data is split into one sub-directory per set.
ML_dir = os.path.join(data_dir, 'ML_data')
TRAINING_PATH = os.path.join(ML_dir, 'TRAINING')
# Fail early, and name the path that was looked up, so a wrong data_dir is
# obvious from the error instead of a bare AssertionError.
assert os.path.exists(TRAINING_PATH), \
    f'Training data directory not found: {TRAINING_PATH}'

# The validation set is currently not used by this notebook.
# VALIDATION_PATH = os.path.join(ML_dir, 'VALIDATION')
# assert os.path.exists(VALIDATION_PATH)



In [None]:
# Create the datasets
# First the training data
# sample_files (from utils) picks the files to load from TRAINING_PATH.
# NOTE(review): the meaning of keep_every=51 is defined inside utils and is not
# visible here -- confirm before changing it.
file_names = sample_files(TRAINING_PATH, dataset_keep_percent, keep_every=51)
# print(sorted(file_names)[:10])

# read input, divide in features/ label, create tensors
# normalize=False is passed explicitly; presumably this keeps the labels in raw
# units so the normalization schemes can be compared further down -- TODO confirm.
x_train, y_train = encoder_files_to_tensors(file_names, normalize=False)

# Validation loading is disabled together with VALIDATION_PATH.
# # Then the validation data
# files = glob.glob(VALIDATION_PATH + '/*.pk')
# files = files[:int(len(files) * dataset_keep_percent)]

# # Shuffle them
# np.random.shuffle(files)
# # read input, divide in features/ label, create tensors
# x_valid, y_valid = encoder_files_to_tensors(files)


In [None]:
%matplotlib inline
# plot some of the outputs

nrows = 2
# Get nrows * nrows random images
# sample = np.random.choice(np.arange(len(x_train)),
#                           size=nrows * nrows, replace=False)

# samples_X = tf.gather(x_train, sample)
# samples_y = tf.gather(y_train, sample)

samples_X = x_train[:nrows*nrows]
samples_y = y_train[:nrows*nrows]


# Create 3x3 grid of figures
fig, axes = plt.subplots(ncols=nrows, nrows=nrows, figsize=(12, 12))
axes = np.ravel(axes)
for i in range(len(axes)):
    ax = axes[i]
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_X[i], cmap='jet')
    # Set the label
    title = ','.join([f'{num:.1f}' for num in samples_y[i]])
    ax.set_title(f'{title}')


In [None]:
%matplotlib inline
nrows = 1
# Get nrows * nrows random images
sample = np.random.choice(np.arange(len(x_train)),
                          size=nrows * nrows, replace=False)

samples_X = tf.gather(x_train, sample)
samples_y = tf.gather(y_train, sample)

# Create 3x3 grid of figures
fig, axes = plt.subplots(ncols=nrows, nrows=nrows, figsize=(8, 8))
axes = np.ravel(axes)
for i in range(len(axes)):
    ax = axes[i]
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_X[i][14:-14, 14:-14], cmap='jet')
    # Set the label
    # title = ','.join([f'{num:.1f}' for num in samples_y[i]])
    print(samples_y[i])
    # ax.set_title(f'{title}')


In [None]:
# Average x_train over its first axis, i.e. over the samples
mean_x_train = np.mean(x_train.numpy(), axis=0)
# same 14-pixel border crop that the per-sample plots use
cropped_mean = mean_x_train[14:-14, 14:-14]

# Single-panel figure without tick marks
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(12, 12))
ax.set(xticks=[], yticks=[])
ax.imshow(cropped_mean, cmap='jet')
ax.set_title('Mean of all x_train')

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# y_train = y_train.numpy()
# Fit a standardizing scaler and a min-max scaler on the training labels; the
# fitted statistics are inspected in the next cell.
std_scaler = StandardScaler()
min_max_scaler = MinMaxScaler()
for scaler in (std_scaler, min_max_scaler):
    scaler.fit(y_train)



In [None]:

# Report the per-column statistics held by the two fitted scalers
fitted_stats = (
    ('Mean: ', std_scaler.mean_),
    ('STD: ', std_scaler.scale_),
    ('Min: ', min_max_scaler.data_min_),
    ('Max: ', min_max_scaler.data_max_),
)
for label, values in fitted_stats:
    print(label, values)

# TensorFlow equivalents of the mean / std above, kept for reference
# print(tf.reduce_mean(y_train, 0))
# print(tf.math.reduce_std(y_train, 0))


In [None]:
# Compare how the label columns are distributed under the three normalization
# schemes offered by normalize_params, next to the raw (un-normalized) values.

# Names of the label columns, in the column order of y_train.
var_names = ['phase_error', 'energy_error',
             'bunch_length', 'intensity', 'V_rf', 'mu', 'Vrf_SPS']


def _normalize_all(targets, scheme):
    """Call normalize_params on every label column of ``targets``.

    Args:
        targets: 2-D labels, one column per entry of ``var_names``.
        scheme: value forwarded as ``normalization=`` ('minmax', 'std', 'default').

    Returns:
        Whatever normalize_params returns; the plotting loop below indexes it
        by column number.
    """
    columns = [targets[:, col] for col in range(len(var_names))]
    return normalize_params(*columns, normalization=scheme)


def _plot_fraction_hist(ax, values, title, n_total, bins=20):
    """Draw one histogram panel and print its bin edges.

    Bar heights are bin counts divided by ``n_total``. Bars are placed at
    integer positions; only the first and last bar get a tick, labelled with
    the first and last bin edge.

    Args:
        ax: matplotlib Axes to draw on.
        values: 1-D data to histogram.
        title: panel title, also used as the prefix of the printed edges.
        n_total: divisor applied to the bin counts.
        bins: number of histogram bins.
    """
    hist, edges = np.histogram(values, bins=bins, density=False)
    hist = hist / n_total
    print(title, edges)
    positions = np.arange(len(hist))
    ax.bar(positions, hist, width=0.8)
    ax.set_title(title)
    # edges = [f'{e:.4f}' for e in edges]
    ax.set_xticks(positions[[0, -1]])
    ax.set_xticklabels(edges[[0, -1]])
    ax.set_xlabel('Bin')
    ax.set_ylabel('Density')


# min_max_norm_data = min_max_scaler.transform(y_train)
# std_norm_data = std_scaler.transform(y_train)
min_max_norm_data = _normalize_all(y_train, 'minmax')
std_norm_data = _normalize_all(y_train, 'std')
default_norm_data = _normalize_all(y_train, 'default')


# Now plot the data distribution: one row per variable, one column per scheme
fig, axes = plt.subplots(ncols=4, nrows=len(var_names), sharex=False,
                         sharey=True, figsize=(16, 16))
n_samples = len(y_train)
for i, name in enumerate(var_names):
    panels = (
        ('-min_max', min_max_norm_data[i]),
        ('-std', std_norm_data[i]),
        ('-def', default_norm_data[i]),
        ('-unorm', y_train[:, i]),
    )
    for ax, (suffix, values) in zip(axes[i], panels):
        _plot_fraction_hist(ax, values, name + suffix, n_samples)

# A single layout pass once every panel exists (previously run after each panel)
fig.tight_layout()


In [None]:
%matplotlib inline
# plot some of the outputs

nrows = 3
# Get nrows * nrows random images
sample = np.random.choice(np.arange(len(x_train)),
                          size=nrows * nrows, replace=False)

samples_X = tf.gather(x_train, sample)
samples_y = tf.gather(y_train, sample)

# Create 3x3 grid of figures
fig, axes = plt.subplots(ncols=nrows, nrows=nrows, figsize=(12, 12))
axes = np.ravel(axes)
for i in range(len(axes)):
    ax = axes[i]
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_X[i, 14:-14, 14:-14], cmap='jet')
    # Set the label
    title = ','.join([f'{num:.1f}' for num in samples_y[i]])
    ax.set_title(f'{title}')


In [None]:
## For the decoder part
from utils import decoder_files_to_tensors

# read input, divide in features/ label, create tensors
# NOTE: this rebinds x_train / y_train, which earlier cells filled from
# encoder_files_to_tensors. Re-running an earlier cell after this one will
# therefore operate on the decoder tensors instead.
# NOTE(review): the next cell displays samples of y_train with imshow, so
# y_train presumably holds the images here and x_train the parameters -- confirm
# against utils.decoder_files_to_tensors.
x_train, y_train = decoder_files_to_tensors(file_names)


In [None]:
%matplotlib inline
# plot some of the outputs

nrows = 1
# Get nrows * nrows random images
sample = np.random.choice(np.arange(len(y_train)),
                          size=nrows, replace=False)

samples_real = tf.gather(y_train, sample)

# Create 3x3 grid of figures
fig, axes = plt.subplots(ncols=nrows, nrows=nrows, figsize=(nrows*8, nrows*8))
axes = np.ravel(axes)
for i in range(nrows):
    ax = axes[i]
    plt.sca(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_real[i][14:-14, 14:-14], cmap='jet')
    # Set the label
    # title = ','.join([f'{num:.1f}' for num in samples_X[i]])
    ax.set_title(f'True')
    plt.tight_layout()


In [None]:
%matplotlib inline
# plot some of the outputs

nrows = 5
# Get nrows * nrows random images
sample = np.random.choice(np.arange(len(y_test)),
                          size=nrows, replace=False)

samples_real = y_test[sample]
samples_pred = test_pred[sample]

# Create 3x3 grid of figures
fig, axes = plt.subplots(ncols=3, nrows=nrows, figsize=(12, 20))
# axes = np.ravel(axes)
for i in range(nrows):
    ax = axes[i][0]
    plt.sca(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_real[i]+1, cmap='jet', vmin=0, vmax=2)
    # Set the label
    # title = ','.join([f'{num:.1f}' for num in samples_X[i]])
    ax.set_title(f'True')
    plt.tight_layout()

    ax = axes[i][1]
    plt.sca(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    ax.imshow(samples_pred[i]+1, cmap='jet', vmin=0, vmax=2)
    # Set the label
    # title = ','.join([f'{num:.1f}' for num in samples_X[i]])
    ax.set_title(f'Predicted')
    plt.tight_layout()

    ax = axes[i][2]
    plt.sca(ax)
    ax.set_xticks([])
    ax.set_yticks([])
    # show the image
    plt.imshow(np.abs(samples_real[i] -
                      samples_pred[i]), cmap='jet', vmin=0, vmax=2,
               aspect='auto')
    plt.colorbar()
    # Set the label
    # title = ','.join([f'{num:.1f}' for num in samples_X[i]])
    ax.set_title(f'Diff')
    plt.tight_layout()
