<a href="https://colab.research.google.com/github/prinaldi3/Denoising/blob/main/Denoising.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive; the dataset and checkpoint paths used below live under this mount point.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Installations and Imports

In [None]:
# Pin pip itself to a fixed version before anything else is installed.
# NOTE(review): presumably required for installing xca's dependencies - confirm the pin is still needed.
!python -m pip install pip==21.0.1

In [None]:
# Start from a clean checkout: delete any previous clone, then fetch xca from GitHub.
!rm -rf xca
!git clone https://github.com/maffettone/xca

In [None]:
%%bash
# Install the cloned xca checkout using the `python` found on PATH.
cd xca
python -m pip install .

Training

In [None]:
#Importing from XCA package

import xca
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from xca.ml.tf_models import build_CNN_encoder_model, build_CNN_decoder_model, VAE_denoising_training, VAE

In [None]:
# Input / output locations on the mounted Drive.

# TFRecord file(s) to train on -- edit this list to switch datasets.
dataset_paths = [
    '/content/gdrive/MyDrive/BaTiO_training/BTO_sim.tfrecord',
]

# Training checkpoints are written to this directory.
out_dir = '/content/gdrive/MyDrive/BaTiO_training/training_1'

In [None]:
#Build encoder/decoder networks using XCA methods

# out_dir is intentionally NOT reassigned here: it is configured once in the
# "Specify directories" cell, and re-setting it in this cell would silently undo
# any edit made there.

# Values that the encoder and decoder must agree on, stated once.
data_shape = (1750, 1)
latent_dim = 100

# The encoder builder also returns the shape of its last convolutional layer,
# which the decoder builder takes as an input.
encoder, last_conv_shape = build_CNN_encoder_model(
    data_shape=data_shape,
    latent_dim=latent_dim,
    dense_dims=[],
    filters=[32, 32, 16],
    kernel_sizes=[16, 16, 16],
    strides=[1, 1, 1],
    pool_sizes=[1, 1, 1],
    paddings=["same"] * 3,
    verbose=False,  # change this to True to see Keras.summary() of model
)

# The decoder uses the encoder's filter counts in reverse order.
# NOTE(review): paddings has 4 entries while filters/kernel_sizes/strides have 3 --
# confirm build_CNN_decoder_model expects the extra entry.
decoder = build_CNN_decoder_model(
    data_shape=data_shape,
    latent_dim=latent_dim,
    last_conv_layer_shape=last_conv_shape,
    filters=[16, 32, 32],
    kernel_sizes=[16, 16, 16],
    strides=[1, 1, 1],
    paddings=["same"] * 4,
    verbose=False,
)

In [None]:
#Build the VAE and train!

# Noise range handed to the denoising trainer. Defined in this cell so that training
# works on a fresh kernel (Restart & Run All); the dataset-building cell further down
# assigns the same values.
# NOTE(review): presumably the trainer draws the noise stddev as 10**u with
# u in [log_noise_min, log_noise_max], like the evaluation preprocess does -- confirm in xca.
log_noise_min = -1
log_noise_max = -.5

kl_loss_factor = 0 #KL-Loss factor is set to 0 (we only care about reconstructions)
vae = VAE(encoder, decoder, kl_loss_factor)
vae.built = True  # set the model's `built` flag by hand

# Adam is the optimizer actually used for training. (An SGD instance used to be
# created first and was overwritten before use, so it has been dropped.)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)

res = VAE_denoising_training(
    vae,
    dataset_paths=dataset_paths,
    out_dir=out_dir,
    log_noise_min=log_noise_min,
    log_noise_max=log_noise_max,
    batch_size=64,
    multiprocessing=16,
    categorical=True,
    data_shape=(1750, 1),
    n_epochs=20,
    optimizer=optimizer,
    checkpoint_rate=100/20,  # evaluates to 5.0 (a float)
    verbose=True,
)


Plotting Results

In [None]:
# Learning curves: reconstruction loss on the left, KL loss on the right.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
recon_ax, kl_ax = axs

# Left panel, with the y-range clipped to 0-40.
recon_ax.plot(res["reconstruction_loss"])
recon_ax.set_ylim((0, 40))
recon_ax.set_title("reconstruction Loss")

# Right panel.
kl_ax.plot(res["kl_loss"])
kl_ax.set_title("KL Loss")

In [None]:
from xca.ml.tf_data_proc import build_dataset

# Build dataset -- noise settings.
# The preprocess function below draws the Gaussian noise stddev as 10**u,
# with u uniform between these two exponents.
log_noise_min = -1    # lower exponent (least noise)
log_noise_max = -0.5  # upper exponent (most noise)

def preprocess(data, label):
    """Turn one dataset element into a clean / noisy pair of min-max scaled patterns.

    Args:
        data: the raw pattern; it is cast to float32 before use.
        label: passed through untouched.

    Returns:
        dict with keys "X" (scaled clean pattern), "X_noisy" (pattern with Gaussian
        noise added, then scaled) and "label".
    """

    def _min_max(t):
        # Rescale along axis 0 so the minimum maps to 0 and the maximum to 1.
        lo = tf.math.reduce_min(t, axis=0, keepdims=True)
        hi = tf.math.reduce_max(t, axis=0, keepdims=True)
        return (t - lo) / (hi - lo)

    clean = tf.cast(data, tf.float32)

    # Noise level: stddev = 10**u, u uniform in [log_noise_min, log_noise_max).
    # NOTE(review): np.random.uniform runs in plain Python; if build_dataset applies this
    # function through a traced tf.data map, the level may be drawn once rather than
    # per example -- confirm.
    sigma = 10 ** np.random.uniform(log_noise_min, log_noise_max)

    # Noise is added to the unscaled pattern; scaling happens afterwards.
    corrupted = clean + tf.random.normal(data.shape, stddev=sigma, dtype=tf.float32)

    return {"X": _min_max(clean), "X_noisy": _min_max(corrupted), "label": label}

# Dataset used for plotting: batches of one pattern, val_split of 0.
# The preprocess hook adds noise and rescales, assuming probabilities are needed
# on [0,1] and not on [-1,1]. The second return value is not used.
dataset, _ = build_dataset(
    dataset_paths=dataset_paths,
    data_shape=(1750, 1),
    categorical=True,
    batch_size=1,
    multiprocessing=1,
    val_split=0.0,
    preprocess=preprocess,
)


In [None]:
#Plot reconstructions

# Font sizes are configured BEFORE the figure is created: matplotlib reads rcParams
# when axes and text objects are built, so setting them afterwards only took effect
# the second time the cell was run.
plt.rc('font', size=20)          # controls default text sizes
plt.rc('axes', titlesize=30)     # fontsize of the axes title
plt.rc('axes', labelsize=30)     # fontsize of the x and y labels

# Only the first batch is plotted (the loop exits after one iteration).
for batch in dataset:
  output = vae(batch["X_noisy"], training=False)

  # First element of the batch for each quantity.
  X_denoise = output["reconstruction"][0]
  X = batch["X"][0]
  X_noisy = batch["X_noisy"][0]
  label = batch["label"][0]

  fig, ax = plt.subplots(figsize=(10, 10))
  # Curves are shifted vertically so they do not overlap: the noisy input on top,
  # the true and denoised patterns overlaid in the middle, the residual at the bottom.
  ax.plot(X_noisy+1.5, label="Raw", color='gray')
  ax.plot(X + .5, label="True", color='k')
  ax.plot(X_denoise+.5, 'r--', label="Denoise")
  ax.plot(abs(X-X_denoise), color='b', label="Residual")
  ax.set_title("Comparison of True and Denoised Patterns")
  ax.set_xlabel("Pixel #")
  ax.set_ylabel("Intensity [Arb.]")

  # Intensities are arbitrary (and offset), so y tick values are hidden.
  ax.get_yaxis().set_ticks([])
  #fig.savefig('/content/gdrive/MyDrive/poster_figs/batio_reconstruction_2', facecolor='white')
  ax.legend()
  break