# This is the notebook to preprocess the data.

In [39]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

from acse_9_irp_wafflescore import dataPreprocessing as dp
from scipy.ndimage import gaussian_filter
import numpy as np

import logging
import sys

# Route log records of level INFO and above to stdout, each prefixed with
# a timestamp and its level name.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s : %(message)s',
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


The user can specify which model and data files to use here; the input files for each model are selected by the model name, which keeps them easy to manage.
To support further models in the future, add their model name and input files.

The input files used in this stage were generated by Dr. Michele Paulatto; the code that generates those files is located in the `Synthetic Model` folder of the GitHub repository.

In [40]:
# Short name of the model to process. It is used for easier reference and
# becomes the prefix of the files written to ../data/.
model = 'M1'

# Title of the variable held in each data column.
col_name = [
    'vp', 'vs', 'dn', 'vp/vs',
    'qp', 'qs',
    'x', 'z',
]


In [41]:
# load needed files
# Directory that holds the three .npz field files of each supported model.
# To support another model, add its name and directory here.
model_dirs = {
    'M1': '../Synthetic Model/',           # Original Earth Model
    'M5a': '../Synthetic Model/Model5a/',  # Simplified Earth Model
    'M5b': '../Synthetic Model/Model5b/',  # Simplified Earth Model -- less temperature anomaly
}

if model not in model_dirs:
    # Fail fast: merely printing here would leave input_npz / output_smooth_npz /
    # output_npz undefined (or stale from an earlier run of this cell), so the
    # following cells would either crash with a NameError or silently process
    # the files of a different model.
    raise ValueError('Invalid model %r, expected one of %s'
                     % (model, sorted(model_dirs)))

model_dir = model_dirs[model]
input_npz = np.load(model_dir + 'input_fields.npz')
output_smooth_npz = np.load(model_dir + 'output_fields_smooth.npz')
output_npz = np.load(model_dir + 'output_fields.npz')

In [49]:
def save_npy(suffix, array, description):
    """Save `array` to ../data/<model><suffix>.npy and log where it was written.

    Parameters
    ----------
    suffix : str
        Text appended to the model name to build the file name,
        e.g. '_init_data'.
    array : numpy.ndarray
        Array handed to np.save.
    description : str
        Leading text of the INFO log message, e.g. 'Data'.

    Returns
    -------
    str
        Path the array was saved to.
    """
    path = '../data/' + model + suffix + '.npy'
    np.save(path, array)
    # Pass the values as logging arguments so the message is only formatted
    # when the record is actually emitted.
    logging.info('%s saved at: %s', description, path)
    return path


# convert npz into 1d, 2d numpy
init_label = dp.convLabel(input_npz['classes'])
init_data = dp.convData(output_smooth_npz)

# remove water: entries labelled 0 are dropped from the labels and the
# matching rows are dropped from the data
water_idx = np.where(init_label == 0)
label = np.delete(init_label, water_idx)
data = np.delete(init_data, water_idx, axis=0)
# Reported right after the removal so the shape really is the water-free one.
logging.debug("Water removed shape: (%d, %d)", data.shape[0], data.shape[1])

save_npy('_init_data', data, 'Initial Data as numpy')
save_npy('_init_label', label, 'Initial label as numpy')

# perform data preprocessing
# NOTE(review): re_inf=-9999 appears to be the value substituted for
# infinities (the cell output logs "Replacing INF with -9999") -- confirm
# against dp.data_cleanup.
data = dp.data_cleanup(data, col_name, re_inf=-9999)

save_npy('_clean_data', data, 'Data')
# The labels are not modified by the cleanup step in this cell, so this file
# holds the same array as the '_init_label' one.
save_npy('_data_label', label, 'Data label')
# The last two columns are saved separately as the x/z positions.
save_npy('_xz_pos', data[:, -2:], 'XZ positions')

2019-08-28 01:16:22,380 | INFO : Initial Data as numpy saved at: ../data/M1_init_data.npy
2019-08-28 01:16:22,383 | INFO : Initial label as numpy saved at: ../data/M1_init_label.npy
2019-08-28 01:16:22,385 | INFO : Replacing INF with -9999
2019-08-28 01:16:22,394 | INFO : Data saved at: ../data/M1_clean_data.npy
2019-08-28 01:16:22,397 | INFO : Data label saved at: ../data/M1_data_label.npy
2019-08-28 01:16:22,402 | INFO : XZ positions saved at: ../data/M1_xz_pos.npy
