In [3]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import floor, ceil
import seaborn as sns

#import xarray as xr
import tensorflow as tf
print('All packages imported.')

# Reproducibility
random_state = 42
np.random.seed(random_state)
tf.set_random_seed(random_state)

All packages imported.


In [2]:
# Mount Google Drive locally
from google.colab import drive
drive.mount('/content/gdrive')

# Check data list
!ls "/content/gdrive/My Drive/Colab Notebooks/data/"
!ls '/tmp'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
ARM_1hrlater.csv  ARM_no_dropna.cdf  ARM_strict_dropna.csv
drivefs_ipc.0  drivefs_ipc.0_shell


In [4]:
# Read data
DATADIR = '/content/gdrive/My Drive/Colab Notebooks/data'
f = DATADIR + '/ARM_1hrlater.csv'
df = pd.read_csv(f,index_col=0) # the first column in .csv is index

# Double check NaN does not exist
print('There are {} NaN in the data.'.format(df.isnull().sum().sum()))
#df

There are 0 NaN in the data.


In [0]:
# Generate inputs and labels
input = df.drop(columns='prec_sfc_1hrlater')
label = df['prec_sfc_1hrlater']

In [0]:
# Split data
train_size = 0.75
train_cnt = floor(input.shape[0] * train_size)

x_train = input.iloc[0:train_cnt].values
y_train = label.iloc[0:train_cnt].values.reshape([-1,1])
x_test = input.iloc[train_cnt:].values
y_test = label.iloc[train_cnt:].values.reshape([-1,1])

# Normalize everything using mean/std of training data
norm_mean, norm_std = [], []
for col in range(x_train.shape[1]):
  _mean = x_train[:,col].mean()
  _std = x_train[:,col].std()
  x_train[:,col] = (x_train[:,col] - _mean)/ _std
  x_test[:,col] = (x_test[:,col] - _mean)/ _std
  
  norm_mean = np.append(norm_mean, _mean)
  norm_std = np.append(norm_std, _std)

# All precipitation uses the same normalization constants
prec_mean, prec_std = norm_mean[2], norm_std[2]
y_train = (y_train - prec_mean)/ prec_std
y_test = (y_test - prec_mean)/ prec_std

In [8]:
# Run Tensorboard in the background
LOGDIR = '/tmp/log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOGDIR)
)

# Use ngrok to tunnel traffic to localhost
! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip ngrok-stable-linux-amd64.zip
get_ipython().system_raw('./ngrok http 6006 &')

# Retrieve public url
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

--2018-12-10 13:26:31--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 35.173.6.94, 34.238.3.58, 34.235.97.255, ...
Connecting to bin.equinox.io (bin.equinox.io)|35.173.6.94|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5363700 (5.1M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip.1’


2018-12-10 13:26:33 (3.53 MB/s) - ‘ngrok-stable-linux-amd64.zip.1’ saved [5363700/5363700]

Archive:  ngrok-stable-linux-amd64.zip
replace ngrok? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: ngrok                   
http://e2289566.ngrok.io


In [0]:
# Network Parameters
n_in = x_train.shape[1] # number of input
n_hid_1 = 256 #neuron in the 1st layer
n_hid_2 = 128
n_hid_3 = 64
n_hid_4 = 32
n_out = y_train.shape[1] # number of output

# Create layer template
def layer(x, size_in, size_out, relu_bool=True, name='layer'):
  with tf.name_scope(name):
    with tf.name_scope('weights'):
      weight = tf.Variable(tf.random_normal([size_in, size_out], stddev=1), name='weight')
      tf.summary.histogram('weights', weight)
    with tf.name_scope('biases'):
      bias = tf.Variable(tf.constant(0.1, shape=[size_out]), name='bias')
      tf.summary.histogram('biases', bias)
      
    with tf.name_scope('pre_activations'):
      _layer = tf.add(tf.matmul(x,weight), bias)
      tf.summary.histogram('pre_activations', _layer)
    
    if relu_bool:
      with tf.name_scope('activations'):
        _layer = tf.nn.relu(_layer)
        tf.summary.histogram('activations', _layer)
      
    return _layer
  
# Batch normalization template
def bn(x, trainable, name='bn'):
  with tf.name_scope('batch_norm'):
    _mean, _var, _epsilon, _offset, _scale = 0, 1, 1e-4, 0, 1
    _layer = tf.nn.batch_normalization(x, mean=_mean, variance=_var, offset=_offset, scale=_scale, variance_epsilon=_epsilon, name='batch_norm')
      
  return _layer

In [0]:
# Training parameters
num_epoch = 100000
batch_size = 727 # 727*5 batch = 3635 training sample
display_epoch = 5000
summ_epoch = 5
MODELDIR = '/tmp/model/model.ckpt'

# Create NN model 
def neural_net(learning_rate):
  tf.reset_default_graph() # clear graph stack
  sess = tf.Session() # declare a session
  
  # tf Graph input
  X = tf.placeholder("float", [None, n_in], name='inputs')
  Y = tf.placeholder("float", [None, n_out], name='labels')
  
  # Layer connection
  #layer_0 = bn(X, False, 'bn_0')
  layer_1 = layer(X, n_in, n_hid_1, True,  'layer_1')
  layer_2 = layer(layer_1, n_hid_1, n_hid_2, True, 'layer_2')
  layer_3 = layer(layer_2, n_hid_2, n_hid_3, True, 'layer_3')
  layer_4 = layer(layer_3, n_hid_3, n_hid_4, True, 'layer_4')
  layer_out = layer(layer_4, n_hid_4, n_out, False, 'layer_out')
  
  # True data information
  with tf.name_scope('constant'):
    _prec_mean = tf.constant(prec_mean.astype(np.float32), name='prec_mean')
    _prec_std = tf.constant(prec_std.astype(np.float32), name='prec_std')
  
  # Loss function
  with tf.name_scope('losses'):
    loss = tf.reduce_mean(tf.square(tf.square(layer_out - Y)), name='loss') # mean-quartic-error
    trueloss = tf.reduce_mean(tf.multiply(tf.abs(layer_out - Y), _prec_std) + _prec_mean) # De-normalized mean loss
  tf.summary.scalar('loss', loss)
  tf.summary.scalar('trueloss', trueloss)
  
  # Optimizer
  with tf.name_scope('train'):
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
  
  summ = tf.summary.merge_all() # merge all summaries for Tensorboard
  saver = tf.train.Saver() # declare NN config saver
  
  # Draw graph
  sess.run(tf.global_variables_initializer()) # initialize session
  
  writer = tf.summary.FileWriter(LOGDIR + '/train') # a file writer
  writer.add_graph(sess.graph) # write the graph in the session
  
  # Train
  for epoch in range(1, num_epoch+1):
    # Batching
    loss_avg = 0.0
    total_batch = int(len(x_train) / batch_size)
    x_batches = np.array_split(x_train, total_batch)
    y_batches = np.array_split(y_train, total_batch)     
    for i in range(total_batch):
      batch_x, batch_y = x_batches[i], y_batches[i]
      _opt, _loss, _summ = sess.run([optimizer, trueloss, summ], feed_dict={X: batch_x,
                                                                            Y: batch_y})           
      loss_avg += _loss / total_batch
    
    if epoch % summ_epoch == 0:
      writer.add_summary(_summ, epoch)
    
    if epoch % display_epoch == 0:
      print("epoch " + str(epoch) + ", training mean true loss=" + "{:.5f}".format(np.sqrt(loss_avg)))
  print("Optimization Finished!")
  
  # Save NN model
  save_path = saver.save(sess, MODELDIR)
  print("Model saved in path: %s" % save_path)

  # Test
  loss_test, output_test = sess.run([trueloss, layer_out], feed_dict={X: x_test,
                                                                      Y: y_test})
  print("testing mean true loss=" + "{:.5f}".format(np.sqrt(loss_test)))
  sess.close()
  return loss_test, output_test

In [0]:
# Construct model
learning_rate = 1e-3
loss_test, output_test = neural_net(learning_rate)

epoch 5000, training mean true loss=24.19718
epoch 10000, training mean true loss=1.96854
epoch 15000, training mean true loss=10.45553
epoch 20000, training mean true loss=10.21432
epoch 25000, training mean true loss=7.45508
epoch 30000, training mean true loss=0.70877
epoch 35000, training mean true loss=2.64760
epoch 40000, training mean true loss=4.77746
epoch 45000, training mean true loss=2.99932
epoch 50000, training mean true loss=2.37947


In [0]:
# Save log and model
!mv /tmp /content/gdrive/My\ Drive/Colab\ Notebooks/tmp

# (Alternative) Download model
#from google.colab import files
#!apt-get install zip && zip -r tmp.zip /tmp
#files.download('tmp.zip')

In [11]:
np.abs((output_test-y_test)* prec_std + prec_mean).max()

1072.6562033728535

In [1]:
# Plot
outputs = output_test * prec_std + prec_mean
labels = y_test * prec_std + prec_mean

plt.scatter(labels, outputs)

plt.xlabel('True precipitation')
plt.ylabel('Predicted precipitation')
axes = plt.gca()
#axes.set_xlim([0,20])
#axes.set_ylim([0,20])
plt.show()

NameError: ignored