In [1]:
# import utility libraries
from netCDF4 import Dataset
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
import cv2
%matplotlib inline

# import machine learning tools
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential, Graph
from keras.layers import Dense, Flatten, Activation, Dropout
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.optimizers import SGD
from keras import backend as K
from sklearn.metrics import roc_auc_score, accuracy_score

from clustering import Location_Clusterer
from util import plot_list_in_2D, reformat_y, plot_compare_map
from nn_input import NN_Input
from build_NN import build_base_sequential_NN

Using Theano backend.
Using gpu device 0: GRID K520 (CNMeM is disabled, cuDNN not available)


In [None]:
# Cluster the grid locations from a set of NetCDF layers.
n = 8  # number of clusters requested from Location_Clusterer
folder = '/home/ubuntu/dataset/'

# files[i] is read using the variable name var_names[i].
files = ['veg.nc', 'ppt.monthly.mask.nc', 'tmean.monthly.mask.nc', 'elev.nc']
var_names = ['Cv', 'Band1', 'Band1', 'elev']

lc = Location_Clusterer(n_clusters=n)
for file_name, var_name in zip(files, var_names):
    source_path = folder + file_name
    lc.read_data(source_path, var_name)

# Prepare the loaded layers, then assign one cluster label per row of lc.data2d.
lc.transform_data()
clusters = lc.fit_predict(lc.data2d)

In [None]:
# Create the network-input helper, then attach the label layer and the
# feature layers it will sample from.
nn = NN_Input(predict=2, history=5, box=5, random_seed=42)
nn.load_labels(folder+'sign.label.nc', 'Band1')

# Full feature set, kept for reference (currently disabled):
# f_paths = ['all.ndvi.nc','all.max.of.Wind.nc', 'all.min.of.Tmin.nc', 'all.mean.of.Tmin.nc', 'all.sum.of.Prec.nc',
#            'all.max.of.Tmax.nc', 'all.mean.of.Tmax.nc','elev.nc', 'veg.nc']
# variables = ['Band1', 'Wind', 'Tmin', 'Tmin', 'Prec', 'Tmax', 'Tmax', 'elev', 'Cv']
# names = ['ndvi', 'max_wind', 'min_tmin', 'mean_tmin', 'total_prec', 'max_tmax', 'mean_tmax', 'elev', 'veg']
# feature_types = ['history_time_series', 'forecast_time_series', 'forecast_time_series', 'forecast_time_series',
#                  'forecast_time_series', 'forecast_time_series', 'forecast_time_series',
#                 'single_layer', 'multi_layers']

# Active feature set: the same Tmin file is registered twice, once as a
# history series and once as a forecast series.
f_paths = ['all.mean.of.Tmin.nc', 'all.mean.of.Tmin.nc']
variables = ['Tmin', 'Tmin']
names = ['mean_tmin_history', 'mean_tmin_forecast']
feature_types = ['history_time_series', 'forecast_time_series']

# The loop variable is deliberately not called `n`: loop variables persist at
# notebook scope, so `n` here would replace the integer cluster count `n`
# with a feature-name string.
for f_path, v, name, feature_type in zip(f_paths, variables, names, feature_types):
    nn.load_features(folder+f_path, v, name, feature_type)

In [None]:
point = [308, 290]
for i, loc in enumerate(lc.ind2d):
    if loc[0] == point[0] and loc[1] == point[1]:
        cluster = clusters[i]

print 'Modeling for cluster', cluster
subset = lc.ind2d[clusters==cluster]

print 'Getting training dataset'
id_train, y_train, X_map_train = nn.select(n=100000, subset=subset)
y_train = reformat_y(y_train)

In [None]:
# Scaling data

map_dimensions=X_map_train[0].shape
print map_dimensions
print np.all(X_map_train[0] == X_map_train[1])

mean_train = np.mean(X_map_train.flatten())
std_train = np.std(X_map_train.flatten())
X_map_train = (X_map_train-mean_train)/std_train
X_map_test = (X_map_test-mean_train)/std_train
print y_train.shape

In [None]:
# Build the network for the sample shape, train it with the test set as
# validation data, then predict on both sets with the fitted model.
model = build_base_sequential_NN(
    nb_filters=64,
    map_dimensions=map_dimensions,
)
model.fit(
    X_map_train,
    y_train,
    batch_size=50,
    nb_epoch=5,
    verbose=True,
    validation_data=(X_map_test, y_test),
)
train_predict = model.predict(X_map_train, verbose=True)
test_predict = model.predict(X_map_test, verbose=True)

In [None]:
threshold = 0.5

print 'Training set:'
print 'Bench mark:', np.sum(y_train[:,1])/float(len(y_train))
print 'Accuracy:', accuracy_score(y_train[:,0], (train_predict[:,0]>threshold))
print 'ROC AUC:', roc_auc_score(y_train[:,0], train_predict[:,0])
print 
print 'Testing set:'
print 'Bench mark:', np.sum(y_test[:,1])/float(len(y_test))
print 'Accuracy:', accuracy_score(y_test[:,0], (test_predict[:,0]>threshold))
print 'ROC AUC:', roc_auc_score(y_test[:,0], test_predict[:,0])

In [None]:
# For every requested time step: select that step's samples inside the
# cluster, scale them with the training statistics, predict, and plot the
# thresholded prediction against the labels. The returned image paths are
# collected in image_paths.
image_paths = []
for t in xrange(216, 217):

    id_test, y_test, X_map_test = nn.select(t=t, subset=subset)
    y_test = reformat_y(y_test)
    X_map_test = (X_map_test - mean_train) / std_train

    # Columns of id_test are used as integer indices:
    # column 0 -> nn.times, column 1 -> nn.lats, column 2 -> nn.lons.
    time_index = id_test[0, 0].astype(int)
    lon_index = id_test[:, 2].astype(int)
    lat_index = id_test[:, 1].astype(int)
    timestamp = nn.times[time_index]
    xs = nn.lons[lon_index]
    ys = nn.lats[lat_index]
    test_predict = model.predict(X_map_test)

    predicted_positive = test_predict[:, 0] > threshold
    path = plot_compare_map(xs, ys, y_test[:, 0], predicted_positive, timestamp)
    image_paths.append(path)