In [1]:
# import utility libraries
from netCDF4 import Dataset
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
import cv2
from geojson import FeatureCollection, Polygon, Feature, dump
%matplotlib inline

# import machine learning tools
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential, Graph
from keras.layers import Dense, Flatten, Activation, Dropout
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.optimizers import SGD
from keras import backend as K
from keras.models import model_from_json
from sklearn.metrics import roc_auc_score, accuracy_score

from clustering import Location_Clusterer
from util import plot_list_in_2D, reformat_y, plot_compare_map, scale_data, evaluate_model
from nn_input import NN_Input
from build_NN import build_base_sequential_NN

Using Theano backend.
Using gpu device 0: GRID K520 (CNMeM is disabled, cuDNN not available)


In [2]:
# Load the trained per-cluster models and the grid cells assigned to each cluster.
#
# Produces:
#   models     -- dict: cluster number -> compiled Keras model
#   df_cluster -- DataFrame with columns xs, ys, cluster_num (one row per grid cell)
#   df_scaling -- DataFrame read from scaling_weights.txt; a later cell reads its
#                 'mean' and 'std' columns by positional row == cluster number
folder = '/home/ubuntu/dataset/'
model_dir = '/home/ubuntu/fall-foliage-finder/models/tmin/'
image_paths = []
n_clusters = 8

models = {}
cluster_frames = []

for cluster in range(n_clusters):
    json_file = model_dir + 'model_architecture_' + str(cluster) + '.json'
    weights_file = model_dir + 'model_weights_' + str(cluster) + '.h5'

    sgd = SGD()
    # Read the architecture inside a context manager so the file handle is
    # closed immediately instead of being left to the garbage collector.
    with open(json_file) as architecture_file:
        models[cluster] = model_from_json(architecture_file.read())
    models[cluster].load_weights(weights_file)
    models[cluster].compile(loss='categorical_crossentropy',
                            optimizer=sgd,
                            metrics=['accuracy'])

    df_subset = pd.read_table(model_dir + 'cluster_list_' + str(cluster) + '.txt',
                              delimiter=' ', names=['xs', 'ys'])
    df_subset['cluster_num'] = np.ones(len(df_subset), dtype=int)*cluster
    cluster_frames.append(df_subset)

# Concatenate once at the end: avoids re-copying the accumulated frame on every
# iteration and avoids seeding pd.concat with None.
df_cluster = pd.concat(cluster_frames)

df_scaling = pd.read_table(model_dir + 'scaling_weights.txt', delimiter=' ')

In [None]:
# Build the input generator and attach the label grid to it.
# NOTE(review): the meaning of predict/history/box is defined in nn_input.py,
# which is not visible here -- confirm before changing these values.
nn = NN_Input(predict=2, history=5, box=5, random_seed=42)
nn.load_labels(folder+'sign.label.nc', 'Band1')

# One row per feature: (file name, NetCDF variable, feature name, feature type).
# The same Tmin file is registered twice: once as a history series and once as
# a forecast series.
feature_specs = [
    ('all.mean.of.Tmin.nc', 'Tmin', 'mean_tmin_history', 'history_time_series'),
    ('all.mean.of.Tmin.nc', 'Tmin', 'mean_tmin_forecast', 'forecast_time_series'),
]

# Keep the column-wise lists available under their original names.
f_paths, variables, names, feature_types = [list(column) for column in zip(*feature_specs)]

for f_path, v, n, feature_type in feature_specs:
    nn.load_features(folder+f_path, v, n, feature_type)

In [None]:
# For each time point, run every cluster's model on that cluster's grid cells,
# merge the results, and write one GeoJSON file of square cells carrying the
# true label and the binned prediction.
threshold = 0.5  # only referenced by the disabled plot_compare_map call below
res = 0.0625/2   # half the side length of the square drawn around each point
                 # NOTE(review): presumably half the grid spacing in degrees -- confirm


for timepoint in range(217, 227):
    print('for timepoint %s' % timepoint)
    # Per-cluster results are collected in lists and stacked once afterwards.
    # The previous `if ids == []` first-iteration check compared a NumPy array
    # with a list from the second cluster on, which is deprecated/ambiguous.
    id_parts, true_parts, predict_parts = [], [], []

    # sorted() makes the cluster order (and thus the feature order in the
    # output file) deterministic regardless of dict ordering.
    for cluster, model in sorted(models.items()):
        print('for cluster %s' % cluster)
        subset = df_cluster[df_cluster.cluster_num == cluster][['xs', 'ys']].values
        id_test, y_test, X_map_test = nn.select(t=timepoint, subset=subset)
        y_test = reformat_y(y_test)
        # Scaling parameters are looked up by row position == cluster number.
        # NOTE(review): assumes scaling_weights.txt rows are in cluster order -- confirm
        cluster_scaling = df_scaling.iloc[cluster]
        X_test = scale_data(X_map_test, cluster_scaling['mean'], cluster_scaling['std'])
        test_predict = model.predict(X_test)

        id_parts.append(id_test)
        true_parts.append(y_test)
        predict_parts.append(test_predict)

    ids = np.vstack(id_parts)
    y_true = np.vstack(true_parts)
    y_predict = np.vstack(predict_parts)

    # ids columns are used as indices: 0 -> nn.times, 1 -> nn.lats, 2 -> nn.lons.
    timestamp = nn.times[(ids[0,0]).astype(int)]
    xs = nn.lons[(ids[:,2]).astype(int)]
    ys = nn.lats[(ids[:,1]).astype(int)]

    features = []
    for x, y, val_true, val_predict in zip(xs, ys, y_true[:,0], y_predict[:,0]):
        # Closed ring: the first corner is repeated as the last point.
        poly = Polygon([[(x-res, y-res), (x-res, y+res), (x+res, y+res), (x+res, y-res), (x-res, y-res)]])
        # Truncate the column-0 prediction to an integer bin (value * 10).
        predict_class = int(val_predict*10)
        features.append(Feature(geometry=poly, properties={"true": float(val_true), "predict": float(predict_class)}))
    fc = FeatureCollection(features)

    # Disabled: side-by-side map plot of truth vs. thresholded prediction.
    #path = plot_compare_map(xs, ys, y_true[:,0], (y_predict[:,0]>threshold), timestamp)
    #image_paths.append(path)

    with open(folder+'output/'+str(int(timestamp))+'.geojson', 'w') as outfile:
        dump(fc, outfile)

for timepoint 217
for cluster 0
for cluster 1
