In [1]:
from sqlalchemy import create_engine
from numpy import arange
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer

from neupy import algorithms

import warnings
warnings.filterwarnings('ignore')

import sys
sys.executable

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


'/Users/lianfeng/miniconda3/bin/python'

In [53]:
localhost = {'user': 'postgres', 'password': 'postgres', 'host': 'localhost', 'port': 5432, 'db': 'fiadb'}
params = 'postgresql://{0}:{1}@{2}:{3}/{4}'
engine = create_engine(params.format(localhost['user'], localhost['password'], localhost['host'], localhost['port'], localhost['db']))
pergrid_base = """select distinct * from predictor.pergrid_base_eus"""
pergrid_base_df = pd.read_sql(pergrid_base, engine)

In [54]:
frames = [pergrid_base_df, pergrid_base_df]
pergrid_base_df2 = pd.concat(frames)

In [55]:
pergrid_base_df2.shape

(15246, 23)

In [58]:
# separate predictor variables from outcome varaible
y = pergrid_base_df2['tsr']
grid_id = pergrid_base_df2['grid_id']
var= ['aet', 'ai', 'art', 'ewd', 'fa', 'map', 'mat', 'mpdq',
       'mtcq', 'pet', 'psn', 'ra', 'rmap', 'rmat', 'tsn', 'mfdf', 'alt','shg','mtwq', 'wa']
pergrid_base_df2 = pergrid_base_df2[var]

In [59]:
# encode categorical variable
# pergrid_base_df_encoded=pd.get_dummies(pergrid_base_df, columns=["shg"])

In [60]:
# fill NaN with values from neighbor pixels
imputer = KNNImputer(n_neighbors=5)
pergrid_base_filled = imputer.fit_transform(pergrid_base_df2)
pergrid_base_df = pd.DataFrame(pergrid_base_filled)
pergrid_base_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,65535.0,1.02566,0.164286,-524.708793,0.0711,1148.5,0.309987,131.5,21.304167,1918.111111,0.713689,1.0,1.833333,0.241667,3.698817,5.65,0.011696,1.4,28.626191,302.9049
1,13144.0,0.6577,8.82239,-894732.416005,0.1062,1208.0,12.47678,128.0,19.837514,1833.026596,32.850457,1.0,4.333333,0.679166,169.350913,3.52,0.774757,3.2,27.766178,149.0013
2,13867.0,0.667,7.376786,-656234.650346,0.0549,1245.0,10.635709,130.0,19.96661,1850.611285,27.834838,1.0,7.833333,0.945833,143.805179,3.26,0.628627,5.6,27.982256,164.0862
3,65535.0,0.92404,1.734753,-48762.101679,0.0342,1223.0,2.873174,130.5,20.691381,1898.352941,7.071733,2.0,10.083333,0.6375,36.816597,4.76,0.172147,2.2,28.2394,353.2644
4,65535.0,1.0233,0.631731,-6380.557906,0.0027,1259.2,1.045456,258.4,5.844905,1895.387097,2.569308,2.0,4.0,0.2125,13.451322,4.76,0.068093,1.4,23.520496,121.9797


In [61]:
# standarize predictors
ss = MinMaxScaler()
xstd = ss.fit_transform(pergrid_base_df)

In [62]:
X_train, X_test, Y_train, Y_test = train_test_split(xstd, y, test_size=0.2, shuffle=True)

In [69]:
from sklearn.metrics import mean_squared_error

std_list = arange(0.05, 0.10, 0.010)
for std in std_list:
    nw = algorithms.GRNN(std=std, verbose=False)
    nw.train(X_train, Y_train)
    Y_test_pred = nw.predict(X_test).flatten()
    mae = mean_absolute_error(Y_test_pred, Y_test)
    r2=r2_score(Y_test, Y_test_pred)
    print(std, mae)

0.05 1.5531803674178601
0.060000000000000005 1.8826688839718047
0.07 2.209261938179978
0.08000000000000002 2.507850011392867
0.09000000000000001 2.771643393307554


In [70]:
nw = algorithms.GRNN(std=0.05, verbose=False) # 0.108 3.0961740739964703

In [71]:
nw.train(X_train, Y_train)

### Evaluation Metrics

In [72]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [None]:
Y_train_pred = nw.predict(X_train).flatten()
print('train MAE', mean_absolute_error(Y_train_pred, Y_train))
print('train r2', r2_score(Y_train, Y_train_pred))

In [None]:
Y_test_pred = nw.predict(X_test).flatten()
print('test MAE', mean_absolute_error(Y_test_pred, Y_test))
print('test r2', r2_score(Y_test, Y_test_pred))

In [24]:
grnn_y_test_eus = pd.DataFrame(
    {'tsr': Y_test,
     'tsr_predicted': Y_test_pred})
grnn_y_test_eus.to_sql(name='grnn_y_test_eus', con=engine, schema='predictor', if_exists='replace', index=False)

### Ingest TSR predictions to PG

In [385]:
y_pred = nw.predict(xstd).flatten()

In [386]:
pergrid_all_predicted = pd.DataFrame(
    {'grid_id': grid_id,
     'tsr': y,
     'tsr_predicted': y_pred})

In [387]:
pergrid_all_predicted.head()

Unnamed: 0,grid_id,tsr,tsr_predicted
0,110,4.0,3.276433
1,111,2.0,2.000123
2,195,5.0,4.994345
3,337,1.0,2.803938
4,338,1.0,2.795763


In [388]:
pergrid_all_predicted.to_sql(name='grnn', con=engine, schema='predictor', if_exists='replace', index=False)

In [389]:
update_geom = """
alter table predictor.grnn add column if not exists wkb_geometry geometry(Polygon,4269);
update predictor.grnn A SET wkb_geometry = B.wkb_geometry
FROM predictor.pergrid_base B
WHERE A.grid_id = B.grid_id
"""

In [390]:
connection = engine.connect()
connection.execute(update_geom)

<sqlalchemy.engine.result.ResultProxy at 0x7fdb21aa49d0>

In [391]:
update_residual = """
alter table predictor.grnn add column residual double precision;
update predictor.grnn set residual = (tsr_predicted-tsr);
"""

In [392]:
connection = engine.connect()
connection.execute(update_residual)

<sqlalchemy.engine.result.ResultProxy at 0x7fda41cc1050>