In [11]:
from sqlalchemy import create_engine
from numpy import arange
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer

from neupy import algorithms

import warnings
warnings.filterwarnings('ignore')

import sys
sys.executable

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [3]:
localhost = {'user': 'postgres', 'password': 'postgres', 'host': 'localhost', 'port': 5432, 'db': 'fiadb'}
params = 'postgresql://{0}:{1}@{2}:{3}/{4}'
engine = create_engine(params.format(localhost['user'], localhost['password'], localhost['host'], localhost['port'], localhost['db']))
pergrid_base = """select distinct * from predictor.pergrid_base"""
pergrid_base_df = pd.read_sql(pergrid_base, engine)

In [4]:
pergrid_base_df.columns

Index(['grid_id', 'aet', 'ai', 'art', 'ewd', 'fa', 'map', 'mat', 'mpdq',
       'mtcq', 'pet', 'psn', 'ra', 'rmap', 'rmat', 'tsn', 'mfdf', 'alt', 'shg',
       'mtwq', 'wkb_geometry', 'tsr', 'wa'],
      dtype='object')

In [5]:
# separate predictor variables from outcome varaible
y = pergrid_base_df['tsr']
grid_id = pergrid_base_df['grid_id']
var= ['aet', 'ai', 'art', 'ewd', 'fa', 'map', 'mat', 'mpdq',
       'mtcq', 'pet', 'psn', 'ra', 'rmap', 'rmat', 'tsn', 'mfdf', 'alt','shg','mtwq', 'wa']
pergrid_base_df = pergrid_base_df[var]

In [6]:
# encode categorical variable
# pergrid_base_df_encoded=pd.get_dummies(pergrid_base_df, columns=["shg"])

In [7]:
# fill NaN with values from neighbor pixels
imputer = KNNImputer(n_neighbors=5)
pergrid_base_filled = imputer.fit_transform(pergrid_base_df)
pergrid_base_df = pd.DataFrame(pergrid_base_filled)
pergrid_base_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,6000.0,0.3675,21.191667,-2523666.0,7.7058,681.0,20.036236,87.0,16.299375,1826.768199,50.411522,16.0,2.833333,0.604166,444.860212,2.27,11.597701,5.8,28.7925,4.1166
1,65535.0,0.3829,20.233612,-2537902.0,14.1498,699.0,20.298634,92.0,16.340292,1811.927757,49.797996,14.0,2.0,0.620833,444.259982,2.1,5.98289,5.0,28.717458,11.8152
2,65535.0,1.30362,0.164286,-524.7088,0.0711,1148.5,0.309987,131.5,21.304167,1918.111111,0.713689,1.0,1.833333,0.241667,3.698817,6.524,0.011696,2.0,28.626191,302.9049
3,5135.0,0.2898,23.950167,-2941595.0,3.0042,565.0,20.193576,76.0,15.517042,1972.128352,43.00118,108.0,2.333333,0.391667,492.281141,2.12,61.886973,5.4,29.458834,1.2528
4,5645.0,0.3017,23.526261,-2959517.0,10.035,580.0,20.710174,80.0,15.754,1928.968811,44.328019,34.0,3.416667,0.620833,489.587254,2.12,32.270955,5.0,29.300416,4.248


In [8]:
# standarize predictors
ss = MinMaxScaler()
xstd = ss.fit_transform(pergrid_base_df)
# ystd = ss.fit_transform(y.to_numpy().reshape(-1, 1))

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(xstd, y, test_size=0.1, shuffle=True)

In [None]:
std_list = arange(0.05, 1, 0.005)
score_list = {}
for std in std_list:
    nw = algorithms.GRNN(std=std, verbose=False)
    nw.train(X_train, Y_train)
    Y_test_pred = nw.predict(X_test).flatten()
    mae = mean_absolute_error(Y_test_pred, Y_test)
    score_list.update({std:mae})

In [359]:
sorted_score_list = {k: v for k, v in sorted(score_list.items(), key=lambda item: item[1])}

0.107 4.363598602876512
0.108 4.363648415523315
0.109 4.363843217234224


In [379]:
nw = algorithms.GRNN(std=0.115, verbose=False) # 0.108 3.0961740739964703

In [380]:
nw.train(X_train, Y_train)

### Evaluation Metrics

In [381]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [382]:
Y_train_pred = nw.predict(X_train).flatten()
print('train MAE', mean_absolute_error(Y_train_pred, Y_train))
print('train r2', r2_score(Y_train, Y_train_pred))

train MAE 2.195526005601421
train r2 0.9311774290082482


In [383]:
Y_test_pred = nw.predict(X_test).flatten()
print('test MAE', mean_absolute_error(Y_test_pred, Y_test))
print('test r2', r2_score(Y_test, Y_test_pred))

test MAE 3.142242743736709
test r2 0.8858662888890027


In [384]:
grnn_y_test = pd.DataFrame(
    {'tsr': Y_test,
     'tsr_predicted': Y_test_pred})
grnn_y_test.to_sql(name='grnn_y_test', con=engine, schema='predictor', if_exists='replace', index=False)

### Ingest TSR predictions to PG

In [385]:
y_pred = nw.predict(xstd).flatten()

In [386]:
pergrid_all_predicted = pd.DataFrame(
    {'grid_id': grid_id,
     'tsr': y,
     'tsr_predicted': y_pred})

In [387]:
pergrid_all_predicted.head()

Unnamed: 0,grid_id,tsr,tsr_predicted
0,110,4.0,3.276433
1,111,2.0,2.000123
2,195,5.0,4.994345
3,337,1.0,2.803938
4,338,1.0,2.795763


In [388]:
pergrid_all_predicted.to_sql(name='grnn', con=engine, schema='predictor', if_exists='replace', index=False)

In [389]:
update_geom = """
alter table predictor.grnn add column if not exists wkb_geometry geometry(Polygon,4269);
update predictor.grnn A SET wkb_geometry = B.wkb_geometry
FROM predictor.pergrid_base B
WHERE A.grid_id = B.grid_id
"""

In [390]:
connection = engine.connect()
connection.execute(update_geom)

<sqlalchemy.engine.result.ResultProxy at 0x7fdb21aa49d0>

In [391]:
update_residual = """
alter table predictor.grnn add column residual double precision;
update predictor.grnn set residual = (tsr_predicted-tsr);
"""

In [392]:
connection = engine.connect()
connection.execute(update_residual)

<sqlalchemy.engine.result.ResultProxy at 0x7fda41cc1050>