In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# If memory growth is enabled for a PhysicalDevice,
# the runtime initialization will not allocate all memory on the device. 
# Memory growth cannot be configured on a PhysicalDevice with virtual devices configured.
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU') 
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [13]:
import sys
sys.path.append("./backend/models/src/")


import numpy as np

import utils
from lstm import LSTM
import training as train

In [14]:
df, geohash_idx2hash = munge.prepare_historical_data(min_magnitude=3, 
                                                     cols_to_keep=["Longitude", "Latitude", "Magnitude"])
df.head(3)

Unnamed: 0,magnitude,geohash,geohash_idx
0,6.0,x5,385
1,5.8,wb,358
2,6.2,2h,17


In [15]:
# get target
y = utils.get_y(df)

In [18]:
# get valid memory/batch size/training proportion combos
param_combos = utils.get_param_combos(y)
param_combos

[(0.7, 1, {2341}),
 (0.71, 2, {4}),
 (0.71, 6, {8}),
 (0.72, 1, {5}),
 (0.72, 6, {10, 25, 50}),
 (0.73, 2, {4, 8, 16}),
 (0.75, 1, {2}),
 (0.75, 3, {6}),
 (0.76, 1, {2}),
 (0.76, 3, {6}),
 (0.77, 1, {2}),
 (0.77, 2, {7}),
 (0.77, 3, {6}),
 (0.77, 9, {14, 21, 42}),
 (0.78, 1, {2, 5, 10}),
 (0.78, 2, {19}),
 (0.78, 3, {6}),
 (0.78, 6, {15}),
 (0.79, 1, {2}),
 (0.79, 3, {6}),
 (0.8, 1, {2, 2341, 4682}),
 (0.8, 3, {6}),
 (0.81, 1, {2}),
 (0.81, 3, {6}),
 (0.82, 1, {2}),
 (0.82, 2, {11}),
 (0.82, 3, {6}),
 (0.83, 1, {2, 5, 10}),
 (0.83, 3, {6}),
 (0.83, 6, {15, 25, 75}),
 (0.84, 1, {5}),
 (0.84, 2, {4, 8, 16}),
 (0.84, 6, {10, 20}),
 (0.86, 2, {4, 7, 14, 28}),
 (0.86, 6, {8}),
 (0.88, 2, {4, 8}),
 (0.89, 1, {5}),
 (0.89, 2, {11, 22}),
 (0.89, 3, {83}),
 (0.89, 6, {10})]

In [66]:
# setting run variables

# structure vars from get_param_combos() 
TRAIN_PROPORTION = .83
BATCH_SIZE = 1
MEMORY = 6

# other structure variables 
DROPOUT = .6
UNITS = 64
ACTIVATION = "tanh"

# training variables
ALPHA = 1e-06
ACCURACY_K = 10
EPOCHS = 1
CLASS_COUNT = len(np.unique(y))

In [67]:
# breakup the data into a training and test set 
train_generator, test_generator, TRAINING_WIDTH = train.tt_split(y, TRAIN_PROPORTION, MEMORY, BATCH_SIZE)

In [68]:
# create the model
lstm = LSTM(DROPOUT, ACTIVATION, ALPHA, UNITS, CLASS_COUNT, BATCH_SIZE, MEMORY, TRAINING_WIDTH, ACCURACY_K)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_lstm (LSTM)            (1, 6, 64)                16896     
_________________________________________________________________
hidden_lstm (LSTM)           (1, 64)                   33024     
_________________________________________________________________
dropout (Dropout)            (1, 64)                   0         
_________________________________________________________________
output (Dense)               (1, 424)                  27560     
Total params: 77,480
Trainable params: 77,480
Non-trainable params: 0
_________________________________________________________________
None


In [69]:
# train (stateful model)
lstm.stateful_train(train_generator, EPOCHS)




In [85]:
# run on test data
max_index_col = lstm.test(test_generator)

 528/3975 [==>...........................] - ETA: 16s

KeyboardInterrupt: 

In [87]:
prediction_df = munge.unpack_predictions(max_index_col, geohash_idx2hash)
display(prediction_df)
to_insert = prediction_df.to_dict(orient="records") # (for other notebook...)
to_insert

Unnamed: 0,latitude,longitude
0,-14.0625,-73.125


[{'latitude': -14.0625, 'longitude': -73.125}]

In [41]:
test_predictions = lstm.test(test_generator)
test_predictions # add confusion matrix, etc.



array([65, 65, 65, ..., 65, 65, 65])

Once the model is trained, we can generate a prediction for future dates. The idea here — generate a prediction for one future step, using the last 12 steps. Add new prediction to the array, remove the first entry from the same array and predict the next step with an updated array of 12 steps.


https://stats.stackexchange.com/questions/174026/is-it-legitimate-to-refit-my-best-model-with-my-test-data-as-a-final-step-in-the?rq=1

In [71]:
n_input = MEMORY

pred_list = []
last_memory_batch = y[-n_input:].reshape((1, n_input, TRAINING_WIDTH))

In [72]:
last_memory_batch[0].shape

(6, 1)

In [76]:
for i in range(n_input):  
    pred_list.append(lstm.model.predict(last_memory_batch)) 
    print(pred_list)
    break
#     batch = np.append(batch[:,1:,:],[[pred_list[i]]], axis=1)

[array([[0.00212666, 0.00249066, 0.00274847, 0.00230333, 0.00254793,
        0.0024041 , 0.00205913, 0.00255496, 0.00252079, 0.00216205,
        0.00218657, 0.00226603, 0.0024705 , 0.00199615, 0.00290903,
        0.00291669, 0.00222745, 0.00213845, 0.00287924, 0.00200959,
        0.00188322, 0.0020001 , 0.00222451, 0.00246872, 0.00214157,
        0.0020678 , 0.00235897, 0.00230502, 0.00264481, 0.00279259,
        0.00239726, 0.00196944, 0.00250524, 0.0022767 , 0.00235279,
        0.00268583, 0.00255392, 0.00246852, 0.00257525, 0.00197707,
        0.00213761, 0.00221334, 0.0022149 , 0.00275296, 0.00222832,
        0.0020931 , 0.00233639, 0.00254231, 0.00259375, 0.00243634,
        0.00277229, 0.00197449, 0.00265802, 0.00250529, 0.0020584 ,
        0.0023786 , 0.0025167 , 0.00225672, 0.00252109, 0.00268841,
        0.00231953, 0.00211948, 0.00260761, 0.00231187, 0.00261153,
        0.00310364, 0.00258903, 0.00253882, 0.0036233 , 0.00296588,
        0.00247422, 0.00259603, 0.00253454, 0.0

https://machinelearningmastery.com/how-to-use-the-timeseriesgenerator-for-time-series-forecasting-in-keras/

https://towardsdatascience.com/time-series-prediction-beyond-test-data-3f4625019fd9

## writing predictions to predictions table 

In [3]:
pip install psycopg2-binary


Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)
     |████████████████████████████████| 3.4 MB 7.2 MB/s            
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.1
Note: you may need to restart the kernel to use updated packages.


In [10]:
import os
import psycopg2
import pandas as pd

In [7]:
df= pd.DataFrame({"a":[4, 6], "b":[2, 5], "c":[45, 7]}) 

In [8]:
for row in df.to_dict(orient="records"):
    print(row)

{'a': 4, 'b': 2, 'c': 45}
{'a': 6, 'b': 5, 'c': 7}


In [11]:
POSTGRES_USER = os.environ['POSTGRES_USER']
POSTGRES_PASSWORD = os.environ['POSTGRES_PASSWORD']

In [13]:
# CHANGE TO CREDENTIALS FRM OS (db name and user too!)
conn = psycopg2.connect(f"dbname=quakedb user={POSTGRES_USER} password={POSTGRES_PASSWORD} host=postgres_db")

In [14]:
cur = conn.cursor()

In [19]:
cur.execute("rollback")
cur.execute("CREATE TABLE predictions (id serial PRIMARY KEY, longitude NUMERIC, latitude NUMERIC);")

In [21]:
predictions_to_insert = [{'latitude': -14.0625, 'longitude': -73.125}] # copied from model.ipynb...

[{'latitude': -14.0625, 'longitude': -73.125}]

In [25]:
for k2v in predictions_to_insert:
    cur.execute(f"INSERT INTO predictions (latitude, longitude) VALUES ({k2v['latitude']}, {k2v['longitude']})")

In [26]:
# test
cur.execute("SELECT * FROM predictions;")
cur.fetchone()

(1, Decimal('-73.125'), Decimal('-14.0625'))

In [27]:
# persist changes
conn.commit()

In [None]:
# close communication
cur.close()
conn.close()