# Waterbag Model API Deployment Test Environment

### Utility functions

In [1071]:
# pip install --upgrade google-cloud-bigquery
import os
import pickle
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import pymongo
import pytz
import requests
from google.cloud import bigquery
from google.oauth2 import service_account
from sklearn.preprocessing import MinMaxScaler as mms

tz_br = pytz.timezone('Brazil/East')
datetime.now(tz_br).isoformat()

'2022-09-18T16:08:57.685540-03:00'

#### Flatten stations' observations

In [497]:
def row_map(row):
    """Suffix one row's labels with the row's index label.

    Parameters
    ----------
    row : tuple
        An ``(index_label, Series)`` pair, as yielded by
        ``DataFrame.iterrows()``.

    Returns
    -------
    pandas.Series
        The row's values relabelled as ``'<column> - <index_label>'``.
    """
    label, readings = row
    # str() keeps this working when the index label is not a string
    # (e.g. an integer station id).
    return readings.add_suffix(' - ' + str(label))


def flat_observations(data):
    """Flatten a two-dimensional frame into a single labelled Series.

    Every cell of ``data`` becomes one entry labelled
    ``'<column> - <index_label>'``; rows are emitted in their original order.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with one row per station and one column per variable.

    Returns
    -------
    pandas.Series
        Concatenation of all relabelled rows. An empty Series is returned
        when ``data`` has no rows.
    """
    rows = [row_map(row) for row in data.iterrows()]
    if not rows:
        # pd.concat raises ValueError on an empty list of objects.
        return pd.Series(dtype=object)
    return pd.concat(rows)

---
# Model deployment information

In [879]:
# Table of deployment information for the model, indexed by its first column.
info_path = 'Modelos/Catete/InformaçõesDeployCatete.csv'
deploy_info = pd.read_csv(filepath_or_buffer=info_path, index_col=0)

# Name-mapping tables for the Alerta-Rio payload. `from ... import` does not
# bind the module name itself, so the `deploy_info` DataFrame above is kept.
from deploy_info import (
    alerta_feature_name_map,
    alerta_station_name_id_map,
)

---
# INMET BigQuery request - Python client library

In [860]:
# Service-account key file used to authenticate against BigQuery.
google_credentials = '../../../../Apps/Servers/bolsao-api/pluvia-360323-cba05f315c7e.json'
credentials = service_account.Credentials.from_service_account_file(google_credentials)

# Pass the project explicitly instead of relying on it being inferred.
project_id = 'pluvia-360323'
client = bigquery.Client(project=project_id, credentials=credentials)

# `pd.datetime` was deprecated in pandas 1.0 and removed in 2.0;
# `pd.Timestamp.now()` is the supported equivalent.
yesterday = (pd.Timestamp.now() - pd.offsets.Day()).date().isoformat()

# Everything from yesterday's partition onwards, newest rows first.
query = f'''
SELECT * FROM `datario.meio_ambiente_clima.meteorologia_inmet`
WHERE data_particao >= "{yesterday}"
ORDER BY data_particao DESC, horario DESC
'''
print(query)

query_job = client.query(query)
# Each result row is converted to a plain dict before building the frame.
inmet = pd.DataFrame([dict(row) for row in query_job.result()])

### Inmet data preprocessing

# Bookkeeping columns that are not model inputs.
key_cols = ['primary_key', 'data_particao', 'horario']

# Last available record per station.
# The query orders rows newest-first, so `first()` picks the most recent value.
# NOTE(review): GroupBy.first() skips nulls column by column, so a station's
# row may combine values from different timestamps - confirm this is intended.
# `columns=` replaces the positional axis argument (removed in pandas 2.0) and
# avoids mutating the grouped frame in place.
last_records = inmet.groupby(['id_estacao']).first().drop(columns=key_cols)

# Flat stations' readings
inmet_flat = flat_observations(last_records)

  now = pd.datetime.now()



SELECT * FROM `datario.meio_ambiente_clima.meteorologia_inmet`
WHERE data_particao >= "2022-09-17"
ORDER BY data_particao DESC, horario DESC



---
# Alerta-Rio API request

In [861]:
AlertaAPI = r'http://websempre.rio.rj.gov.br/json/chuvas'

# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors instead of failing later on the JSON.
alerta_response = requests.get(AlertaAPI, timeout=30)
alerta_response.raise_for_status()
alerta_raw = pd.DataFrame(alerta_response.json()['objects'])

# Alerta-Rio data preprocessing
# Expand each station's `data` entry into columns, index the rows by station id
# (as strings) and rename the columns to the model's feature names.
# NOTE(review): station names missing from the map become the label 'nan'
# after astype('str') - confirm that is acceptable.
alerta = pd.DataFrame(
    alerta_raw['data'].tolist(),
    index=alerta_raw['name'].map(alerta_station_name_id_map).astype('str')
).rename(columns=alerta_feature_name_map)

# Flat stations observations
alerta_flat = flat_observations(alerta)

---
# Feature transformation

### Combine and transform observations from both sources

In [862]:
#### Reorder readings to match model input format
observations = pd.concat([inmet_flat, alerta_flat]).loc[deploy_info.index]

#### Fill missing values with variable minimum
# This must happen BEFORE scaling: the scaler is fit on deploy_info, so
# deploy_info['min'] is expressed in raw (unscaled) units. Filling after the
# transform would place raw values into the scaled feature space.
na_msk = observations.isna()
observations.loc[na_msk] = deploy_info.loc[na_msk, 'min']

# Single-row frame: one column per feature, in deploy_info order.
features = observations.to_frame().T
print(features.shape)

#### Min max scale transformation
scaler = mms().fit(deploy_info.T)
# Build a fresh numeric frame rather than assigning back into the original.
features = pd.DataFrame(
    scaler.transform(features),
    index=features.index,
    columns=features.columns,
)

(1, 217)


---
# Model prediction

In [863]:
def calibrate(prob, threshold=.5):
    """Rescale a probability so that ``threshold`` maps to 0.5.

    Values below ``threshold`` are mapped linearly onto the lower half of the
    output range; values at or above it are mapped linearly onto the upper
    half.

    Parameters
    ----------
    prob : float
        Probability to rescale.
    threshold : float, default 0.5
        Input value that should land on 0.5 in the output.

    Returns
    -------
    float
        The rescaled probability.
    """
    is_below = prob < threshold
    return (
        0.5 * prob / threshold
        if is_below
        else 0.5 + 0.5 * (prob - threshold) / (1 - threshold)
    )

In [863]:
model_name = 'catete'
model_threshold = 0.1

# A context manager closes the file handle once the model is loaded.
# NOTE: pickle.load can execute arbitrary code - only load model files that
# come from a trusted source.
with open('../ModeloBolsõesCatete.sav', 'rb') as model_file:
    model = pickle.load(model_file)

# Second column of the first (only) row of predict_proba's output.
yprob = model.predict_proba(features)[0][1]

# Kept as a one-element list, the shape later cells index into.
yprob_cal = [calibrate(yprob, model_threshold)]

In [908]:
# Display the calibrated probability.
yprob_cal

array([0.00793794])

### Prediction record

In [1047]:
# Scratch check: current time of day as an ISO string (no timezone passed to now()).
datetime.now().time().isoformat()

'15:58:35.592322'

In [1057]:
# Scratch check: this looks up `tzinfo` on the datetime class itself, not on an instance.
datetime.tzinfo

<attribute 'tzinfo' of 'datetime.datetime' objects>

In [898]:
prob_threshold = .1

# NOTE(review): datetime.now() is timezone-naive, so the stored date/time
# follow the clock of whatever machine runs this cell - confirm that is intended.
now = datetime.now()
today = now.date().isoformat()
time = now.time().isoformat()[:8]

# `yconf` was not defined by any cell of this notebook, so a fresh kernel
# raised NameError here.
# NOTE(review): the formula is inferred from the API sample output, where
# probability 0.00794 is paired with confidence 0.98412 (= |2p - 1|) - confirm.
yconf = [abs(2 * prob - 1) for prob in yprob_cal]

# One document per model run; plain floats keep the record serialisable.
prediction = [{
    'date': today,
    'time': time,
    'cluster': model_name,
    'prediction': [{
        'range': '30min',
        'probability': float(yprob_cal[0]),
        'confidence': float(yconf[0]),
    }],
}]

  now = pd.datetime.now()


---
# Save prediction to mongo database

In [867]:
# The connection string carries the database username and password, so it is
# read from the environment instead of being committed with the notebook.
# Set MONGODB_URI before running; a missing variable raises KeyError here.
# SECURITY: the credentials previously embedded in this cell must be treated
# as leaked and rotated.
conn_str = os.environ['MONGODB_URI']
client = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=10000)

# Write the prediction record(s) to the Waterbag.Prediction collection and
# show the ids assigned to them.
insert_result = client.Waterbag.Prediction.insert_many(prediction)
insert_result.inserted_ids

In [896]:
# Display the prediction record(s).
prediction

[{'date': '2022-09-18',
  'time': '11:46:45',
  'cluster': 'catete',
  'probability': {'30min': array([0.00793794])}}]

In [899]:
# Re-open the connection (15 s server-selection timeout) and insert the
# prediction record(s) again.
# NOTE(review): this repeats the insert from the earlier save cell. If
# `prediction` still holds the same dict objects, they presumably already carry
# the `_id` assigned by that insert and this call would fail on a duplicate
# key - confirm whether this cell is still needed.
client = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=15000)

insert_result = client["Waterbag"]["Prediction"].insert_many(prediction)

---
# Retrieve predictions

In [966]:
def to_id_str(obj):
    """Replace ``obj['_id']`` with its string form, in place.

    Parameters
    ----------
    obj : dict
        Mapping that contains an ``'_id'`` key.

    Returns
    -------
    dict
        The same object that was passed in, after modification.
    """
    obj.update({'_id': str(obj['_id'])})
    return obj

In [972]:
# `pd.datetime` was deprecated in pandas 1.0 and removed in 2.0;
# `pd.Timestamp.now()` is the supported equivalent.
now = pd.Timestamp.now()
today = now.date().isoformat()                    # 'YYYY-MM-DD'
time = now.time().isoformat(timespec='seconds')   # 'HH:MM:SS'

  now = pd.datetime.now()


### Predict endpoint

In [988]:
### Consult prediction database latest record
client = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=15000)

# Records dated after `yesterday`, or dated `yesterday` at or after the
# current time of day.
prediction_cursor = client.Waterbag.Prediction.find({
    "$or": [{
        "date": {'$gt': yesterday}
    }, {
        '$and': [{'date': {'$eq': yesterday}}, {'time': {'$gte': time}}]
    }]
})

latest_prediction = list(map(to_id_str, prediction_cursor))

# Rank records by (date, time), not by time of day alone: the window spans two
# calendar days, so yesterday's 23:55 must not outrank today's 00:05. Both
# fields are ISO-formatted strings, which sort chronologically.
# `default=None` keeps an empty result set from raising.
latest_key = max(
    ((pred['date'], pred['time']) for pred in latest_prediction),
    default=None,
)
time_max = latest_key[1] if latest_key is not None else None


def time_max_filter(pred):
    """Return True when `pred` carries the most recent (date, time) stamp."""
    return (pred['date'], pred['time']) == latest_key


current_prediction = list(filter(time_max_filter, latest_prediction))

In [1041]:
# Smoke test of the deployed endpoint; the timeout keeps the cell from hanging
# if the service does not respond.
requests.get('https://bolsoes-api.herokuapp.com/predict', timeout=30).text

'[\n  {\n    "_id": "63274fbdc50bf4d6e5bfedeb",\n    "date": "2022-09-18",\n    "time": "17:05:00",\n    "cluster": "Catete",\n    "prediction": [\n      {\n        "range": "30 Min",\n        "probability": 0.00793794407172125,\n        "confidence": 0.9841241118565575\n      }\n    ]\n  }\n]'

In [1013]:
# Display the record(s) selected as the current prediction.
current_prediction

[{'_id': '6327465d01b9ae6d2053887a',
  'date': '2022-09-18',
  'time': '16:25:00',
  'cluster': 'Catete',
  'prediction': [{'range': '30 Min',
    'probability': 0.00793794407172125,
    'confidence': 0.9841241118565575}]}]