In [15]:
import datetime
import gzip
import io
import os
import pickle
from urllib.parse import quote_plus
from zipfile import ZipFile

import geopandas as gpd
import pandas as pd
import requests
import wradlib as wrl
from requests.exceptions import RequestException
from shapely.geometry import Polygon
from sklearn.ensemble import RandomForestRegressor
from sqlalchemy import create_engine

# Database connection settings.
# The password and host are taken from the environment (POSTGRES_PASSWD,
# DB_QTREES) so that real credentials do not have to be stored in the
# notebook. The previous literals are kept as defaults so a local setup that
# relied on them keeps working unchanged.
postgres_passwd = os.environ.get("POSTGRES_PASSWD", "postgrespostgres")
db_qtrees = os.environ.get("DB_QTREES", "localhost")

# The password is URL-escaped before being embedded in the connection URL;
# otherwise characters such as "@", ":" or "/" in a password would be parsed
# as URL delimiters. For the default password the escaping is a no-op.
engine = create_engine(
    f"postgresql://postgres:{quote_plus(postgres_passwd)}@{db_qtrees}:5432/qtrees"
)

# Load the complete training table; the connection is released when the
# `with` block exits.
with engine.connect() as con:
    train_data = pd.read_sql('select * from private.training_data', con)

# Drop every row with a missing value in ANY column of the table (not only the
# feature/target columns), because the estimator cannot handle NaNs.
train_data = train_data.dropna()

# Feature columns used for training here and for inference further below.
FEATURES = [
    "winter",
    "spring",
    "summer",
    "fall",
    "standalter",
    "rainfall_mm_14d_sum",
    "temp_avg_c_14d_avg",
    "median_value",
]

# Fixed seed so that re-running the notebook reproduces the same forests.
RANDOM_SEED = 42

# Fit one independent model per type_id and persist each to `model<type_id>_.m`.
# The loop variable is deliberately not called `type`, which would shadow the
# builtin for the remainder of the kernel session.
for type_id in [1, 2, 3]:
    is_current_type = train_data.type_id == type_id
    X = train_data.loc[is_current_type, FEATURES]
    y = train_data.loc[is_current_type, "target"]

    model = RandomForestRegressor(random_state=RANDOM_SEED)
    model.fit(X, y)

    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open(f'model{type_id}_.m', 'wb') as model_file:
        pickle.dump(model, model_file)

In [43]:
# Inference query: one row per street tree (public.trees with street_tree = True),
# LEFT JOINed to public.shading for the seasonal shading columns, so trees
# without a shading row come back with NULL shading values.
# Three scalar subqueries attach the same value to every row:
#   - rainfall_mm_14d_sum and temp_avg_c_14d_avg from
#     private.weather_solaranywhere_14d_agg for yesterday's date,
#   - median_value from private.sensor_measurements_agg for yesterday and type_id = 1.
# "Yesterday" is computed by the database from now(), not by the notebook.
# NOTE(review): each scalar subquery presumably matches at most one row per day;
# confirm, since PostgreSQL rejects a scalar subquery that returns several rows.
q = """(SELECT trees.id, 1 as type_id, date(now()::date - interval '1 d') as yesterday,
		shading.spring, shading.summer, shading.fall, shading.winter, trees.gattung_deutsch, trees.standalter,
		(select rainfall_mm_14d_sum FROM private.weather_solaranywhere_14d_agg WHERE date = date(now()::date - interval '1 d')),
		(select temp_avg_c_14d_avg FROM private.weather_solaranywhere_14d_agg WHERE date = date(now()::date - interval '1 d')),
		(select median_value FROM private.sensor_measurements_agg WHERE (date(sensor_measurements_agg.timestamp) = date(now()::date - interval '1 d')) AND sensor_measurements_agg.type_id = 1)
FROM (SELECT * FROM public.trees WHERE trees.street_tree = True) AS trees LEFT JOIN public.shading ON shading.tree_id = trees.id)"""

# Run the query and load the result into a DataFrame; the connection is
# released when the `with` block exits.
with engine.connect() as con:
    test_data = pd.read_sql(q, con)

# Feature matrix for inference, indexed by tree id. Rows with any missing
# feature are dropped, so trees lacking shading data (or every tree, if one of
# the day-level values is NULL) are excluded from the nowcast.
X = test_data[FEATURES + ["id"]].set_index("id").dropna()
if X.empty:
    # Fail early with an actionable message instead of an opaque estimator
    # error about an empty input array.
    raise ValueError(
        "No rows with a complete set of features for yesterday; "
        "check the weather/sensor aggregates and the shading table."
    )

# Load the model trained for type_id 1. The file is written by this notebook;
# never unpickle model files from an untrusted source.
with open(f'model{1}_.m', 'rb') as model_file:
    model = pickle.load(model_file)

# One prediction per tree; reset_index() turns the tree id back into a column.
y_hat = pd.DataFrame(model.predict(X), index=X.index).reset_index()
y_hat.columns = ["tree_id", "value"]
y_hat["type_id"] = 1
# The nowcast is stamped with yesterday's date as seen by the notebook host,
# while the features were selected for yesterday as seen by the database.
# NOTE(review): these can differ around midnight or across time zones — confirm.
y_hat["timestamp"] = datetime.date.today() - pd.Timedelta("1D")
y_hat["created_at"] = datetime.datetime.now()
y_hat["model_id"] = "Random Forest (simple)"
y_hat.head()

In [48]:
# Append the predictions to public.nowcast using multi-row INSERT statements.
# Because rows are appended, running this cell again writes the same
# predictions a second time.
y_hat.to_sql(
    name="nowcast",
    con=engine,
    schema="public",
    if_exists="append",
    index=False,
    method="multi",
)

411704