In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

from xgboost import XGBRegressor
from xgboost import XGBClassifier
from xgboost import XGBRFRegressor, XGBRFClassifier

from lightgbm import LGBMClassifier, LGBMRegressor

# Views 3
from viewser.operations import fetch
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_runs import storage
from views_runs.storage import store, retrieve, fetch_metadata

from views_forecasts.extensions import *

# Other packages
import pickle as pkl

# Packages from viewsforecasting repository

#from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated
import os
import sys
# Put the parent directory and its Tools/ and Intermediates/ sub-folders on
# the import path (entries are relative to the current working directory).
sys.path.extend(['../', '../Tools', '../Intermediates'])

In [None]:
# Shell out to the `viewser` command-line tool and run its `tables list` sub-command.
!viewser tables list

In [None]:
# Shell out to the `viewser` command-line tool and run `tables show` for the
# `climate_year_pgy` table.
!viewser tables show climate_year_pgy

In [None]:
#!viewser tables show hack_temp_escwa_pgm_pgm

In [None]:
#!viewser tables show tbl_6d3a6temp_prec_pgm 

In [None]:
#!viewser tables show ntl_pgy

In [None]:
#!viewser tables show kcmd_pgy

In [None]:
#!viewser tables show temperature_precipitation_pgm

In [None]:
#!viewser tables show tbl_ca543climate_pgm

In [None]:
#!viewser tables show shdi_pgy

In [None]:
#!viewser tables show climate_pgm

In [None]:
#!viewser transforms list

In [None]:
from viewser import Queryset, Column

In [None]:
    def _build_qs_combined_escwa():
        """Build the ``escwa001_pgm_combined`` queryset on the ``priogrid_month`` level.

        The queryset holds, in this order: the dependent variable, the columns
        read from ``hack_temp_escwa_pgm_pgm``, the unlagged target, the
        conflict-history decay and spatial-lag features, and the ``priogrid`` /
        ``priogrid_year`` controls. Column names, source tables, transform
        chains and their order are the same as when every column was spelled
        out by hand; only the repetition has been folded into helpers.

        Returns:
            The Queryset with all columns, the "escwa" theme and the
            description attached. Nothing is published here.
        """
        escwa_table = "hack_temp_escwa_pgm_pgm"
        ged_table = "ged2_pgm"
        ged_sb = "ged_sb_best_sum_nokgi"
        ged_os = "ged_os_best_sum_nokgi"

        def escwa_na_to_zero(name):
            """ESCWA-table column `name` with ``missing.replace_na(0)`` applied."""
            return (Column(name, from_table=escwa_table, from_column=name)
                    .transform.missing.replace_na(0))

        def escwa_filled(name):
            """ESCWA-table column `name` with ``missing.fill()`` applied."""
            return (Column(name, from_table=escwa_table, from_column=name)
                    .transform.missing.fill())

        def conflict_decay(name, from_column, threshold, spatial_lag=False):
            """Decay feature from a GED column: gte(threshold) -> time_since -> decay(24).

            With ``spatial_lag=True`` the decayed series is additionally passed
            through ``spatial.lag(1, 1, 0, 0)``. ``missing.replace_na()`` is
            applied both before the threshold step and at the very end.
            """
            column = (Column(name, from_table=ged_table, from_column=from_column)
                      .transform.missing.replace_na()
                      .transform.bool.gte(threshold)
                      .transform.temporal.time_since()
                      .transform.temporal.decay(24))
            if spatial_lag:
                column = column.transform.spatial.lag(1, 1, 0, 0)
            return column.transform.missing.replace_na()

        def fill_and_replace_na(column):
            """Append ``missing.fill()`` followed by ``missing.replace_na()`` to `column`."""
            return column.transform.missing.fill().transform.missing.replace_na()

        def priogrid_year(name, from_column, log=False):
            """``priogrid_year`` column, optionally with ``ops.ln()`` before fill/replace_na."""
            column = Column(name, from_table="priogrid_year", from_column=from_column)
            if log:
                column = column.transform.ops.ln()
            return fill_and_replace_na(column)

        queryset = Queryset("escwa001_pgm_combined", "priogrid_month")

        columns = [
            # Target variable: missing values are replaced first, then ln is applied.
            Column("ln_ged_sb_dep", from_table=ged_table, from_column=ged_sb)
            .transform.missing.replace_na()
            .transform.ops.ln(),

            # ESCWA drought / crop features.
            *(escwa_na_to_zero(name) for name in (
                "count_moder_drought_prev10",
                "cropprop",
                "growseasdummy",
            )),
            # NOTE(review): these two use fill() only, without a trailing
            # replace_na like the other columns from this table - confirm
            # that is intended.
            *(escwa_filled(name) for name in (
                "gwmean",
                "knn_dummy",
            )),
            *(escwa_na_to_zero(name) for name in (
                "spei1_gs_prev10",
                "spei1_gs_prev10_anom",
                "spei1_gsm_cv_anom",
                "spei1_gsm_detrend",
                "spei1gsy_lowermedian_count",
                "spei_48_detrend",
                "tlag1_dr_mod_gs",
                "tlag1_dr_moder_gs",
                "tlag1_dr_sev_gs",
                "tlag1_spei1_gsm",
                "tlag_12_crop_sum",
                "tlag_12_harvarea_maincrops",
                "tlag_12_irr_maincrops",
                "tlag_12_rainf_maincrops",
            )),

            # Time lag 0 of the target variable. Here ln is applied first and
            # the result is filled afterwards (the reverse order of
            # ln_ged_sb_dep above).
            Column("ln_ged_sb", from_table=ged_table, from_column=ged_sb)
            .transform.ops.ln()
            .transform.missing.fill(),

            # Decay functions: sb with thresholds 1 and 25, then os with threshold 1.
            conflict_decay("decay_ged_sb_1", ged_sb, 1),
            conflict_decay("decay_ged_sb_25", ged_sb, 25),
            conflict_decay("decay_ged_os_1", ged_os, 1),

            # Spatial lag / spatial lag decay.
            # NOTE(review): both columns below are built from the same source
            # column with the same transform chain, so they presumably carry
            # identical values under two names. Both are kept because other
            # code may select either name - confirm whether one was meant to
            # differ.
            conflict_decay("splag_1_1_sb_1", ged_sb, 1, spatial_lag=True),
            conflict_decay("splag_1_decay_ged_sb_1", ged_sb, 1, spatial_lag=True),

            # Log population as control.
            priogrid_year("ln_pop_gpw_sum", "pop_gpw_sum", log=True),

            # From the priogrid tables.
            priogrid_year("ln_ttime_mean", "ttime_mean", log=True),
            priogrid_year("ln_gcp_mer", "gcp_mer", log=True),
            priogrid_year("imr_mean", "imr_mean"),
            priogrid_year("ln_bdist3", "bdist3", log=True),
            priogrid_year("ln_capdist", "capdist", log=True),
            priogrid_year("mountains_mean", "mountains_mean"),
            fill_and_replace_na(
                Column("dist_diamsec", from_table="priogrid", from_column="dist_diamsec_s_wgs")),
            fill_and_replace_na(
                Column("dist_petroleum", from_table="priogrid", from_column="dist_petroleum_s_wgs")),
            *(priogrid_year(name, name) for name in (
                "agri_ih",
                "barren_ih",
                "forest_ih",
                "pasture_ih",
                "savanna_ih",
                "shrub_ih",
                "urban_ih",
            )),
            fill_and_replace_na(
                Column("greq_1_excluded", from_table="priogrid_year", from_column="excluded")
                .transform.bool.gte(1)),
        ]

        for column in columns:
            queryset = queryset.with_column(column)

        # The description literal is kept verbatim, including the leading
        # whitespace of its second line, so the text attached to the queryset
        # does not change.
        return (queryset
                .with_theme("escwa")
                .describe("""Fatalities, escwa drought and natsocial, pgm level
                                  Predicting number of fatalities with features from the escwa drought and natsocial themes"""))

    qs_combined_escwa = _build_qs_combined_escwa()



In [None]:
# Publish the queryset definition, then fetch its data and keep the result as `test`.
# NOTE(review): both calls presumably contact the remote viewser service, so
# this cell may be slow and needs connectivity - confirm.
test = qs_combined_escwa.publish().fetch()

In [None]:
# Show the fetched result as this cell's output.
test