### Script that generates the bootstrapped version of trap readings for Spain ###

* Requires the weather and climate models to be precomputed beforehand (the weather models on a daily basis).
* Works for any pest, but every model must be trained per pest.

In [1]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold 
from sklearn.metrics import r2_score
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_pinball_loss, mean_squared_error

In [3]:
import libbloom as lb

In [72]:
from scipy.optimize import curve_fit
from scipy import stats

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
def logifunc(x,l,c,k):
    """Evaluate a logistic curve shifted vertically by the module-level ``delta``.

    Computes ``l / (1 + c * exp(-k * x)) + delta``.

    x: point (scalar or numpy array) at which the curve is evaluated.
    l: numerator of the logistic term.
    c: coefficient multiplying the exponential.
    k: rate inside the exponential.

    ``delta`` is not an argument: it is read from module scope and must be
    assigned (genApproxCurves does this) before the function is called.
    """
    # Reading a module global needs no ``global`` statement.
    damping = c * np.exp(-k * x)
    return l / (1 + damping) + delta

In [7]:
def poli(x,a,b,c,d):
    """Evaluate a quartic polynomial whose constant term is the module-level ``delta``.

    Computes ``a*x**4 + b*x**3 + c*x**2 + d*x + delta``.

    x: point (scalar or numpy array) at which the polynomial is evaluated.
    a, b, c, d: coefficients of the 4th, 3rd, 2nd and 1st power of ``x``.

    ``delta`` is not an argument: it is read from module scope and must be
    assigned (genApproxCurves does this) before the function is called.
    """
    quartic = a * x**4
    cubic = b * x**3
    quadratic = c * x**2
    linear = d * x
    # Summed left to right, ending with the global offset.
    return quartic + cubic + quadratic + linear + delta

In [8]:
def fitL(ds,polin =False):
    """Fit a smooth curve to a series, using its position (0..n-1) as abscissa.

    ds: pandas Series with the values to fit; its index is reused for the output.
    polin: when truthy the quartic ``poli`` is fitted, otherwise the
        logistic ``logifunc``.

    Returns ``(dm, popt)``: ``dm`` is a DataFrame indexed like ``ds`` with a
    'data' column (the input values) and a 'polinomial' or 'logistic' column
    (the fitted curve); ``popt`` is the parameter array from ``curve_fit``.

    Both curve functions read the module-level ``delta``, so it must be set
    before calling.
    """
    positions = np.arange(len(ds))
    if polin:
        curve, label = poli, 'polinomial'
    else:
        curve, label = logifunc, 'logistic'

    # The covariance estimate returned by curve_fit is not used.
    popt, _ = curve_fit(curve, positions, ds.values)

    # Evaluate point by point, as the fitted parameters are applied to each position.
    fitted = [curve(x, *popt) for x in positions]
    dm = pd.DataFrame(index=ds.index, data={'data': ds.values, label: fitted})
    return dm, popt

In [9]:
def nestModels(point_and_elevation,model_gen,model_press,model_wind,gbr = False):
    """Predict from nested models: wind and pressure first, then the main model.

    point_and_elevation: numpy array of shape (1, 3) holding [lat, lon, elevation],
        e.g. ``np.array([3.94824123e+01, -4.88099096e-01, 8.50000000e+01]).reshape(1,-1)``.
    model_gen: object with ``predict`` or, when ``gbr`` is truthy, a dict whose
        values each have ``predict``.
    model_press, model_wind: objects with ``predict`` taking the (1, 3) point.
    gbr: selects the dict-of-models behaviour for ``model_gen``.

    Returns the first element of ``model_gen``'s prediction, or a list with one
    such element per model in the dict when ``gbr`` is truthy. If the input
    shape is wrong, a message is printed and ``False`` is returned.
    """
    if point_and_elevation.shape != (1,3):
        print("point_and_elevation should have shape: (1,3)")
        return False

    wind_prediction = model_wind.predict(point_and_elevation)[0]
    pressure_prediction = model_press.predict(point_and_elevation)[0]

    # The downstream model receives [lat, lon, elevation, pressure, wind].
    extras = [pressure_prediction, wind_prediction]
    extended_point = np.append(point_and_elevation, extras).reshape(1,-1)

    if not gbr:
        return model_gen.predict(extended_point)[0]
    return [md.predict(extended_point)[0] for md in model_gen.values()]

In [10]:
def genApproxCurves(dfd, polin = True):
    """Append a fitted ``<column>_pl`` curve for every column of ``dfd``.

    dfd: DataFrame whose columns are fitted one by one with ``fitL``.
    polin: truthy -> polynomial fit; falsy -> logistic fit, whose curve is
        shifted so that its first value is zero.

    Returns ``(dfd, params_ln)``: a new DataFrame with the extra ``*_pl``
    columns and a dict mapping each original column to its fitted parameters.

    Side effect: before each fit the module-level ``delta`` is set to the mean
    of the column's first three values; the curve functions read it.
    """
    global delta
    params_ln = {}
    # Only the columns present on entry are visited; the ones added below are not.
    for col in dfd.columns:
        delta = np.mean(dfd[col].values[0:3])
        if polin:
            fit, params_ln[col] = fitL(dfd[col], True)
            curve = fit.polinomial.values
        else:
            fit, params_ln[col] = fitL(dfd[col], False)
            logistic = fit.logistic.values
            curve = logistic - logistic[0]
        # Add under a temporary name, then rename to '<col>_pl'.
        dfd = dfd.assign(tm=curve).rename(columns={'tm' : '%s_pl'%(col)})
    return dfd, params_ln

In [11]:

def generateDDforPoint(pp,models):
    """Run the nested prediction for ``pp`` with every entry of ``models``.

    pp: point passed unchanged to ``nestModels``.
    models: dict whose values are dicts with the keys 'model_gen',
        'model_press' and 'model_wind'.

    Returns a list with one ``nestModels(..., gbr=True)`` result per entry,
    in the iteration order of ``models``.
    """
    # Original author's note: uses ddays referring to yesterday.
    return [
        nestModels(pp, bundle['model_gen'], bundle['model_press'], bundle['model_wind'], True)
        for bundle in models.values()
    ]

In [12]:
def upperize(dfd,vvar):
    """Lift column ``vvar`` of ``dfd`` with 30 passes of a pairwise maximum.

    One pass replaces each value with the larger of itself and the value
    before it; the first value is compared against 0. After 30 passes every
    entry is the maximum of itself and the 30 entries preceding it, with
    leading positions padded by 0.

    Returns a plain list with one entry per row of ``dfd``.

    NOTE(review): this is a trailing-window maximum, not a cumulative maximum;
    a dip lasting more than 30 rows is not removed. Confirm that is intended.
    """
    def lift(values):
        # Pair each value with its predecessor; the first one is paired with 0.
        predecessors = [0] + list(values[:-1])
        return [cur if cur >= prev else prev
                for prev, cur in zip(predecessors, values)]

    ds = dfd[vvar]
    for _ in range(30):
        ds = lift(pd.Series(ds).values)
    return ds
        

In [13]:
def getValuesFromModels(pp,models,date):
    """Build the smoothed q05/q50/q95 curves for a point up to ``date``.

    pp: point passed to ``generateDDforPoint``.
    models: dict of model bundles; each must yield three predictions, which
        become the columns 'q05', 'q50' and 'q95'.
    date: last day of the returned daily index, which starts on 2023-01-01.

    Returns a DataFrame indexed by day with the columns 'q05_r', 'q50_r',
    'q95_r' (output of ``upperize``) and their polynomial fits 'q05_r_pl',
    'q50_r_pl', 'q95_r_pl'.
    """
    raw = pd.DataFrame(np.array(generateDDforPoint(pp,models)), columns=['q05','q50','q95'])

    # Keep only as many rows as there are days in the requested range.
    # presumably one row per day starting 2023-01-01 - verify against how models is built
    days = pd.date_range(start = '2023-01-01', end=date, freq='D')
    raw = raw.iloc[0:len(days)]
    raw.index = days

    # First pass: logistic fit; the fitted parameters are not needed here.
    curves, _ = genApproxCurves(raw,False)
    for source, lifted in (('q05','q05_r'), ('q50','q50_r'), ('q95','q95_r')):
        curves[lifted] = upperize(curves,source)

    # Second pass: polynomial fit of the lifted columns only.
    dfds, _ = genApproxCurves(curves[['q05_r','q50_r','q95_r']],True)
    return dfds


In [46]:
def generatePrediction(lat,lon,elev,date):
    """Return the daily curves for a location up to ``date``.

    lat, lon, elev: coordinates and elevation, packed into a (1, 3) array.
    date: last day of the requested series.

    If ``date`` is not later than today, the curves come from the module-level
    ``modelsWeather`` alone. Otherwise the weather curves (days before today)
    are concatenated with the ``modelsClimate`` curves (today onwards), the
    latter shifted by yesterday's weather-minus-climate gap in 'q50_r_pl'.

    Prints "M" (weather only) or "O" (weather + climate) to show the branch taken.
    """
    pp = np.array([lat,lon,elev]).reshape(1,-1)
    today, yesterday, _first_jan = lb.getUtilDates()
    requested = pd.to_datetime(date)
    today_ts = pd.to_datetime(today)

    if not requested > today_ts:
        print("M")
        return getValuesFromModels(pp,modelsWeather,date)

    print("O")
    climate = getValuesFromModels(pp,modelsClimate,date)
    weather = getValuesFromModels(pp,modelsWeather,yesterday)

    # The same scalar offset (from the median fit) is added to every climate column.
    gap_yesterday = weather.loc[yesterday] - climate.loc[yesterday]
    offset = gap_yesterday['q50_r_pl']

    observed = weather[weather.index < today_ts]
    forecast = climate[climate.index >= today_ts] + offset
    # forecast + actual
    return pd.concat([observed, forecast], axis = 0)

In [55]:
def getPeaks(dfPest,mounted_df, fitted = True):
    """Find the first day on which each flight-peak threshold is exceeded.

    dfPest: DataFrame with the columns 'flight_peak' and 'ddays_C'; only rows
        with ``flight_peak == 1`` contribute a threshold.
    mounted_df: DataFrame with the columns 'q50_r_pl' and 'q50_r'; the labels
        of its index are what gets returned.
    fitted: use 'q50_r_pl' when truthy, 'q50_r' otherwise.

    Returns a list with, for every threshold in row order, the index label of
    the first row whose value is strictly greater than it. Thresholds that are
    never exceeded are skipped, so the list may be shorter than the number of
    flight peaks.
    """
    thresholds = dfPest[dfPest.flight_peak == 1].ddays_C.values
    dd_data = mounted_df['q50_r_pl' if fitted else 'q50_r']

    peaks = []
    for threshold in thresholds:
        crossing = dd_data[dd_data > threshold]
        # Test "never exceeded" explicitly instead of swallowing every
        # exception with a bare except.
        if len(crossing) > 0:
            peaks.append(crossing.index[0])
    return peaks

In [82]:
def minMax(s,rescale = 140):
    """Linearly rescale ``s`` so its minimum maps to 0 and its maximum to ``rescale``.

    s: pandas Series (or numpy array) of numbers.
    rescale: value the maximum of ``s`` is mapped to.

    Returns an object of the same kind as ``s``. When all values are equal the
    range is zero and the scaling is undefined; zeros are returned in that
    case instead of the NaNs a division by zero would produce.
    """
    lowest = s.min()
    span = s.max() - lowest
    if span == 0:
        # Constant input: avoid 0/0 and map everything to the bottom of the range.
        return (s - lowest) * 0.0
    return rescale * (s - lowest) / span

In [102]:
def getLevelsPopulations(intvs,peaks, rescale = 140 ):
    """Simulate population-level curves (5 %, median, 95 %) from peak intervals.

    For each of 50 runs, 1000 gamma-distributed samples are drawn per entry of
    ``intvs`` (location = entry - 3, shape uniform in [3, 4], scale uniform in
    [2, 4]); the pooled samples are turned into a density histogram with
    ``int(max sample)`` bins. Across runs, the per-bin median and the 5 % / 95 %
    quantiles are computed.

    intvs: iterable of numeric offsets, one per simulated peak.
    peaks: sequence of peak dates; it must not be empty.
    rescale: value the maximum of the median curve is scaled to.

    Returns ``(dfk_q05, dfk, dfk_q95)``, three Series indexed by bin number:
    the min-max-scaled median and the two quantile curves expressed relative
    to it.

    Raises IndexError if ``peaks`` is empty.

    Uses the global numpy random state, so results vary between calls unless
    the caller seeds it.
    """
    # Previously an unused date_range built from peaks[0]/peaks[-1] was the only
    # thing rejecting an empty ``peaks``; keep that contract, but explicitly.
    if len(peaks) == 0:
        raise IndexError("peaks must contain at least one date")

    shift = -3
    densities = []
    for _ in range(50):
        samples = []
        for interval in intvs:
            loc = interval + shift
            alpha = np.random.uniform(3,4)
            beta = np.random.uniform(2,4)
            # Original author's note: ~100 points per day (to ensure statistics).
            draws = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=1000)
            samples.append(pd.Series(draws))
        pooled = pd.concat(samples)
        density, _ = np.histogram(pooled.values, bins = int(pooled.max()), density=True)
        densities.append(pd.Series(index = np.arange(len(density)), data = density))

    stacked = pd.concat(densities, axis = 0)
    by_bin = stacked.groupby(stacked.index)
    dfT_median = by_bin.median()
    dfT_q05 = by_bin.quantile(0.05)
    dfT_q95 = by_bin.quantile(0.95)

    dfk = minMax(dfT_median,rescale)
    # A tiny random offset in the denominator avoids dividing by a zero median.
    dfk_q05 = dfk * (dfT_q05 / (dfT_median + np.random.uniform(0,0.001)))
    dfk_q95 = dfk * (dfT_q95 / (dfT_median + np.random.uniform(0,0.001)))

    return dfk_q05,dfk,dfk_q95
    

In [118]:
def getReadingsSeries(dfSpodoptera,mounted):
    """Build the dated population curves (5 %, median, 95 %) for a prediction.

    dfSpodoptera: pest events table passed to ``getPeaks``.
    mounted: daily prediction DataFrame (as returned by ``generatePrediction``).

    Returns ``(dfk_q05, dfk, dfk_q95)``: the three Series produced by
    ``getLevelsPopulations``, re-indexed with dates taken from ``mounted``.
    """
    peaks = getPeaks(dfSpodoptera,mounted)

    # Day gaps between consecutive peaks, preceded by 0 for the first peak.
    # Dropping the leading NaT and iterating the Series yields pandas Timedelta
    # objects, so ``.days`` is always available. The earlier
    # ``fillna(0).values`` form only worked when pandas upcast the column to
    # object dtype.
    gaps = pd.Series(peaks).diff().dropna()
    deltas = [0] + [gap.days for gap in gaps]

    dfk_q05,dfk,dfk_q95 = getLevelsPopulations(deltas,peaks)

    # The curves are stamped with the last len(dfk) dates of ``mounted``,
    # excluding its final row.
    # NOTE(review): the final date of ``mounted`` is therefore absent from the
    # result; confirm this matches the date later looked up by callers.
    idx = mounted.iloc[-len(dfk)-1:-1].index
    dfk.index = idx
    dfk_q05.index = idx
    dfk_q95.index = idx
    return dfk_q05,dfk,dfk_q95

In [2]:
def getValuesDate(date, dfk_q05,dfk,dfk_q95):
    """Return the (low, median, high) levels at ``date``, truncated to integers.

    date: index label looked up with ``.loc`` in each series.
    dfk_q05, dfk, dfk_q95: Series holding the 5 %, median and 95 % curves.

    Returns a 3-tuple of ``int`` values in the order (q05, median, q95).
    """
    return tuple(int(series.loc[date]) for series in (dfk_q05, dfk, dfk_q95))

In [3]:
def apiResponse(lat,lon,date):
    """Return the (low, median, high) trap-reading levels for a location and date.

    lat, lon: coordinates of the point; its elevation is obtained from
        ``lb.getElevationUSGS``.
    date: day for which the levels are returned.

    Uses the module-level ``dfSpodoptera`` events table.
    """
    elevation = lb.getElevationUSGS(lat,lon)
    prediction = generatePrediction(lat,lon,elevation,date)
    curves = getReadingsSeries(dfSpodoptera,prediction)
    # curves is (q05, median, q95), matching getValuesDate's argument order.
    return getValuesDate(date, *curves)

### initialization ###

In [33]:
# Locations of the precomputed per-pest model bundles.
savedir = "savedModels/spodoptera/"
outmodelWeather = savedir + "WeatherModels.pickle"
outmodelClimate = savedir + "ClimateModels.pickle"

# NOTE: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
# Context managers close the file handles once the models are loaded.
with open(outmodelClimate,"rb") as _fh:
    modelsClimate = pickle.load(_fh)
with open(outmodelWeather,"rb") as _fh:
    modelsWeather = pickle.load(_fh)

dfSpodoptera = pd.read_csv ("Spodoptera_events.csv") ## this is the file for spodoptera

**Example call**

In [None]:
# Example query: coordinates in decimal degrees and the day of interest.
lat, lon = 38.24, -4.8
date = '2023-08-21'
mmin, mmed, mmax = apiResponse(lat, lon, date)

In [125]:
(mmin, mmed, mmax)  # show the low / median / high levels from the example call

(29, 80, 115)