# Notebook to generate the results for the intervals section of the report.

In [1]:
# import packages
from aquacrop import AquaCropModel, Soil, Crop, InitialWaterContent, IrrigationManagement
from aquacrop.utils import prepare_weather, get_filepath
import numpy as np
import matplotlib as mplt
import matplotlib.pyplot as plt
import datetime as dt
import random as random
import pandas as pd
import seaborn as sns
import os

# get current working directory to load and save files
thedir = os.getcwd()

# use cwd to get the folder containing model results:
# dirname(thedir) is the parent of the cwd and '..' climbs one level further,
# so datadir resolves to <grandparent of cwd>/acrop/model_results
datadir = os.path.abspath(os.path.join(os.path.dirname(thedir), '..', 'acrop/model_results'))

In [2]:
def get_date(first,last,year):
    """
    function generates a random date in the interval [first, last]

    Both end points can be drawn. February 29 is never returned, so the
    month-day part of the result is a valid date in any year.

    Parameters:
    first (str): start of interval of possible watering dates, as "MM-DD"
    last (str): end of interval of possible watering dates, as "MM-DD"
    year (int): the year to use
    Returns:
    str: a YYYY-MM-DD str of a date between the provided intervals
    Raises:
    ValueError: if last is before first, or if February 29 is the only date in the interval
    """
    start = dt.date(year, int(first[0:2]), int(first[3:]))
    end = dt.date(year, int(last[0:2]), int(last[3:]))
    interval_days = (end - start).days
    if interval_days < 0:
        raise ValueError(f"last ({last}) is before first ({first})")
    # interval_days + 1 offsets so that `last` itself can be picked and a
    # one-day interval (first == last) is valid
    candidates = [start + dt.timedelta(days=offset) for offset in range(interval_days + 1)]
    # drop February 29 up front instead of re-drawing, so the function always
    # terminates even when the leap day is the only day available
    candidates = [day for day in candidates if (day.month, day.day) != (2, 29)]
    if not candidates:
        raise ValueError(f"no usable date between {first} and {last} in {year}")
    return str(random.choice(candidates))

def genStableCalendar(first1,last1,first2,last2,year,depth,end_year=2003):
    """
    function to generate random days for each run of the model but remains constant across growing seasons

    Two dates are drawn once (in year+1) and their month-day parts are then
    repeated in every following year. Each season also gets an event on 12-15.

    Parameters:
    first1 (str): start of first interval of possible watering dates ("MM-DD")
    last1 (str): end of first interval of possible watering dates ("MM-DD")
    first2 (str): start of second interval of possible watering dates ("MM-DD")
    last2 (str): end of second interval of possible watering dates ("MM-DD")
    year (int): the year to begin
    depth: value written to the 'Depth' column for every scheduled date
    end_year (int): last calendar year that receives the two drawn dates (default 2003)

    Returns:
    list: returns a list of two elements [OSPY calendar item,list of dates]
          where the list of dates is [[month-day of first date, month-day of second date]]
    """
    first = get_date(first1,last1,year+1)
    second = get_date(first2,last2,year+1)
    # keep only the "MM-DD" part so the same days can be reused every year
    day_month = first[5:]
    day_month_ = second[5:]
    md = [[day_month,day_month_]]

    cal = [F'{year}-12-15', first, second]
    for i in range(year+1,end_year):
        cal.append(F'{i}-12-15')
        cal.append(F'{i+1}-{day_month}')
        cal.append(F'{i+1}-{day_month_}')

    # build the schedule with named columns directly; DataFrame.set_axis no
    # longer accepts the `inplace` keyword in pandas >= 2.0
    cal_pd = pd.DataFrame({'Date': cal})
    cal_pd['Depth'] = depth
    # irrigation
    # note that clean water is not specified as it is the default value for irrigation.
    # irrigation_method=3 with a Schedule dataframe: presumably aquacrop's
    # "pre-defined schedule" mode - confirm against the installed aquacrop version
    im = IrrigationManagement(irrigation_method=3,Schedule = cal_pd,AppEff = 100,WetSurf=100)
    return [im,md]

def genExactCal(first,second,year,depth,end_year=2003):
    """
    function to generate an irrigation calendar with two fixed dates repeated every season

    Each season gets an event on 12-15 followed by `first` and `second` in the
    next calendar year.

    Parameters:
    first (str): month-day of the first watering date ("MM-DD")
    second (str): month-day of the second watering date ("MM-DD")
    year (int): the year to begin
    depth: value written to the 'Depth' column for every scheduled date
    end_year (int): last calendar year that receives the two dates (default 2003)

    Returns:
    OSPY calendar item (IrrigationManagement)
    """
    cal = []
    for i in range(year,end_year):
        cal.append(F'{i}-12-15')
        cal.append(F'{i+1}-{first}')
        cal.append(F'{i+1}-{second}')

    # build the schedule with named columns directly; DataFrame.set_axis no
    # longer accepts the `inplace` keyword in pandas >= 2.0
    cal_pd = pd.DataFrame({'Date': cal})
    cal_pd['Depth'] = depth
    # irrigation
    # note that clean water is not specified as it is the default value for irrigation.
    # NOTE(review): unlike genStableCalendar, AppEff and WetSurf are left at
    # the library defaults here - confirm that the difference is intended
    im = IrrigationManagement(irrigation_method=3,Schedule = cal_pd)
    return im

In [3]:
def runStable(runs,first1,last1,first2,last2,year,depth):
    """
    function that repeatedly draws a stable calendar and runs the model with it

    Parameters:
    runs (int): the number of runs
    first1 (str): start of first interval of possible watering dates
    last1 (str): end of first interval of possible watering dates
    first2 (str): start of second interval of possible watering dates
    last2 (str): end of second interval of possible watering dates
    year (int): the year to begin
    depth: irrigation depth forwarded to genStableCalendar
    Returns:
    list: [outputs dataframe of all runs stacked, list of [dates, run number] entries]
    """
    frames = []
    drawn = []
    for run_no in range(1, runs + 1):
        schedule, month_days = genStableCalendar(first1,last1,first2,last2,year,depth)
        drawn.append([month_days, run_no])
        frames.append(run_model(schedule, run_no))
        # progress message every tenth run
        if run_no % 10 == 0:
            print(f'run # {run_no}')
    return [pd.concat(frames), drawn]

def runExact(first,second,year,depth):
    """
    function runs the model once with two fixed watering dates

    Parameters:
    first (str): month-day of the first watering date ("MM-DD")
    second (str): month-day of the second watering date ("MM-DD")
    year (int): the year to begin
    depth: irrigation depth forwarded to genExactCal
    Returns:
    dataframe: outputs of run_model_ with two extra columns, 'first' and
               'second', holding the watering dates of the first season
    """
    newcal = genExactCal(first,second,year,depth)
    a = run_model_(newcal)
    # genExactCal places the first occurrence of both dates in year+1, so the
    # label columns use year+1 to match the schedule that was actually run
    a['first'] = dt.date(year + 1, int(first[:2]), int(first[3:]))
    a['second'] = dt.date(year + 1, int(second[:2]), int(second[3:]))
    return a

In [4]:
def run_model(cal,i):
    """
    function runs the model with the given calendar and tags the outputs with the run number

    The simulation and the assembly of the per-season outputs are shared with
    run_model_; this function only adds the 'run' column.

    Parameters:
    cal: OSPY calendar item (IrrigationManagement) to simulate
    i (int): run number stored in the 'run' column of every row
    Returns:
    dataframe: per-season outputs from run_model_ plus a 'run' column
    """
    outputs = run_model_(cal)
    outputs['run']=i

    return outputs

def run_model_(cal):
    """
    function runs the model with the given calendar and returns per-season outputs

    The simulation period, weather, soil, crop and initial water content are
    read from the notebook-level variables simStart, simEnd, weather_data,
    my_soil, my_crop and my_iwc.

    Parameters:
    cal: OSPY calendar item (IrrigationManagement) to simulate
    Returns:
    dataframe: final_stats joined on "Season" with the seasonal sums of
               Es, Tr, DeepPerc, CR and the seasonal maxima of biomass,
               harvest_index, biomass_ns, canopy_cover
    """
    simulation = AquaCropModel(
        sim_start_time=simStart,
        sim_end_time=simEnd,
        weather_df=weather_data,
        soil=my_soil,
        crop=my_crop,
        initial_water_content=my_iwc,
        irrigation_management=cal,
    )
    simulation.run_model(till_termination=True)
    results = simulation._outputs

    # water variables: sum per season, keep the four fluxes, expose the
    # season number as a regular "Season" column
    flux_totals = results.water_flux.groupby(["season_counter"]).sum()[["Es","Tr","DeepPerc","CR"]]
    flux_totals = flux_totals.assign(Season=flux_totals.index).reset_index(drop=True)

    # crop growth variables: maximum per season
    growth = results.crop_growth
    growth["Season"] = growth["season_counter"]
    growth_peaks = growth.groupby(["season_counter"]).max()[
        ["biomass","harvest_index","biomass_ns","canopy_cover","Season"]
    ]

    return (
        results.final_stats
        .merge(flux_totals, how="inner", on=["Season"])
        .merge(growth_peaks, how="inner", on=["Season"])
    )

In [5]:
# assign outputs from stable
def getOutputsPD():
    """
    function to get the outputs dataframe, i.e. the first element of the
    notebook-level results list new_stable
    """
    return new_stable[0]

def getOutputsDate():
    """
    function to get the dates, i.e. the second element of the notebook-level
    results list new_stable, wrapped in a dataframe
    """
    return pd.DataFrame(new_stable[1])

def getDates(df1,df2):
    """
    function to get dates pd into useful format

    Flattens the nested date lists in the first column of df1 into one row per
    date pair. Rows are numbered 1, 2, 3, ... in the order they appear.

    Parameters:
    df1 (df): dataframe whose first column holds, per row, a list of
              [first month-day, second month-day] pairs
    df2 (df): not used; kept so existing calls keep working
    Returns:
    df: dataframe with columns "run", "date_1", "date_2"
    """
    # one entry per [date_1, date_2] pair, in row order
    pairs = [pair for row_pairs in df1.iloc[:,0] for pair in row_pairs]
    return pd.DataFrame({
        "run": list(range(1, len(pairs) + 1)),
        "date_1": [pair[0] for pair in pairs],
        "date_2": [pair[1] for pair in pairs],
    })

def getStatsRuns(df1,df2):
    """
    function that aggregates the outputs per run and attaches the dates of each run

    Parameters:
    df1 (df): dataframe of outputs from runs (must contain a "run" column)
    df2 (df): dataframe of dates, merged in on the columns it shares with the statistics
    Returns:
    df: one row per run with mean, min, max, median and var of every output variable
    """
    measures = ['Yield (tonne/ha)', 'biomass', 'harvest_index', 'canopy_cover',
                'Es', 'Tr', 'DeepPerc', 'CR']
    reducers = ['mean', 'min', 'max', 'median', 'var']
    per_run = df1.groupby("run").agg({measure: list(reducers) for measure in measures})
    # flatten the two-level column index into "<variable>_<statistic>"
    per_run.columns = [f"{measure}_{reducer}" for measure, reducer in per_run.columns]
    # turn the "run" index back into a column so it can serve as the merge key
    per_run = per_run.reset_index()
    return per_run.merge(df2)

def getStatsSeasons(df1):
    """
    function that aggregates the outputs per season and saves them as csv

    The file is written to {datadir}/{date_pair}_{depth}s.csv, where datadir,
    date_pair and depth are notebook-level variables.

    Parameters:
    df1 (df): dataframe of outputs from runs (must contain a "Season" column)
    """
    measures = ['Yield (tonne/ha)', 'biomass', 'harvest_index', 'canopy_cover',
                'Es', 'Tr', 'DeepPerc', 'CR']
    reducers = ['mean', 'min', 'max', 'median', 'var']
    per_season = df1.groupby("Season").agg({measure: list(reducers) for measure in measures})
    # flatten the two-level column index into "<variable>_<statistic>"
    per_season.columns = [f"{measure}_{reducer}" for measure, reducer in per_season.columns]
    per_season.to_csv(F"{datadir}/{date_pair}_{depth}s.csv")

def getDays(df):
    """
    function that appends, for the first and second watering dates, the time elapsed since 1979-12-15 and saves the result as csv

    Adds four columns to df in place: irrig_1 / irrig_2 (timedeltas) and
    irrigation_1 / irrigation_2 (the same values as whole days). The file is
    written to {datadir}/{date_pair}_{depth}.csv, where datadir, date_pair and
    depth are notebook-level variables.

    Parameters:
    df (df): dataframe with "MM-DD" strings in columns date_1 and date_2
             (e.g. the result of getStatsRuns)
    """
    # NOTE(review): the reference date and the year of the watering dates are
    # fixed to the 1979/80 season; the crop in this notebook is declared with
    # planting_date='12/01' - confirm that 12-15 is the intended reference
    start = dt.date(1979,12,15)
    # work on the dataframe that was passed in rather than on a notebook-level
    # variable, so the function does not depend on the caller's variable name
    df["irrig_1"] = [dt.date(1980,int(i[:2]),int(i[3:])) - start for i in df.date_1]
    df["irrig_2"] = [dt.date(1980,int(i[:2]),int(i[3:])) - start for i in df.date_2]
    df["irrigation_1"] = df.irrig_1.dt.days
    df["irrigation_2"] = df.irrig_2.dt.days
    df.to_csv(F"{datadir}/{date_pair}_{depth}.csv")

def getExact(date1,date2,depth):
    """
    function runs the model with two fixed watering dates (starting in 1979) and summarises the outputs

    Parameters:
    date1 (str): month-day of the first watering date ("MM-DD")
    date2 (str): month-day of the second watering date ("MM-DD")
    depth: irrigation depth forwarded to runExact
    Returns:
    df: mean, min, max, median and var of every output variable, plus the
        columns "date1" and "date2" holding the two input dates
    """
    measures = ['Yield (tonne/ha)', 'biomass', 'harvest_index', 'canopy_cover',
                'Es', 'Tr', 'DeepPerc', 'CR']
    reducers = ['mean', 'min', 'max', 'median', 'var']
    summary = runExact(date1,date2,1979,depth).agg(
        {measure: list(reducers) for measure in measures}
    )
    summary["date1"] = date1
    summary["date2"] = date2
    return summary

In [6]:
# note that clean water is not specified as it is the default value for irrigation.
# note that the default irrigation method is used to achieve the 30 cm - unclear how to change to "sprinkler" irrigation.
# declare basic parameters from instructions on exercise 7.7
# these names are read as globals by run_model and run_model_, so they must
# be assigned before any of the run cells below is executed

filepath=get_filepath('tunis_climate.txt')
weather_data = prepare_weather(filepath)
my_soil = Soil(soil_type='SandyLoam')
my_crop = Crop('WheatGDD', planting_date='12/01')
# "WP" presumably stands for wilting point - confirm against the aquacrop docs
my_iwc = InitialWaterContent(value = ["WP"])
# simulation period, passed as sim_start_time / sim_end_time
simStart = "1979/08/15"
simEnd = "2002/05/31"

### Interval 1 (30,60)

This is the first set of runs using a fixed date and an interval around it. A dataframe of results of all the exact dates is created and saved, as well as a dataframe representing the results of a fixed number of random runs in the interval around the selected date.

In [7]:
# declare intervals and exact dates to test
# iNN = ["MM-DD", "MM-DD"]: start and end of a window from which runStable draws a watering date
# eNN = "MM-DD": the single fixed watering date passed to getExact
# the numeric suffix appears to be the approximate number of days after 12-15
# (e.g. 01-14 is 30 days after 12-15) - TODO confirm

i30 = ["01-06","01-24"]
i60 = ["02-05","02-21"]
i80 = ["02-26","03-13"]
i110 = ["03-27","04-12"]
i130 = ["04-16","05-02"]
i140 = ["04-26","05-15"]
e30 = "01-14"
e60 = "02-13"
e80 = "03-05"
e110 = "04-04"
e130 = "04-24"
e140 = "05-04"

In [8]:
# depth is written to the 'Depth' column of every schedule and is part of the
# output file names; runs_s is the number of random runs per interval pair
depth = 25
runs_s = 25
# summary of the run with the two exact dates
summary_1 = getExact(e30,e60,depth)

# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "30_60"
interval1 = i30
interval2 = i60
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)

df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 2 (30,80)

In [9]:
# summary of the run with the two exact dates
summary_2 = getExact(e30,e80,depth)

# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "30_80"
interval1 = i30
interval2 = i80
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)

df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 3 (30,110)

In [10]:
# summary of the run with the two exact dates
summary_3 = getExact(e30,e110,depth)

# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "30_110"
interval1 = i30
interval2 = i110
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)

df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 4 (60,110)

In [11]:
# summary of the run with the two exact dates
summary_4 = getExact(e60,e110,depth)

# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "60_110"
interval1 = i60
interval2 = i110
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)

df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 5 (60,130)

In [12]:
# summary of the run with the two exact dates
summary_5 = getExact(e60,e130,depth)
# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "60_130"
interval1 = i60
interval2 = i130
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)
df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 6 (80,110)

In [13]:
# summary of the run with the two exact dates
summary_6 = getExact(e80,e110,depth)
# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "80_110"
interval1 = i80
interval2 = i110
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)
df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 7 (80,140)

In [14]:
# summary of the run with the two exact dates
summary_7 = getExact(e80,e140,depth)
# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "80_140"
interval1 = i80
interval2 = i140
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)
df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

run # 10
run # 20


### Interval 8 (110,140)

In [15]:
# summary of the run with the two exact dates
summary_8 = getExact(e110,e140,depth)

# date_pair is read as a global by getDays and getStatsSeasons to name the csv files
date_pair = "110_140"
interval1 = i110
interval2 = i140
year = 1979
# new_stable is read as a global by getOutputsPD and getOutputsDate
new_stable = runStable(runs_s,interval1[0],interval1[1],interval2[0],interval2[1],year,depth)

df1 = getOutputsPD()
dates = getOutputsDate()
dates = getDates(dates,df1)
stats = getStatsRuns(df1,dates)
# writes {datadir}/{date_pair}_{depth}.csv (per-run statistics)
getDays(stats)
# writes {datadir}/{date_pair}_{depth}s.csv (per-season statistics)
getStatsSeasons(df1)

# stack the eight exact-date summaries into one dataframe; each part gets a
# "names" column holding the name of the summary it came from
mydfs = [summary_1,summary_2,summary_3,summary_4,summary_5,summary_6,summary_7,summary_8]
names = ['summary_1','summary_2','summary_3','summary_4','summary_5','summary_6','summary_7','summary_8']
# inside the comprehension the loop variable `names` shadows the list above;
# zip() still receives the full list because it is evaluated first
stacked_df = pd.concat(
  [df.assign(names = names) for df, names in zip(mydfs, names)])
stacked_df.to_csv(F"{datadir}/exact_{depth}.csv")

run # 10
run # 20


### Interval 6 (114,144) AKA (04-20, 07-05)

### Interval 7 (114,144) AKA (04-20, 07-05)