In [64]:
import pandas as pd
import yaml

## Plan

### Here are the steps you need to complete to make this function:
-  [x] open the .yaml file with tree properties
    - [x] use these tree properties to assign a death year for each tree based on (1) species and (2) age/year planted
    
- [x] develop the architecture of the .yaml file with strategies. What will this look like? What information will it hold? How flexible/rigid will it be (at first)?

- [x] first off, based on the initial data, determine the death year of all of the trees. append this to a column 'death year' in the new dataframe

- [x]  start by making a function that simply creates the new event dataframe assuming no strategies are implemented. I.e. all the rows will have status of either "plant" or "replant." End year is determined by the death year, and replant is simply determined by the death year + 1.

- [ ]  add to that function the ability to add pruning, so the status will now be "plant," "replant," or "prune"
    * pruning will have to be implemented with some sort of year crieteria (in what year, or even better, at what age, should farmers prune?)
    * with the new row that represents the pruning event, death year carries over but is altered slightly (because we are working under the assumption that pruning increases production life)
    * unless another event is called, end year is death year
    
-  [ ] add to that function the ability to add intercropping.
    * intercropping will be implemented with (1) a year criteria and (2) a proportion criteria (what proportion of the trees will be replanted). 
    * if the trees in the originaal dataset reach the year or age criteria, the original row will become inactive because the 'end year' has been reached. The row will then be split into two rows (but both still show the same ID number, which is how we identify them as the same plot). The first row will be the trees that were left alone, and so it will be the exact same except that is has proportionally less cuerdas (the death year will carry over from the original). The second row will have the trees which were replanted. Their


In [127]:
def openYaml(yamlFilePath : str) -> dict: 
    yamlFile = open(yamlFilePath)
    parsed = yaml.load(yamlFile, Loader =yaml.FullLoader)
    return(parsed)

In [128]:
# open the yaml files  to assign attirbutes to the sim
treeAttributes = openYaml("data/trees.yml")
strategyAttributes = openYaml("intervention/strategy.yml")

In [129]:
# initialData = pd.read_csv("data/demoData.csv")
upData = pd.read_csv("data/demoDataUpdated.csv")
upData.head()

Unnamed: 0,plotID,farmerName,treeType,numCuerdas,ageOfTrees,yearPlanted
0,0,Estefanía Cazares,catuai,9,9,2011
1,1,Aldo Linares,catuai,1,2,2018
2,2,Aldo Linares,borbon,1,20,2000
3,3,Linda Gabriel Portillo Herrera,catuai,4,7,2013
4,4,Linda Gabriel Portillo Herrera,borbon,10,27,1993


In [130]:
# yearPlanted = 2020 - initialData['ageOfTrees']
# initialData['yearPlanted'] = yearPlanted
# print(initialData.keys())
# new = initialData.rename(columns={'Unnamed: 0': 'plotID'})
# new.to_csv("data/demoDataUpdated.csv",  index = False)

In [170]:
def transformData(year, simulationYears, farmData, treeAttributes, strategyAttributes):
    """
    takes in data from repository and returns a new,  transformed dataframe that
    tracks events.
    
    year is an int of the  year where the simulation starts.  if the simulation moves forward
    from  the present, the year is the current year. else it is  the  year the simulation
    begins.
    
    simulationYears is the amount of years that the simulation will iterate through. This
    is necessary to make sure the transformed data only captures events within this range. 
    
    farm data is data frame with farmer's plots
    
    tree attributes is dictionary opened from yaml file with attritbutes of trees.
    
    strategy attributes is dictionary opened from yaml file with attributes of strategies.
    
    returns dataframe with events
    
    
    Notes
    -----
    
    as of now, the condition is that intercrop year and prune year are not in the same. but I might be able to figure out how to work that out. 
    """
    
    endYear = year + simulationYears
    
    # iterate through each row of the original plot dataframe
    for i in range(len(farmData)):
        _plotID = farmData["plotID"][i]
        _farmerName = farmData["farmerName"][i]
        _treeType = farmData["treeType"][i]
        _numCuerdas =  farmData["numCuerdas"][i]
        _startYear = farmData["yearPlanted"][i]
        
        # assume all are planted for initialization
        _status = "plant" 

        _treeAge = currentYear - _startYear

        # check to see that this tree exists in config file
        # _altOrth = [treeAttributes[item]["altOrth"] for item in treeAttributes]
        if (_treeType in treeAttributes.keys()): # or _treeType in _altOrth):

            #  isolate dict
            _treeAttributes = treeAttributes[_treeType]
            # isolate smaller variables and dicts
            _cuerdaHarvestCap  = _treeAttributes["cuerdaHarvestCap"]
            _firstHarvest = _treeAttributes["firstHarvest"]
            _fullHarvest = _treeAttributes["fullHarvest"]
            _descentHarvest = _treeAttributes["descentHarvest"]
            _death = _treeAttributes["death"]

            # calculate death year
            _yearsTillDeath = _death["year"] - _treeAge
            _deathYear = currentYear + _yearsTillDeath

            # create the initial row for the transformed dataframe
            initialRow = pd.DataFrame([[_plotID, _farmerName, _treeType, _numCuerdas, _status, 
                                      _startYear, _deathYear]], columns=["plotID", "farmerName", "treeType", 
                                                "numCuerdas", "status", "startYear",
                                               "deathYear"])
           
            # if this is the first row of the whole transformation
            if (i == 0):
                # initialize the transformation dataframe
                transformedData = initialRow

            else:
                transformedData = pd.concat([transformedData, initialRow], ignore_index=True)
                #transformedData  = transformedData.reset_index(drop = True, inplace = True)
                
                
            # now you've transformed all of the original entries to the new format
            # now you should be iterating through transformed data to add events

            # check to see if replant is in strategy (it always should be)
            if strategyAttributes["replant"]["isReplant"] ==  True:
                _replantYear = (_deathYear + 1)
            else:
                _replantYear = None

            # check to see if prune  is in strategy config
            if strategyAttributes["prune"]["isPrune"] ==  True:
                _pruneAge = strategyAttributes["prune"]["age"]
            else:
                _pruneAge = None

            # check to see if  intercrop is in strategy config
            if strategyAttributes["intercrop"] == True:
                _intercropAge = strategyAttributes["prune"]["age"]
            else:
                _intercropAge = None

            # for this specific plot (see plotID),
            # create a row to check against to see if the program needs to continue creating events
            checkRow = initialRow

            # create a  new var for the year of transformation for this plot
            _simYear = year
            # create a new var for the tree's age for this plot
            _simTreeAge = _treeAge

            # iterate through all years of the simulation to check event sequences
            while (_simYear < endYear):
               #  isolate dict
                _deathYear = checkRow["deathYear"][0]


                if (_replantYear):   
                    if (_simYear == _deathYear):
                        # update death year
                        _simTreeAge = -1
                        _deathYear = (_simYear +  1) + _death["year"]
                        _status = "replant"
                        nextRow = pd.DataFrame([[_plotID, _farmerName, _treeType, _numCuerdas, _status, _replantYear, _deathYear]], 
                                            columns=["plotID", "farmerName","treeType", "numCuerdas", "status", "startYear","deathYear"])
                        transformedData  = pd.concat([transformedData, nextRow],  ignore_index=True)
                        checkRow = nextRow
                        _simYear += 1
                        _simTreeAge += 1
                        #print(_simYear)
                        continue


                    elif (_pruneAge):
                        if (_simTreeAge == _pruneAge):
                            adjustedDeathYear = (checkRow["deathYear"][0]) + 4
                            _pruneYear = _simYear
                            _status = "prune"
                            nextRow = pd.DataFrame([[_plotID, _farmerName, _treeType, _numCuerdas, _status, _pruneYear, adjustedDeathYear]], 
                                                   columns=["plotID", "farmerName","treeType", "numCuerdas", "status", "startYear","deathYear"])
                            transformedData  = pd.concat([transformedData, nextRow],  ignore_index=True)
                            checkRow  = nextRow
                            _simYear += 1
                            _simTreeAge += 1
                            continue

                        else:
                            _simYear += 1
                            _simTreeAge += 1
                            continue
                            
                    else:
                        _simYear += 1
                        _simTreeAge += 1
                        continue

                else:
                    _simYear += 1
                    _simTreeAge += 1
                    continue



        else:
            print("No tree of this type:")
            print(_treeType)
            break
            
        
    return(transformedData)

In [173]:
simData = transformData(2020, 30, upData, treeAttributes, strategyAttributes)
simData[50:70]

Unnamed: 0,plotID,farmerName,treeType,numCuerdas,status,startYear,deathYear
50,14,Homero Ayala,catura,6,plant,2010,2026
51,14,Homero Ayala,catura,6,replant,2027,2043
52,14,Homero Ayala,catura,6,prune,2035,2047
53,14,Homero Ayala,catura,6,replant,2027,2064
54,15,Héctor Salgado,borbon,1,plant,2020,2050
55,15,Héctor Salgado,borbon,1,prune,2028,2054
56,16,Héctor Salgado,catuai,7,plant,2009,2026
57,16,Héctor Salgado,catuai,7,replant,2027,2044
58,16,Héctor Salgado,catuai,7,prune,2035,2048
59,16,Héctor Salgado,catuai,7,replant,2027,2066
