In [2]:
import pandas as pd
import yaml

## Plan

### Here are the steps you need to complete to make this function:
-  [x] open the .yaml file with tree properties
    - [x] use these tree properties to assign a death year for each tree based on (1) species and (2) age/year planted
    
- [x] develop the architecture of the .yaml file with strategies. What will this look like? What information will it hold? How flexible/rigid will it be (at first)?

- [x] first off, based on the initial data, determine the death year of all of the trees. append this to a column 'death year' in the new dataframe

- [x]  start by making a function that simply creates the new event dataframe assuming no strategies are implemented. I.e. all the rows will have status of either "plant" or "replant." End year is determined by the death year, and replant is simply determined by the death year + 1.

- [ ]  add to that function the ability to add pruning, so the status will now be "plant," "replant," or "prune"
    * pruning will have to be implemented with some sort of year crieteria (in what year, or even better, at what age, should farmers prune?)
    * with the new row that represents the pruning event, death year carries over but is altered slightly (because we are working under the assumption that pruning increases production life)
    * unless another event is called, end year is death year
    
-  [ ] add to that function the ability to add intercropping.
    * intercropping will be implemented with (1) a year criteria and (2) a proportion criteria (what proportion of the trees will be replanted). 
    * if the trees in the originaal dataset reach the year or age criteria, the original row will become inactive because the 'end year' has been reached. The row will then be split into two rows (but both still show the same ID number, which is how we identify them as the same plot). The first row will be the trees that were left alone, and so it will be the exact same except that is has proportionally less cuerdas (the death year will carry over from the original). The second row will have the trees which were replanted. Their


In [3]:
def openYaml(yamlFilePath : str) -> dict: 
    yamlFile = open(yamlFilePath)
    parsed = yaml.load(yamlFile, Loader =yaml.FullLoader)
    return(parsed)

In [5]:
# open the yaml files  to assign attirbutes to the sim
treeAttributes = openYaml("data/trees.yml")
strategyAttributes = openYaml("intervention/strategy1.yml")

In [7]:
# initialData = pd.read_csv("data/demoData.csv")
upData = pd.read_csv("data/demoData.csv")
upData.head()

Unnamed: 0.1,Unnamed: 0,farmerName,treeType,numCuerdas,ageOfTrees
0,0,Estefanía Cazares,catuai,9,9
1,1,Aldo Linares,catuai,1,2
2,2,Aldo Linares,borbon,1,20
3,3,Linda Gabriel Portillo Herrera,catuai,4,7
4,4,Linda Gabriel Portillo Herrera,borbon,10,27


In [8]:
# yearPlanted = 2020 - initialData['ageOfTrees']
# initialData['yearPlanted'] = yearPlanted
# print(initialData.keys())
# new = initialData.rename(columns={'Unnamed: 0': 'plotID'})
# new.to_csv("data/demoDataUpdated.csv",  index = False)

In [9]:
def isolateAttributes(attributes:dict, treeType:str):
    """
    Takes in a dictionary containing all of the tree attributes, as well as the name of
    the tree type, then returns a dictionary with only the attributes of the tree
    `treeType`.
    """
    
    keys = list(treeAttributes.keys())
    altOrth = [treeAttributes[key]['altOrth'] for key in treeAttributes]
    # tipos = keys + altOrth # all of the possible spellings for the tree types
            
    if treeType in keys:
        treeDict = treeAttributes[treeType]
                
    elif treeType in altOrth:
        keyPair = [(key, treeAttributes[key]['altOrth']) for key in treeAttributes]
        _treeType = ''
        for i,e in enumerate(keyPair):
            if treeType == e[1]: # if it's the altOrth
                _treeType = e[0] # key to the key
                
            
            if len(_treeType) > 0:
                treeDict = treeAttributes[_treeType]
                
            else:
                raise AttributeError(
                """
                '%s' is not a recognized value (orthography) in the `treeAttributes` dict.
                
                """%(treeType))
                
    else:
        raise AttributeError(
        """
        '%s' is not a recognized value (orthography) in the `treeAttributes` dict.
                
        """%(treeType))
        
    return(treeDict)

In [10]:
def transformData(year:int,
                  simulationYears:int,
                  farmData:pd.DataFrame,
                  treeAttributes:dict=None,
                  strategyAttributes:dict=None):
    """
    takes in data from repository and returns a new,  transformed dataframe that
    tracks events.
    
    year is an int of the  year where the simulation starts.  if the simulation moves forward
    from  the present, the year is the current year. else it is  the  year the simulation
    begins.
    
    simulationYears is the amount of years that the simulation will iterate through. This
    is necessary to make sure the transformed data only captures events within this range. 
    
    farm data is data frame with farmer's plots
    
    tree attributes is dictionary opened from yaml file with attritbutes of trees.
    
    strategy attributes is dictionary opened from yaml file with attributes of strategies.
    
    returns dataframe with events
    
    
    Notes
    -----
    
    as of now, the condition is that intercrop year and prune year are not in the same. but I might be able to figure out how to work that out. 
    """
    
    endYear = year + simulationYears
    
    # iterate through each row of the original plot dataframe
    for i in range(len(farmData)):
        plotID = farmData["plotID"][i]
        farmerName = farmData["farmerName"][i]
        treeType = farmData["treeType"][i]
        numCuerdas =  farmData["numCuerdas"][i]
        startYear = farmData["yearPlanted"][i]
        
        # assume all are planted for initialization
        status = "plant" 

        treeAge = year - startYear

        # check to see that this tree exists in config file
        # _altOrth = [treeAttributes[item]["altOrth"] for item in treeAttributes]
        
        if treeAttributes:
            # isolate the dictionary we are concerned with on this plot
            treeDict = isolateAttributes(attributes=treeAttributes, treeType=treeType)

            # isolate individual variables from this dict
            cuerdaHarvestCap  = treeDict["cuerdaHarvestCap"]
            firstHarvest = treeDict["firstHarvest"]
            fullHarvest = treeDict["fullHarvest"]
            descentHarvest = treeDict["descentHarvest"]
            death = treeDict["death"]

            # calculate death year
            yearsTillDeath = death["year"] - treeAge
            deathYear = year + yearsTillDeath

            # create the initial row for the transformed dataframe
            row = pd.DataFrame([[plotID, farmerName, treeType, numCuerdas, status, 
                                      startYear, deathYear]], columns=["plotID", "farmerName", "treeType", 
                                                "numCuerdas", "status", "startYear",
                                               "deathYear"])
           
            # if this is the first row of the whole transformation
            if (i == 0):
                # initialize the transformation dataframe
                transformedData = row

            else:
                transformedData = pd.concat([transformedData, row], ignore_index=True)
                #transformedData  = transformedData.reset_index(drop = True, inplace = True)
                
                
            # now you've transformed all of the original entries to the new format
            # now you should be iterating through transformed data to add events

            # check to see if replant is in strategy (it always should be)
            if strategyAttributes["replant"]["isReplant"] ==  True:
                replantYear = (deathYear + 1)
            else:
                replantYear = None

            # check to see if prune  is in strategy config
            if strategyAttributes["prune"]["isPrune"] ==  True:
                pruneAge = strategyAttributes["prune"]["age"]
                lifeExtend = strategyAttributes["prune"]["lifeExtend"]
            else:
                pruneAge = None

            # check to see if  intercrop is in strategy config
            if strategyAttributes["intercrop"] == True:
                intercropAge = strategyAttributes["prune"]["age"]
            else:
                intercropAge = None

            # for this specific plot (see plotID),
            # create a row to check against to see if the program needs to continue creating events
            checkRow = row

            # create a  new var for the year of transformation for this plot
            simYear = year
            # create a new var for the tree's age for this plot
            simTreeAge = treeAge

            # iterate through all years of the simulation to check event sequences
            while (simYear < endYear):
               #  isolate dict
                deathYear = checkRow["deathYear"][0]


                if (replantYear):
                    # death takes precedence over pruning 
                    if (simYear == deathYear):
                        # update death year
                        simTreeAge = -1
                        deathYear = (simYear +  1) + death["year"]
                        status = "replant"
                        nextRow = pd.DataFrame([[plotID, farmerName, treeType, numCuerdas, status, replantYear, deathYear]], 
                                            columns=["plotID", "farmerName","treeType", "numCuerdas", "status", "startYear","deathYear"])
                        transformedData  = pd.concat([transformedData, nextRow],  ignore_index=True)
                        checkRow = nextRow
                        simYear += 1
                        simTreeAge += 1
                        
                        # no more than one action per year IF action is death
                        continue


                    elif (pruneAge):
                        if (simTreeAge == pruneAge):
                            # add years proportional to tree's lifespan:
                            addedYears = round((death["year"] * lifeExtend))
                            adjustedDeathYear = (checkRow["deathYear"][0]) + addedYears
                            pruneYear = simYear
                            status = "prune"
                            nextRow = pd.DataFrame([[plotID, farmerName,treeType, numCuerdas, status, pruneYear, adjustedDeathYear]], 
                                                   columns=["plotID", "farmerName","treeType", "numCuerdas", "status", "startYear","deathYear"])
                            transformedData  = pd.concat([transformedData, nextRow],  ignore_index=True)
                            checkRow  = nextRow
                            simYear += 1
                            simTreeAge += 1
                            continue

                        else:
                            simYear += 1
                            simTreeAge += 1
                            continue
                            
                    else:
                        simYear += 1
                        simTreeAge += 1
                        continue

                else:
                    simYear += 1
                    simTreeAge += 1
                    continue



        else:
            print("No tree attributes!!!")
            print(treeType)
            break
            
        
    return(transformedData)

In [11]:
simData = transformData(year=2020, simulationYears=30, farmData=upData, treeAttributes=treeAttributes, strategyAttributes=strategyAttributes)
simData[10:80]

KeyError: 'plotID'

# events --> class?