In [1]:
import pandas as pd
import os

In [2]:
def renameCountry(df, curName, newName):
    """
        Change a country's name in the innovation ranking dataset to its name in the gdpPerCapita dataset.

        The dataframe is modified in place; nothing is returned. Rows whose
        'Country Name' does not equal curName are left untouched, and if no
        row matches the call is a no-op.

        Arguments:
            df: The dataframe to edit (must have a 'Country Name' column)
            curName: The country's current name in the innovation ranking
            newName: The country's name in the gdpPerCapita dataset
    """
    # Build the mask from the dataframe that was passed in (not from a
    # notebook global) and restrict the assignment to the name column so the
    # other columns of the matching row keep their values.
    matches = df['Country Name'] == curName
    df.loc[matches, 'Country Name'] = newName

In [3]:
def chooseRanking(rank2019, rank2020):
    """
        Pick the newest rank that is actually present.
        rank2019: the rank in 2019 (may be missing/NaN)
        rank2020: the rank in 2020 (may be missing/NaN)
        Returns rank2020 when it is not missing, otherwise rank2019 when it
        is not missing, otherwise the fallback value 200.
    """
    # Candidates are checked newest first; the first non-missing one wins.
    for candidate in (rank2020, rank2019):
        if pd.isna(candidate):
            continue
        return candidate
    # Neither year has a rank.
    return 200
    
def addRankingColumn(globalInnovationIndexRanking):
    """
        Add a 'Ranking' column to globalInnovationIndexRanking (in place).
        Each row's value is chooseRanking applied to that row's '2019' and
        '2020' entries.
    """
    ranks2019 = globalInnovationIndexRanking['2019']
    ranks2020 = globalInnovationIndexRanking['2020']
    # map() walks both columns in lockstep, one (2019, 2020) pair per row.
    globalInnovationIndexRanking['Ranking'] = list(map(chooseRanking, ranks2019, ranks2020))

In [4]:
def cleanFootNoteCells(combinedDF, cellsToClean):
    """
        Overwrite individual cells of the GDP table (e.g. ones containing footnotes).
        The dataframe is edited in place.
        Arguments:
            combinedDF: the dataframe to edit; rows are located through its
                'Country/Territory' column
            cellsToClean: a list of length three tuples of (country, column, actual_value)
        Raises ValueError if a country or column named in cellsToClean is not found.
    """
    # Positional assignment through .iat cannot change the column labels, so
    # one snapshot of them is enough for the whole loop.
    columnLabels = list(combinedDF.columns)
    for entry in cellsToClean:
        # Re-read the countries each time: an earlier entry may have
        # rewritten a value in the 'Country/Territory' column itself.
        countries = combinedDF['Country/Territory'].tolist()
        position = (countries.index(entry[0]), columnLabels.index(entry[1]))
        combinedDF.iat[position] = entry[2]

In [5]:
def mostRecentGDPEstimate(IMFEstimate, UNEstimate, WorldBankEstimate):
    """
        Return the GDP per capita estimate that carries the latest year.
        Each argument is an (estimate, year) tuple; the year is compared
        after conversion with int().
        When several agencies share the latest year, the World Bank estimate
        is preferred over the UN one, and the UN one over the IMF one.
        If all values are 0, 0 is returned.
    """
    # max() keeps the first maximal item it meets, so listing the agencies in
    # reverse order gives the same tie-breaking as taking the last element of
    # a stable ascending sort over (IMF, UN, World Bank).
    candidates = (WorldBankEstimate, UNEstimate, IMFEstimate)
    newest = max(candidates, key=lambda pair: int(pair[1]))
    return newest[0]

def createEstimateColumn(combinedDF):
    """
        Add a 'GDP Per Capita' column to combinedDF (in place).
        For every row, the (estimate, year) pairs from the IMF, UN and World
        Bank columns are handed to mostRecentGDPEstimate and its result is
        stored in the new column.
    """
    # One lazy stream of (estimate, year) pairs per agency.
    imfPairs = zip(combinedDF['IMF Estimate'], combinedDF['IMF Year'])
    unPairs = zip(combinedDF['UN Estimate'], combinedDF['UN Year'])
    worldBankPairs = zip(combinedDF['World Bank Estimate'], combinedDF['World Bank Year'])
    # map() with three iterables feeds one pair from each agency per row.
    combinedDF['GDP Per Capita'] = list(map(mostRecentGDPEstimate, imfPairs, unPairs, worldBankPairs))