# Goal: Using historical data on budget code changes, build out the "budget history" for any budget code of our choosing.
**Key use case:** Often, we get requests to summarize the funding history of a program, particularly when that program becomes of interest to the mayor, or it is slated to receive new funding. With our current record keeping, finding the funding history of a budget code is "clunky" and manual -- you must sort through FMS screens and manually record the data. Often, you can't even copy and paste. This script aims to simplify that process, using a download of budget *changes* to build the history of a particular budget code.

**Goal Output**: A dataframe where each row uniquely identifies a Plan and Year (for example, the FY25 Executive Plan would be one row and the FY25 Adopted Plan would be another) and shows, for each fiscal year in which the budget code has existed, how much funding was in the budget as of that Plan and Year.

In [None]:
def budget_history_by_plan_by_bc(budget_code, source="YOUR_FILE.xlsx"):
    """Build the plan-by-plan funding history of a single budget code.

    Each input row is an "expense initiative": a *change* to the budget made
    at a given financial plan (PLAN_COND) of a given plan year (PlanYear).
    The function accumulates the approved changes in chronological order and
    reports, for every plan in which the budget code was changed, the
    resulting budget in each fiscal year.

    Args:
        budget_code: Value matched (by exact equality) against the
            EXP_BUDGET_CODE_X column.
        source: Path of the Excel download to read with ``pandas.read_excel``,
            or an already-loaded DataFrame with the same columns (it is
            copied, never modified). Defaults to "YOUR_FILE.xlsx".

    Returns:
        A DataFrame with one row per year/plan, ordered by plan year and then
        by plan (NOV, JAN, EXEC, ADP, MOD; unrecognised plans last). Columns
        are 'Year/Plan' (e.g. "2025/NOV"), one integer-labelled column per
        fiscal year from (earliest PlanYear - 1) through (latest PlanYear + 3),
        then 'Plan_Year' and 'Plan'.

    Raises:
        ValueError: If no approved initiative for ``budget_code`` exists in
            unit of appropriation 006.
        KeyError: If an expected column is missing from the input.
    """

    ## Import needed packages

    import pandas as pd

    ## The five amount columns are relative to the plan year of each initiative:
    ## CURR_EXP_AMT applies to fiscal year PlanYear - 1, OY1..OY4 to PlanYear .. PlanYear + 3.
    amount_columns = ['CURR_EXP_AMT', 'OY1_EXP_AMT', 'OY2_EXP_AMT', 'OY3_EXP_AMT', 'OY4_EXP_AMT']

    ## Define a dictionary that creates an order of financial plans
    plan_order = {'NOV': 1, 'JAN': 2, 'EXEC': 3, 'ADP': 4, 'MOD': 5}

    ## Load in expense initiatives (these are changes to the budget code) and clean up the data
    if isinstance(source, pd.DataFrame):
        expense_initiatives = source.copy()  # never mutate the caller's frame
    else:
        expense_initiatives = pd.read_excel(source)
    expense_initiatives.columns = expense_initiatives.columns.str.replace('\n', '', regex=False)
    expense_initiatives = expense_initiatives.dropna(subset=['EXP_AGY'])

    ## Include only unit of appropriation 006, only the chosen budget code, and only
    ## changes with approval status 2 (the status the original author treats as "approved").
    keep = (
        (expense_initiatives['EXP_UNIT_OF_APPR'] == 6.0)
        & (expense_initiatives['EXP_BUDGET_CODE_X'] == budget_code)
        & (expense_initiatives['APV_STS_CD'] == 2.0)
    )
    ## .copy() so the column assignments below act on an independent frame, not a view.
    expense_initiatives = expense_initiatives[keep].copy()

    if expense_initiatives.empty:
        raise ValueError(
            f"No approved expense initiatives found for budget code {budget_code!r} "
            "in unit of appropriation 006."
        )

    expense_initiatives['PlanYear'] = expense_initiatives['PlanYear'].astype(int)

    ## A blank amount is treated as "no change"; otherwise one NaN would turn every
    ## later cumulative total into NaN.
    expense_initiatives[amount_columns] = expense_initiatives[amount_columns].fillna(0)

    ## Sort chronologically: plan year, then plan, then inter-plan mods first
    ## (mods occur before the financial plan is executed).
    expense_initiatives['Plan_sort'] = expense_initiatives['PLAN_COND'].map(plan_order)
    expense_initiatives['ModType_sort'] = (expense_initiatives['INIT_TYPE'] != 'MOD').astype(int)
    expense_initiatives = expense_initiatives.sort_values(by=['PlanYear', 'Plan_sort', 'ModType_sort'])

    ## Fiscal years covered by the history: from the current year of the earliest plan
    ## through the last out-year of the latest plan.
    first_year = int(expense_initiatives['PlanYear'].min()) - 1
    latest_year = int(expense_initiatives['PlanYear'].max()) + 3
    fiscal_years = list(range(first_year, latest_year + 1))
    last_offset = len(amount_columns) - 1

    ## Walk the initiatives in order, keeping a running budget per fiscal year.
    running_budget = dict.fromkeys(fiscal_years, 0)
    rows_by_year_plan = {}

    for record in expense_initiatives.to_dict(orient='records'):
        plan_year = int(record['PlanYear'])
        changes = [record[column] for column in amount_columns]

        ## Years before plan_year - 1 are untouched by this initiative. Years past the
        ## last out-year are "baselined": the final out-year change is carried forward.
        for fiscal_year in range(plan_year - 1, latest_year + 1):
            offset = min(fiscal_year - (plan_year - 1), last_offset)
            running_budget[fiscal_year] += changes[offset]

        ## Overwrite the snapshot on every initiative, so the stored row for a plan
        ## reflects every change up to and including that plan's *last* initiative.
        plan = record['PLAN_COND']
        year_plan = f"{plan_year}/{plan}"
        rows_by_year_plan[year_plan] = {
            'Year/Plan': year_plan,
            **running_budget,
            'Plan_Year': plan_year,
            'Plan': plan,
        }

    ## Now, turn the snapshots into a dataframe, ordered from the earliest plan to the latest.
    plan_data_df = pd.DataFrame(
        list(rows_by_year_plan.values()),
        columns=['Year/Plan', *fiscal_years, 'Plan_Year', 'Plan'],
    )

    plan_data_df['Plan_sort'] = plan_data_df['Plan'].map(plan_order)
    plan_data_df = plan_data_df.sort_values(by=['Plan_Year', 'Plan_sort'])
    plan_data_df = plan_data_df.drop(columns=['Plan_sort']).reset_index(drop=True)

    return plan_data_df

## Execute the function

# The budget code is matched by exact equality, so strip stray whitespace from the
# typed value; otherwise an accidental leading/trailing space matches nothing.
budget_code = input("Enter the budget code: ").strip()
budget_history_by_plan_by_bc(budget_code)