# Team Ernst Engel Food Demand Notebook
In this notebook, we estimate a system of demands for various food products and examine heterogeneity in household consumption.

In [1]:
import pandas as pd
import numpy as np
from cfe import Regression
import warnings

In [2]:
%run nutritional_adequacy.ipynb

## Data Processing

In [10]:
def process_data(country, year):
    '''
    Description
    --------------------------------------------------
    This function fetches the raw tables through
    get_data() and reshapes them into the four objects
    used by the rest of the notebook.

    Inputs
    --------------------------------------------------
    + country : string; country of interest
    + year : string; year of interest - in the form
            20XX-XX+1
    
    Outputs
    --------------------------------------------------
    + x : pandas dataframe; the expenditure table with
            identically labelled columns summed
            together and zeros replaced by NaN
    + y : stack; natural log of x after indexing it by
            ('i','t','m'), stacked over the column
            level 'j'
    + p : pandas dataframe; the price table (fp_sub)
            returned by get_data(), with its column
            level named 'j'
    + d : stack; copy of the household table (hhc_sub)
            returned by get_data(), indexed by
            ('i','t','m') and stacked over the column
            level 'k'

    Side effects
    --------------------------------------------------
    + All warnings are silenced for the rest of the
            session.
    + The column level of the expenditure and price
            tables returned by get_data() is renamed
            in place (no copy is taken).
    '''
    # Only some of the tables returned by get_data() are used below.
    (food_nutrients, food_prices, hh_chars, expenditures,
     hhc_sub, fp_sub, food_cols, fp_sub_avgs) = get_data(country, year)
    warnings.filterwarnings("ignore")

    # Expenditures: sum columns sharing a label, then treat zeros as missing
    # so that the log below yields NaN rather than -inf.
    expenditures.columns.name = 'j'
    x = expenditures.T.groupby('j').sum().T.replace(0, np.nan)

    # Log expenditures in long form.
    y = np.log(x.set_index(['i', 't', 'm'])).stack()

    # Prices: same object as returned by get_data(), only labelled.
    fp_sub.columns.name = 'j'
    p = fp_sub

    # Household characteristics in long form (worked on a copy).
    d = hhc_sub.copy()
    d.columns.name = 'k'
    d = d.set_index(['i', 't', 'm']).stack()

    return x, y, p, d

## Estimation
Recall the model put forth in lecture: <br>
Let $y_{i}^j$ be log expenditures on food $j$ by household $i$ at a particular time.  We want to estimate a regression that takes the form
$$
      y^j_{i} = A^j(p) + \gamma_j'd_i + \beta_j w_i + \zeta^j_i.
$$

In [269]:
def run_estimation(y, d):
    '''
    Description
    --------------------------------------------------
    This function builds the cfe Regression for the
    model above and shows three diagnostic figures:
    observed vs. predicted log expenditures, the plot
    produced by result.graph_beta(), and the
    distribution of result.get_w().

    Inputs
    --------------------------------------------------
    + y : stack; contains log-normalized
            food expenditures of individual
            households across different years
    + d : stack; contains key characteristics of
            individual households across
            different years
    
    Outputs
    --------------------------------------------------
    + result : cfe regression; the Regression object
            built from y and d
    '''
    result = Regression(y=y, d=d)

    ### Figure 1: observed vs. predicted log expenditures
    plt.figure(figsize=(9.25, 6))
    fit_df = pd.DataFrame({
        'y': y,
        'yhat': result.get_predicted_log_expenditures(),
    })
    sns.scatterplot(data=fit_df, x='yhat', y='y', alpha=0.2)

    # 45-degree reference line over a fixed range (4 to 12)
    diagonal = np.linspace(4, 12, 77)
    plt.plot(diagonal, diagonal, color='red', label=r'$y = \hat{y}$')

    plt.title('Real vs. Predicted Expenditures')
    plt.xlabel(r'Predicted Expenditures')
    plt.ylabel(r'Real Expenditures')
    plt.legend()
    plt.show()

    ### Figure 2: estimated betas
    result.graph_beta()
    plt.title('Income Elasticity')
    plt.show()

    ### Figure 3: histogram and kernel density of w, with a marker at zero
    plt.figure(figsize=(9.25, 6))
    w = result.get_w()
    hist_ax = w.plot.hist(bins=100, density=True)
    w.plot.kde(ax=hist_ax)
    plt.axvline(x=0, color='black', linestyle='--')
    plt.title('Household Welfare Distribution')
    plt.xlabel(r'$w_{i}$')
    plt.ylabel(r'$p(w_{i})$')
    plt.show()

    return result

## Demand and Utility (Counterfactual Experiment)

In [293]:
def change_price(p0, p, j):
    '''
    Description
    --------------------------------------------------
    This function returns a copy of the prices p in
    which the entry labelled j is set to p0. All other
    entries, and p itself, are left untouched.

    Inputs
    --------------------------------------------------
    + p0 : int or float; the new price for the 
            desired good
    + p : pandas series or dataframe; prices indexed
            by good (in this notebook it is called
            with the series of mean prices)
    + j : string; label of the good whose price
            should change
    
    Outputs
    --------------------------------------------------
    + p : same type as the input; a copy of the
            prices with p0 assigned at label j
    '''
    updated_prices = p.copy()
    # Label-based assignment on the copy; the caller's object is not modified.
    updated_prices.loc[j] = p0
    return updated_prices

In [295]:
def plot_demands(food_product, result, p):
    '''
    Description
    --------------------------------------------------
    This function plots demand curves for a given
    food at three budget levels: the median, 25th and
    75th percentiles of predicted total household
    expenditures. The price of the food is varied from
    50% to 200% of its mean while all other prices are
    held at their means.

    Inputs
    --------------------------------------------------
    + food_product : string; the food for which the
            demand curves should be plotted
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    
    Outputs
    --------------------------------------------------
    + Plots the demand curves (quantity on the x-axis,
            relative price on the y-axis)
    '''
    # Total predicted expenditures per household observation
    xhat = result.predicted_expenditures()
    xbar = xhat.groupby(['i','t','m']).sum()

    # Mean prices, restricted to (and ordered like) the estimated goods
    pbar = p.mean()
    pbar = pbar[result.beta.index]
    base_price = pbar[food_product]

    # Price multipliers: 50% to 200% of the reference price
    scale = np.linspace(.5,2,20)

    # One curve per budget level, drawn in this order
    budget_levels = [('Median', 0.5),
                     ('25th Percentile', 0.25),
                     ('75th Percentile', 0.75)]

    for curve_label, q in budget_levels:
        budget = xbar.quantile(q)
        quantities = []
        for s in scale:
            new_prices = change_price(base_price * s, pbar, food_product)
            quantities.append(result.demands(budget, new_prices)[food_product])
        plt.plot(quantities, scale, label = curve_label)

    plt.title(f"Demand of {food_product}")
    plt.xlabel(f"Quantity")
    plt.ylabel(f"Price (relative to base of {pbar[food_product]:.2f})")
    plt.legend()
    plt.show()

In [274]:
def plot_engel_curve(result, p, desired_foods = ['All']):
    '''
    Description
    --------------------------------------------------
    This function plots Engel curves (expenditure
    share against log budget) for one food, several
    foods, or all foods. The budget is varied from 50%
    to 200% of the median predicted total household
    expenditure, with prices held at their means.

    Inputs
    --------------------------------------------------
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    + desired_foods : string or list; the food(s) for
            which an engel curve should be plotted.
            'All' (alone or inside a list) or None
            plots every food; set to 'All' foods by
            default. The default list is only read,
            never modified.
    
    Outputs
    --------------------------------------------------
    + Plots the engel curve(s). If a requested food is
            not among the estimated foods, a warning
            is issued and all foods are plotted
            instead.
    '''
    fig,ax = plt.subplots()
    
    xhat = result.predicted_expenditures()

    xbar = xhat.groupby(['i','t','m']).sum()

    ### Reference budget
    xref = xbar.quantile(0.5)  # 0.5 ==> median
    
    ### Mean prices, restricted to the estimated goods
    pbar = p.mean()
    pbar = pbar[result.beta.index]

    # Vary the budget from 50% to 200% of the reference budget.
    scale = np.linspace(.5,2,20)

    # Expenditure shares of every food at each budget level (computed once).
    shares = [result.expenditures(s * xref, pbar)/(s * xref) for s in scale]

    # Decide explicitly whether all foods were requested instead of relying
    # on a failed lookup; atleast_1d lets a bare string and a list be
    # handled the same way.
    plot_all = (desired_foods is None
                or 'All' in np.atleast_1d(desired_foods).tolist())

    curves = shares
    if not plot_all:
        try:
            curves = [share.loc[desired_foods] for share in shares]
        except KeyError:
            # Unknown food label: keep the previous best-effort behaviour
            # (plot everything) but tell the user about it.
            warnings.warn(f"plot_engel_curve: {desired_foods!r} not found among "
                          "the estimated foods; plotting all foods instead.",
                          stacklevel=2)
            curves = shares

    ax.plot(np.log(scale * xref), curves)

    ax.set_xlabel(f'log budget (relative to base of {xref:.0f})')
    ax.set_ylabel(f'Expenditure Share')
    ax.set_title('Engel Curves')
    plt.show();  

### Indirect Utility

In [277]:
def plot_indirect_utility(result, p):
    '''
    Description
    --------------------------------------------------
    This function plots the indirect utility curve for 
    a given result: utility as a function of the
    budget, with the budget varied from 50% to 200% of
    the median predicted total household expenditure
    and prices held at their means.

    Inputs
    --------------------------------------------------
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    
    Outputs
    --------------------------------------------------
    + Plots the indirect utility curve (budget on the
            x-axis, utility on the y-axis)
    '''
    xhat = result.predicted_expenditures()

    xbar = xhat.groupby(['i','t','m']).sum()

    ### Reference budget
    xref = xbar.quantile(0.5)  # 0.5 ==> median
    
    ### Mean prices, restricted to the estimated goods
    pbar = p.mean()
    pbar = pbar[result.beta.index]

    fig,ax = plt.subplots(figsize=(9.25, 6))

    # Budget multipliers: 50% to 200% of the reference budget
    scale = np.linspace(.5,2,20)
    ax.plot(scale*xref,[result.indirect_utility(s*xref,pbar) for s in scale])
    # The x-axis is the budget (utility is on the y-axis); the label
    # previously lacked its closing parenthesis.
    ax.set_xlabel(f'Budget (relative to base of {xref:.0f})')
    ax.set_ylabel(f'Utility')
    ax.set_title('Indirect Utility Function')
    plt.show();

## Interactivity Development

In [300]:
def estimate_demand_wrapper(country, year):
    '''
    Description
    --------------------------------------------------
    This is a function that chains the main steps of
    the notebook: it processes the data for the given
    country and year, runs the estimation (which shows
    its diagnostic plots), and plots the indirect
    utility curve.

    Inputs
    --------------------------------------------------
    + country : string; country of interest (case is
            ignored, it is lower-cased before use)
    + year : string; year of interest - in the form
            20XX-XX+1
    
    Outputs
    --------------------------------------------------
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    '''
    country = country.lower()
    # Use the `year` argument; the previous code referenced an undefined
    # name (`year_range`) and so ignored what the caller asked for.
    x, y, p, d = process_data(country, year)
    result = run_estimation(y, d)
    plot_indirect_utility(result, p)
    return result, p

In [246]:
def plot_demands_interactive(result, p):
    '''
    Description
    --------------------------------------------------
    This function wraps plot_demands() in a widget:
    the food product is chosen from a dropdown listing
    the foods in result.beta.index, while result and p
    are passed through unchanged.

    Inputs
    --------------------------------------------------
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    
    Outputs
    --------------------------------------------------
    + Interactive widget is displayed
    '''
    # Keyword arguments for plot_demands: one user-controlled, two fixed.
    controls = {
        'food_product': widgets.Dropdown(options = result.beta.index,
                                         description = "Food Product"),
        'result': widgets.fixed(result),
        'p': widgets.fixed(p),
    }
    display(interactive(plot_demands, **controls))

In [248]:
def plot_engel_curve_interactive(result, p):
    '''
    Description
    --------------------------------------------------
    This function wraps plot_engel_curve() in a
    widget: the food is chosen from a dropdown listing
    'All' followed by the foods in result.beta.index,
    while result and p are passed through unchanged.

    Inputs
    --------------------------------------------------
    + result : cfe regression; contains the regression
            object for the specified model above
    + p : pandas dataframe; contains the
            prices of all foods of interest across 
            different years
    
    Outputs
    --------------------------------------------------
    + Interactive widget is displayed
    '''
    # 'All' comes first and is the initial selection.
    food_choices = ['All', *result.beta.index.to_numpy()]

    # Keyword arguments for plot_engel_curve: two fixed, one user-controlled.
    controls = {
        'result': widgets.fixed(result),
        'p': widgets.fixed(p),
        'desired_foods': widgets.Dropdown(options = food_choices,
                                          value = 'All',
                                          description = "Desired Food"),
    }
    display(interactive(plot_engel_curve, **controls))