In [None]:
!pip install -r requirements_2.txt
#!pip install matplotlib

In [None]:
# just setting up functions to do demand stuff with 

import numpy as np
import pandas as pd
from cfe.estimation import drop_columns_wo_covariance
from cfe import Regression
from eep153_tools.sheets import read_sheets
!pip install fooddatacentral

In [None]:
sheets = '1J7d0WCnpAcMoaAScLzqaAEeexuUPdcPRYO42OPjJPu8' #read in sheets

In [None]:
# Food expenditures; the household identifiers (i, t, m) are still ordinary
# columns at this point and only become the index when `y` is built below.
x = read_sheets(sheets, sheet='Food Expenditures')
x.columns.name = 'j'

# Prices, indexed by (t, m).
p = read_sheets(sheets, sheet='PriceAsColumns')
p = p.set_index(['t', 'm'])
p.columns.name = 'j'

# Household characteristics, indexed by (i, t, m).
d = read_sheets(sheets, sheet="Household Characteristics").set_index(['i', 't', 'm'])
d.columns.name = 'k'

# Household characteristics from the 'HHChar_wDummy' sheet (includes the
# 'Rural' column used further down to split the sample).
d_dummy = read_sheets(sheets, sheet='HHChar_wDummy').set_index(['i', 't', 'm'])
d_dummy.columns.name = 'k'

# Sum columns that share a label, then turn zeros into NaN so the log
# below does not produce -inf.
x = x.T.groupby('j').sum().T
x = x.replace(0, np.nan)

# Log expenditures, indexed by (i, t, m).
y = np.log(x.set_index(['i', 't', 'm']))

In [None]:
# doing this to get index

# Split on the 'Rural' flag and flatten the index in one step so that the
# household id 'i' is available as a regular column.
d_R = d_dummy.loc[d_dummy['Rural'] == 1].reset_index()
d_U = d_dummy.loc[d_dummy['Rural'] == 0].reset_index()

In [None]:
rural_index = d_R['i']
urban_index = d_U['i']

In [None]:
# Keep the expenditure rows whose household id 'i' appears in each area.
x_R = x.loc[x['i'].isin(rural_index)]
x_U = x.loc[x['i'].isin(urban_index)]

In [None]:
# Log expenditures per area, indexed by (i, t, m).
y_R = x_R.set_index(['i', 't', 'm']).pipe(np.log)
y_U = x_U.set_index(['i', 't', 'm']).pipe(np.log)

In [None]:
# Rebuild the rural/urban characteristic frames straight from d_dummy so they
# carry the (i, t, m) index again; the earlier d_R/d_U had their index reset
# to expose 'i' as a column and are overwritten here.
d_R = d_dummy[d_dummy['Rural'] == 1]
d_U = d_dummy[d_dummy['Rural'] == 0]

In [None]:
print(y_R.index.names)
print(d_R.index.names)

In [None]:
from cfe import Regression

In [None]:
#This cell is to run a regression estimation to predict household expenditures

def regress(y, d):
    """
    Fit a `Regression` of log expenditures `y` on characteristics `d`.

    Both wide frames are stacked to long form first. Returns a tuple
    ``(result, predicted_expenditures)``: the fitted `Regression` object and
    a DataFrame with columns 'y' (the stacked input) and 'yhat' (the output
    of ``result.get_predicted_log_expenditures()``).

    Raises AssertionError if the stacked index levels are not
    (i, t, m, j) for `y` and (i, t, m, k) for `d`.
    """
    y_long, d_long = y.stack(), d.stack()

    # Fail early on mis-indexed input rather than inside the estimator.
    assert y_long.index.names == ['i','t','m','j']
    assert d_long.index.names == ['i','t','m','k']

    result = Regression(y=y_long, d=d_long)
    yhat = result.get_predicted_log_expenditures()
    predicted_expenditures = pd.DataFrame({'y': y_long, 'yhat': yhat})
    return result, predicted_expenditures

In [None]:
# Full-sample regression.
# NOTE(review): the name `all` shadows Python's builtin all() for the rest of
# the notebook; later cells refer to this object by that name.
all, x_pred_all = regress(y, d)
#print(all.predicted_expenditures())

In [None]:
rural, x_pred_R = regress(y_R, d_R)
urban, x_pred_U = regress(y_U, d_U)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme()

fig, ax = plt.subplots(1,2, figsize = (15,7), dpi=300)

# Actual log expenditures ('y') go on the x-axis and fitted values ('yhat')
# on the y-axis, so that the plotted data agree with the axis labels.
for panel, area, pred in zip(ax, ('RURAL', 'URBAN'), (x_pred_R, x_pred_U)):
    panel.scatter(pred['y'], pred['yhat'])
    panel.set_xlabel('Actual Log Expenditures')
    panel.set_ylabel('Predicted Log Expenditures')
    panel.set_title(f'{area}: Predicted vs. Actual Expenditures')

plt.savefig('Predicted_Expenditures.png')

In [None]:
# NOTE(review): `all_betas` is first assigned in the following cell, so this
# cell raises NameError under Restart & Run All.
all_betas.index

In [None]:
all_betas = all.get_beta()
rural_betas = rural.get_beta()
urban_betas = urban.get_beta()

#def get_beta_summary(betas): 
  #  max = betas[beta
   # print(f'The most Frische-elastic good is {betas==betas[max(betas)]}'.format(str))
   # print(f'The most Frische-inelastic good is {min(betas)}'.format(str))

In [None]:
rural.graph_beta() 
plt.title('Rural Frisch Elasticities')
plt.savefig('RuralFrisch.png')

In [None]:
urban.graph_beta() 
plt.title('Urban Frisch Elasticities')
plt.savefig('UrbanFrisch.png')

In [None]:
# NOTE(review): `rural_max` is first assigned in a later cell, so this cell
# raises NameError under Restart & Run All.
rural_max.index

In [None]:
beta_dict = {'All' : all.get_beta(), 
             'Rural' : rural.get_beta(), 
             'Urban' : urban.get_beta()}
betas = pd.DataFrame(beta_dict) # made a dataframe with Frisch elasticities for each good in each place
betas

In [None]:
# NOTE(review): `betas_inelastic` is not assigned anywhere in the visible
# notebook, and `rural_betas_inelastic` is first assigned in the following
# cell; this cell depends on leftover kernel state.
print(betas_inelastic.index)
print(rural_betas_inelastic.index)

In [None]:
all_betas_inelastic = betas[betas['All']<1]
rural_betas_inelastic = betas[betas['Rural']<1]
urban_betas_inelastic = betas[betas['Urban']<1]

In [None]:
rural_betas_inelastic

In [None]:
import plotly.express as px

In [None]:
fig = px.bar(all_betas_inelastic[['Rural', 'Urban']], barmode='group', title = 'Urban-Rural Elasticity Comparison, Elasticity < 1', height=800, width=1200)
fig.update_layout(yaxis_title='Elasticity Magnitude', xaxis_title = 'Food')
fig.write_image('Elasticity_Mag_Comparison.png')

In [None]:
# Grouped bar chart of rural vs. urban elasticities for the goods whose
# rural elasticity is below 1.
# NOTE(review): this writes to the same file name as the previous cell
# ('Elasticity_Mag_Comparison.png'), so the earlier image is overwritten —
# confirm whether two separate files were intended.
fig = px.bar(rural_betas_inelastic[['Rural', 'Urban']], barmode='group', title = 'Urban-Rural Elasticity Comparison', height=800, width=1200)
fig.update_layout(yaxis_title='Elasticity Magnitude', xaxis_title = 'Food')
fig.write_image('Elasticity_Mag_Comparison.png')
fig

In [None]:
px.scatter(rural_betas_inelastic[['Rural', 'Urban']])

In [None]:
# Rows of `betas` holding the largest / smallest elasticity in each column.
# Series.max()/.min() skip NaN. The builtin max()/min() do not: their result
# depends on where a NaN sits in the column and can itself be NaN, which
# would make the selection empty and `.index[0]` below raise IndexError.
# (`betas` combines three separately estimated beta vectors, so a column may
# contain NaN for goods missing from one sample.)
all_max = betas[betas['All']==betas['All'].max()]
all_min = betas[betas['All']==betas['All'].min()]

rural_max = betas[betas['Rural']==betas['Rural'].max()]
rural_min = betas[betas['Rural']==betas['Rural'].min()]

urban_max = betas[betas['Urban']==betas['Urban'].max()]
urban_min = betas[betas['Urban']==betas['Urban'].min()]


print(f'In Guatemala, the most Frische-elastic good is {all_max.index[0]} at Elasticity = {all_max["All"].iloc[0]}')
print(f'In Guatemala, the most Frische-inelastic good is {all_min.index[0]} at Elasticity = {all_min["All"].iloc[0]}')
print(' ')
print(f'In urban areas, the most Frische-elastic good is {urban_max.index[0]} at Elasticity = {urban_max["Urban"].iloc[0]}')
print(f'In urban, the most Frische-inelastic good is {urban_min.index[0]} at Elasticity = {urban_min["Urban"].iloc[0]}')
print(' ')
print(f'In rural areas, the most Frische-elastic good is {rural_max.index[0]} at Elasticity = {rural_max["Rural"].iloc[0]}')
print(f'In rural, the most Frische-inelastic good is {rural_min.index[0]} at Elasticity = {rural_min["Rural"].iloc[0]}')



In [None]:
all_gamma = all.get_gamma()
rural_gamma = rural.get_gamma()
urban_gamma = urban.get_gamma()

In [None]:
all_gamma.columns

In [None]:
px.scatter(rural_gamma.drop(columns='Constant'))

In [None]:
# Collect the get_w() output of each regression (full sample, rural, urban)
# into one DataFrame with one column per sample; later cells plot these
# columns as welfare distributions.
w_dict = {'All' : all.get_w(), 'Rural' : rural.get_w(), 'Urban' : urban.get_w()}
welfare = pd.DataFrame(w_dict)
welfare

In [None]:
# One density histogram of the household welfare measure per area, each in
# its own figure and saved under '<Area>_Welfare_Distribution'.
for area in ('Rural', 'Urban'):
    plt.figure(dpi=200)
    welfare[area].plot.hist(bins=50, density=True, rwidth=0.95)
    plt.title(f'{area} Welfare Distribution')
    plt.savefig(f'{area}_Welfare_Distribution')

In [None]:
pbar = p.mean()

In [None]:
#Finally, define a function to change a single price in the vector $p$:

def my_prices(p0,p=pbar,j=''):
    """
    Return a copy of price vector `p` with the entry for good `j` set to `p0`.

    The input vector is left untouched. The default for `p` is whatever
    `pbar` held when this function was defined.
    """
    adjusted = p.copy()
    adjusted.loc[j] = p0
    return adjusted

def plot_demand(regression, food=''):
    """
    Plot demand curves for `food` at three household budget levels.

    Budgets are the 0.5, 0.25 and 0.75 quantiles of total predicted food
    expenditures per (i, t, m) household. The price of `food` is varied from
    50% to 200% of its mean in the notebook-level price table `p`, with the
    price vector for each point built by `my_prices`. Draws into a new
    matplotlib figure; returns nothing.
    """
    # Total predicted food expenditures per household.
    budgets = regression.predicted_expenditures().groupby(['i','t','m']).sum()

    # Mean prices, restricted to the goods that have an estimated beta.
    ref_prices = p.mean()
    ref_prices = ref_prices[regression.beta.index]

    # Price multipliers: 50% to 200% of the reference price.
    scale = np.linspace(.5,2,20)

    plt.figure(figsize = (5,5), dpi=300)
    curves = ((0.5, 'Median Budget'), (0.25, '25th Percentile'), (0.75, '75th Percentile'))
    for q, label in curves:
        budget = budgets.quantile(q)
        quantities = [
            regression.demands(budget, my_prices(ref_prices[food]*s, ref_prices, j=food))[food]
            for s in scale
        ]
        plt.plot(quantities, scale, label = label)
    plt.legend(facecolor = 'white')

    plt.ylabel(f"Price (relative to base {ref_prices[food]:.2f})")
    plt.xlabel(f"Quantity Demanded")
    plt.title(f'{food}', fontsize = 15)

## Plotting Demand for Different Brackets

In [None]:
plot_demand(all, food='Peas')

In [None]:
plot_demand(all, food = 'Ice Cream')
plt.title('Ice Cream, Highest Elasticity Good')
plt.savefig('IceCream_Guatemala_OverallDemand.png')

In [None]:
plot_demand(all, food = 'Maize')
plt.title('Maize, Lowest Elasticity Good')
plt.savefig('Maize_Guatemala_OverallDemand.png')

In [None]:
def plot_demand_comparison(regression1, regression2, food=''):
    """
    Plot median-budget demand curves for `food` from two regressions.

    For each regression the budget is the 0.5 quantile of that regression's
    own total predicted expenditures per (i, t, m) household, and prices are
    the means of the notebook-level table `p` restricted to that regression's
    estimated goods. The price of `food` is varied from 50% to 200% of its
    mean.

    The first curve is labelled 'Urban' and the second 'Rural', so callers
    must pass the urban regression as `regression1` and the rural one as
    `regression2`. The figure is saved to
    'U-R_BigElDiff_DemandComparison_{food}.png'.
    """
    good = food
    base_prices = p.mean()

    # Price multipliers: 50% to 200% of the reference price.
    scale = np.linspace(.5,2,20)

    plt.figure(figsize = (5,5), dpi=200)
    curves = ((regression1, 'Urban', 'dodgerblue'),
              (regression2, 'Rural', 'goldenrod'))
    for reg, area, color in curves:
        # Each curve uses its OWN regression's predicted expenditures; the
        # second budget was previously derived from regression1 by mistake.
        budgets = reg.predicted_expenditures().groupby(['i','t','m']).sum()
        median_budget = budgets.quantile(0.5)
        # Only use prices for goods this regression can estimate.
        prices = base_prices[reg.beta.index]
        quantities = [
            reg.demands(median_budget, my_prices(prices[good]*s, prices, j=good))[good]
            for s in scale
        ]
        plt.plot(quantities, scale,
                 label = f'{area}, (Elasticity = {reg.get_beta()[good]:.2f})',
                 lw = 2, color = color)

    plt.legend(facecolor = 'white')

    plt.ylabel(f"Price [relative to base {base_prices[good]:.2f}]")
    plt.xlabel(f"Quantity Demanded")
    plt.title(f'{good}', fontsize = 15)

    plt.savefig(f'U-R_BigElDiff_DemandComparison_{good}.png')

In [None]:
plot_demand_comparison(urban, rural, food = 'Ice Cream')
plot_demand_comparison(urban, rural, food = 'Maize')

In [None]:
# Absolute urban-rural difference in elasticity for each good.
# Raw string: '\D' is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning. The column label itself is unchanged.
betas[r'$\Delta$ (|U-R|)'] = np.abs(betas['Urban'] - betas['Rural'])
# Drop goods that lack an estimate in any column.
betas = betas.dropna()

In [None]:
# Largest urban-rural gap first. The raw string avoids the invalid '\D'
# escape while naming the same column.
betas = betas.sort_values(by=r'$\Delta$ (|U-R|)', axis=0, ascending=False)

In [None]:
betas

In [None]:
big_diff_foods = betas[0:15].index

## From the differences, plot the demand curves for the goods with the largest difference between the elasticities:

In [None]:
#first5_bigdiff = big_diff_foods[0:4] 

for i in big_diff_foods: 
    plot_demand_comparison(urban, rural, food=i)

In [None]:
for i in big_diff_foods: 
    plot_demand_comparison(urban, rural, food = i)

In [None]:
#plt.figure()
plot_demand_comparison(urban, rural, food = 'Bread (french)')
#plt.figure()
plot_demand_comparison(urban, rural, food = 'Milk (powdered)')
#plt.figure()
plot_demand_comparison(urban, rural, food = 'Bread (sweet)')
#plt.figure()
plot_demand_comparison(urban, rural, food = 'Oranges')

In [None]:
# This finds the food budget for all households and determines the median food budget

import numpy as np

xhat = all.predicted_expenditures()

# Total food expenditures per household
xbar = xhat.groupby(['i','t','m']).sum()

# Reference budget
xref = xbar.quantile(0.5)  # Household at 0.5 quantile is median

In [None]:
# This chooses a reference price for avocados

# Reference prices chosen from a particular time; average across place.
# These are prices per kilogram:
pbar = p.mean()
pbar = pbar[all.beta.index] # Only use prices for goods we can estimate

#Finally, define a function to change a single price in the vector $p$:
def my_prices(p0,p=pbar,j='Avocado'):
    """
    Return a copy of price vector `p` with the entry for good `j` set to `p0`.

    This redefines the `my_prices` declared earlier in the notebook; the only
    signature difference is that `j` now defaults to 'Avocado'. The default
    for `p` is whatever `pbar` held when this cell was run.
    """
    adjusted = p.copy()
    adjusted.loc[j] = p0
    return adjusted

In [None]:
#This graph shows the demand curves of households for avocado at different budgets

import matplotlib.pyplot as plt
%matplotlib inline

use = 'Avocado'  # Good we want demand curve for

# Vary prices from 50% to 200% of reference.
scale = np.linspace(.5,2,20)

# Demand for Avocado for household at median budget
plt.plot([all.demands(xref,my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

# Demand for Avocado for household at 25% percentile
plt.plot([all.demands(xbar.quantile(0.25),my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

# Demand for Avocado for household at 75% percentile
plt.plot([all.demands(xbar.quantile(0.75),my_prices(pbar[use]*s,pbar))[use] for s in scale],scale)

plt.ylabel(f"Price (relative to base of {pbar[use]:.2f})")
plt.xlabel(f"Quantities of {use} Demanded")

In [None]:
import plotly.express as px
# NOTE(review): `x_Rural` is not assigned anywhere in the visible notebook
# (the rural expenditure frame above is named `x_R`) — confirm which frame is
# intended; as written this depends on leftover kernel state.
px.scatter(x_Rural)

In [None]:
px.scatter(x_Urban)

In [None]:
# NOTE(review): `food_columns` is first assigned in a later cell and
# `x_Rural` is not assigned in the visible notebook, so this cell fails under
# Restart & Run All.
px.bar(x_Rural, x=food_columns)

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.hist(x_Rural[food_columns])

In [None]:
food_columns = ['Avocado', 'Bananas', 'Beans', 'Beef', 'Beef Entrails', 'Beer', 'Beets',
       'Bread (french)', 'Bread (sliced)', 'Bread (sweet)', 'Brown Sugar',
       'Butter', 'Cabbages', 'Cakes', 'Candies', 'Carrots', 'Celery',
       'Chayote', 'Chicken', 'Chicken Giblets', 'Chilies', 'Chocolate',
       'Cigarettes', 'Coffee (instant)', 'Cookies', 'Cooking Oil',
       'Corn Atole', 'Corn Dough (fresh(', 'Corn Flakes', 'Corn Tamales',
       'Cornmeal', 'Crema Fresca', 'Cucumbers', 'Dried Fruit', 'Dried Seeds',
       'Eggs', 'Fish (Canned)', 'Fish (Fresh)', 'Garlic', 'Granulated Sugar',
       'Herbs', 'Honey, Molasses', 'Ice Cream', 'Incaparina', 'Infant Formula',
       'Jam', 'Juice (Branded)', 'Juices (Packaged)', 'Lard',
       'Leafcutter Ants and Other Insect', 'Lemons', 'Lettuce', 'Liquor',
       'Maize', 'Mangos', 'Margarine', 'Melons', 'Milk', 'Milk (condensed)',
       'Milk (powdered)', 'Mineral Water', 'Mushrooms', 'Onions', 'Oranges',
       'Other', 'Other Atoles', 'Other Canned Goods', 'Other Sauces', 'Paches',
       'Papaya', 'Pasta', 'Peas', 'Pineapples', 'Pork', 'Pork (cracklings)',
       'Potatoes', 'Pumpkin', 'Queso', 'Rice', 'Rolled Oats', 'Rural', 'Salt',
       'Sausages', 'Soup', 'Spices', 'Sweets', 'Tea', 'Tomato Sauce',
       'Tomatoes', 'Tortillas', 'Tostadas', 'Vegetable Oil', 'Water (bottle)',
       'Watermelon', 'Wheat Flour', 'Yogurt', 'Yucca'