In [9]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [10]:
nielsen15 = pd.read_csv('../../Nielsen/aggregated_nielsen_2015.csv')
nielsen16 = pd.read_csv('../..//Nielsen/aggregated_nielsen_2016.csv')
nielsen15['year'] = 2015
nielsen16['year'] = 2016

In [11]:
nielsen = pd.concat((nielsen15, nielsen16))
nielsen = nielsen[~nielsen.is_walmart]


In [12]:
## Entriy/exit dates
fandom = pd.read_csv('../data_collection/plein_de_data/fandom_traitées.csv', parse_dates=['Opening_date', 'Closing_date'])[['State', 'County_name', 'County_fips', 'Opening_date', 'Closing_date']]

# We drop the state in which we do not trust our data (some mistakes stillremain)
fandom = fandom[~np.isin(fandom.State, ('CA', 'GA', 'KS', 'LA', 'TX'))]
nielsen = nielsen[~np.isin(nielsen.store_state, ('CA', 'GA', 'KS', 'LA', 'TX'))]

# We concentrate our study on the movements (entries & exits) during the fiscal years 2015 and 2016
movements = fandom[((fandom.Opening_date >= '2015-01-31') & (fandom.Opening_date <= '2017-01-31')) | ((fandom.Closing_date >= '2015-01-31') & (fandom.Closing_date <= '2017-01-31'))]
#movements['year'] = movements.Opening_date.dt.year
#movements['month'] = movements.Opening_date.dt.month

In [13]:
nielsen.groupby('product_group_descr').mean().sort_values('upc_price_std', ascending=True).head(10)

  nielsen.groupby('product_group_descr').mean().sort_values('upc_price_std', ascending=True).head(10)


Unnamed: 0_level_0,is_walmart,guessed_store_county_fips,purchase_year,purchase_month,upc_price,upc_price_std,nb_of_obs,year
product_group_descr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
YEAST,0.0,34358.913876,2015.550239,6.937799,3.457727,0.190346,1.186603,2015.550239
"JUICES, DRINKS-FROZEN",0.0,31276.076638,2015.491695,6.35975,1.825015,0.440367,4.133884,2015.491695
ICE,0.0,29888.888,2015.499846,6.668,2.3181,0.4655,2.631077,2015.499846
VEGETABLES - CANNED,0.0,31244.049491,2015.498625,6.517163,1.038096,0.517872,29.851215,2015.498625
PASTA,0.0,31698.057646,2015.500946,6.462988,1.449016,0.546721,15.659901,2015.500946
"PUDDING, DESSERTS-DAIRY",0.0,31518.91459,2015.495125,6.446009,2.434284,0.547729,4.56952,2015.495125
FRUIT - CANNED,0.0,31497.732971,2015.496603,6.641208,1.654077,0.633034,10.802289,2015.496603
DOUGH PRODUCTS,0.0,31231.945841,2015.496236,6.548513,1.972625,0.634339,8.20764,2015.496236
SOUP,0.0,31434.031,2015.497921,6.540429,1.506098,0.698717,25.787703,2015.497921
GUM,0.0,31532.015477,2015.496877,6.441103,1.745169,0.737202,6.212702,2015.496877


In [14]:
categories = [
    "FRESH PRODUCE",
    "BREAD AND BAKED GOODS",
    "MILK",
    "SNACKS",
    "PACKAGED MEATS-DELI",
    "CHEESE",
    "UNPREP MEAT/POULTRY/SEAFOOD-FRZN",
    "CARBONATED BEVERAGES",
    "CONDIMENTS, GRAVIES, AND SAUCES",
    "CANDY",
    "JUICE, DRINKS - CANNED, BOTTLED",
    "EGGS",
    "CEREAL",
    "PASTA",
]

In [15]:
state_for_cat = {"FRESH PRODUCE" : "FL",
    "BREAD AND BAKED GOODS" : "FL",
    "MILK" : "FL",
    "SNACKS" : "FL",
    "PACKAGED MEATS-DELI" : "FL",
    "CHEESE" : "FL",
    "UNPREP MEAT/POULTRY/SEAFOOD-FRZN" : "FL",
    "CARBONATED BEVERAGES" : "FL",
    "CONDIMENTS, GRAVIES, AND SAUCES" : "FL",
    "CANDY" : "FL",
    "JUICE, DRINKS - CANNED, BOTTLED" : "FL",
    "EGGS" : "FL",
    "CEREAL" : "FL",
    "PASTA" : "FL"}

## PREMIER MODELE

$$Prices_{i, t} = \alpha + \beta treat_i + \gamma treat_i* post_t$$

#### In a single state

##### FL

In [21]:
state = "FL"

In [22]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 39.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
MILK
Coef : -0.02488351043821979
Coef/err : 1.6053059640572942
CI_up : -0.05445816333846343
CI_down : 0.005616178309467701
Size of the control group: 39.
Size of the treatment group: 10.
SNACKS
Coef : 0.041061644148431276
Coef/err : 1.2311049690059088
CI_up : -0.02361168079419307
CI_down : 0.11001875544619799
Size of the control group: 38.
Size of the treatment group: 10.
PACKAGED MEATS-DELI
Coef : -0.05806615363069323
Coef/err : 1.824276891388022
CI_up : -0.1167624235780289
CI_down : 0.0045308245719313245
Size of the control group: 38.
Size of the treatment group: 10.
CHEESE
Coef : -0.0345895792428933
Coef/err : 1.5518160232127651
CI_up : -0.07661710892839069
CI_down : 0.009350822414290194
Size of the control group: 39.
Size of the treatment group: 9.
Size of the control group: 39.
Size of the tr

##### SC

In [23]:
state = "SC"

In [24]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
MILK
Coef : -0.07800429529166508
Coef/err : 3.4987482014331537
CI_up : -0.11907852189898538
CI_down : -0.035014923994007185
Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
CHEESE
Coef : -0.0947775261385293
Coef/err : 3.2596637359269685
CI_up : -0.14747196583965083
CI_down : -0.03882606278052603
Size of the control group: 27.
Size of the treatment group: 9.
UNPREP MEAT/POULTRY/SEAFOOD-FRZN
Coef : -0.048480308554737395
Coef/err : 1.2403480695834481
CI_up : -0.12045739164558544
CI_down : 0.029386995704542507
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Si

##### VA

In [25]:
state = "VA"

In [26]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 95.
Size of the treatment group: 10.
Size of the control group: 95.
Size of the treatment group: 10.
BREAD AND BAKED GOODS
Coef : 0.07046983675354812
Coef/err : 1.8771795873301251
CI_up : -0.003039142388288174
CI_down : 0.14939885819014354
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 95.
Size of the treatment group: 10.
SNACKS
Coef : 0.06780540090502085
Coef/err : 1.7355046995743726
CI_up : -0.008492487601254872
CI_down : 0.14997451854241306
Size of the control group: 94.
Size of the treatment group: 10.
PACKAGED MEATS-DELI
Coef : -0.05093125217700256
Coef/err : 1.2074275185990706
CI_up : -0.1281850378051177
CI_down : 0.033168191822069426
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 92.
Size of the treatment group: 10.
Size of the control group: 94.
Size of the treatment group: 10.
CONDIMENTS, GRAVIES, AND

#### In all states

In [27]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 1831.
Size of the treatment group: 91.
FRESH PRODUCE
Coef : -0.019621247262986752
Coef/err : 1.5430619774681942
CI_up : -0.04399037238436332
CI_down : 0.005369058066232002
Size of the control group: 1836.
Size of the treatment group: 91.
Size of the control group: 1822.
Size of the treatment group: 91.
MILK
Coef : -0.0480801454734886
Coef/err : 4.722368379334626
CI_up : -0.06735054500610205
CI_down : -0.028411581018183085
Size of the control group: 1837.
Size of the treatment group: 91.
Size of the control group: 1816.
Size of the treatment group: 91.
Size of the control group: 1816.
Size of the treatment group: 91.
CHEESE
Coef : -0.027670775680054716
Coef/err : 2.1548667078264567
CI_up : -0.0521740931634157
CI_down : -0.0025339952755394357
Size of the control group: 1794.
Size of the treatment group: 90.
Size of the control group: 1815.
Size of the treatment group: 91.
CARBONATED BEVERAGES
Coef : -0.019389046844327407
Coef/err : 1.029692451449872
CI_up : -0.

## DEUXIEME MODELE

$$Prices_{i, t} = \alpha + \beta treat_i + \gamma treat_i* post_t + \sum_{\tau=Janv15}^{Dec16}\gamma_{\tau} \cdot \mathbb{1}(t=\tau)$$

#### In a single state

##### FL

In [28]:
state = "FL"

In [29]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False
    control['time_effects'] = list(zip(control.purchase_year, control.purchase_month))

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)
    treatment['time_effects'] = list(zip(treatment.purchase_year, treatment.purchase_month))


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction','time_effects']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction + C(time_effects)', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 39.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
Size of the control group: 39.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
PACKAGED MEATS-DELI
Coef : -0.06200589046535254
Coef/err : 1.8388709479139225
CI_up : -0.12393567893430113
CI_down : 0.004301771417209332
Size of the control group: 38.
Size of the treatment group: 10.
Size of the control group: 39.
Size of the treatment group: 9.
Size of the control group: 39.
Size of the treatment group: 10.
Size of the control group: 38.
Size of the treatment group: 10.
Size of the control group: 39.
Size of the treatment group: 10.
CANDY
Coef : -0.06010741320629387
Coef/err : 1.3022633715313883
CI_up : -0.14392565573260152
CI_down : 0.03191747378640186
Size of the control group: 38.
Size of the treatment group: 9.
Size of the control group: 37.
Size o

##### SC

In [30]:
state = "SC"

In [31]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
MILK
Coef : -0.07800429529166508
Coef/err : 3.4987482014331537
CI_up : -0.11907852189898538
CI_down : -0.035014923994007185
Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 27.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
CHEESE
Coef : -0.0947775261385293
Coef/err : 3.2596637359269685
CI_up : -0.14747196583965083
CI_down : -0.03882606278052603
Size of the control group: 27.
Size of the treatment group: 9.
UNPREP MEAT/POULTRY/SEAFOOD-FRZN
Coef : -0.048480308554737395
Coef/err : 1.2403480695834481
CI_up : -0.12045739164558544
CI_down : 0.029386995704542507
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Size of the treatment group: 9.
Size of the control group: 28.
Si

##### VA

In [32]:
state = "VA"

In [33]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)&(nielsen.store_state == state)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 95.
Size of the treatment group: 10.
Size of the control group: 95.
Size of the treatment group: 10.
BREAD AND BAKED GOODS
Coef : 0.07046983675354812
Coef/err : 1.8771795873301251
CI_up : -0.003039142388288174
CI_down : 0.14939885819014354
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 95.
Size of the treatment group: 10.
SNACKS
Coef : 0.06780540090502085
Coef/err : 1.7355046995743726
CI_up : -0.008492487601254872
CI_down : 0.14997451854241306
Size of the control group: 94.
Size of the treatment group: 10.
PACKAGED MEATS-DELI
Coef : -0.05093125217700256
Coef/err : 1.2074275185990706
CI_up : -0.1281850378051177
CI_down : 0.033168191822069426
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 93.
Size of the treatment group: 10.
Size of the control group: 92.
Size of the treatment group: 10.
Size of the control group: 94.
Size of the treatment group: 10.
CONDIMENTS, GRAVIES, AND

#### In all states

In [34]:
# We choose to focus on milk prices
for category in categories:
    product_group = nielsen[(nielsen.product_group_descr == category)]


    # The control group is composed by all states where nothing (no entry nor exit) happened.
    control = product_group[~np.isin(product_group.guessed_store_county_fips, movements)].copy()
    print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


    # The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
    count = movements.groupby('County_fips').count()
    count = count[count.State == 1] # No more than one movement in the treatement group
    treatment_movements = movements[(np.isin(movements.County_fips, count.index))]
    treatment_movements = treatment_movements[(treatment_movements.Opening_date>='2015-01-31' ) & (treatment_movements.Opening_date<='2017-01-31') & ((treatment_movements.Closing_date>'2017-01-31') | (treatment_movements.Closing_date.apply(str) == 'NaT'))]

    treatment = product_group[np.isin(product_group.guessed_store_county_fips, treatment_movements.County_fips )].copy()
    treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
    print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


    # We create our dummies for the regression
    control['treat'] = False
    control['interaction'] = False

    treatment['treat'] = True
    treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


    # Final dataset for the regression :

    df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]
    df = df[df.upc_price != 0]
    reg0 = smf.ols(formula='np.log(upc_price) ~ treat + interaction', data=df)
    results0 = reg0.fit()
    if abs(results0.params[2] / results0.bse[2]) > 1. :
        print("=========================================================")
        print(category)
        print(f"Coef : {np.exp(results0.params[2])-1}")
        print(f"Coef/err : {abs(results0.params[2] / results0.bse[2])}")
        print(f"CI_up : {np.exp(results0.conf_int(alpha=0.05)[0][2])-1}")
        print(f"CI_down : {np.exp(results0.conf_int(alpha=0.05)[1][2])-1}")

Size of the control group: 1831.
Size of the treatment group: 91.
FRESH PRODUCE
Coef : -0.019621247262986752
Coef/err : 1.5430619774681942
CI_up : -0.04399037238436332
CI_down : 0.005369058066232002
Size of the control group: 1836.
Size of the treatment group: 91.
Size of the control group: 1822.
Size of the treatment group: 91.
MILK
Coef : -0.0480801454734886
Coef/err : 4.722368379334626
CI_up : -0.06735054500610205
CI_down : -0.028411581018183085
Size of the control group: 1837.
Size of the treatment group: 91.
Size of the control group: 1816.
Size of the treatment group: 91.
Size of the control group: 1816.
Size of the treatment group: 91.
CHEESE
Coef : -0.027670775680054716
Coef/err : 2.1548667078264567
CI_up : -0.0521740931634157
CI_down : -0.0025339952755394357
Size of the control group: 1794.
Size of the treatment group: 90.
Size of the control group: 1815.
Size of the treatment group: 91.
CARBONATED BEVERAGES
Coef : -0.019389046844327407
Coef/err : 1.029692451449872
CI_up : -0.