In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [2]:
nielsen15 = pd.read_csv('../../Nielsen/aggregated_nielsen_2015.csv')
nielsen16 = pd.read_csv('../..//Nielsen/aggregated_nielsen_2016.csv')
nielsen15['year'] = 2015
nielsen16['year'] = 2016

In [3]:
nielsen = pd.concat((nielsen15, nielsen16))

In [4]:
## Entriy/exit dates
fandom = pd.read_csv('../data_collection/plein_de_data/fandom_traitées.csv', parse_dates=['Opening_date', 'Closing_date'])[['State', 'County_name', 'County_fips', 'Opening_date', 'Closing_date']]

# We drop the state in which we do not trust our data (some mistakes stil remain)
fandom = fandom[~np.isin(fandom.State, ('CA', 'GA', 'KS', 'LA', 'TX'))]

# We concentrate our study on the movements (entries & exits) during the fiscal years 2015 and 2016
movements = fandom[((fandom.Opening_date >= '2015-01-31') & (fandom.Opening_date <= '2017-01-31')) | ((fandom.Closing_date >= '2015-01-31') & (fandom.Closing_date <= '2017-01-31'))]
#movements['year'] = movements.Opening_date.dt.year
#movements['month'] = movements.Opening_date.dt.month

## FIRST MODEL : one month

Regression model :
$$ Price_{i, t} = \alpha + \beta \cdot treat_i + \gamma \cdot post_t + \delta \cdot treat_i \cdot post_{t} + \varepsilon_{i, t}$$

In [10]:
year = 2016
month = 1

In [11]:
# We choose to focus on milk prices
milk = nielsen[nielsen.product_group_descr == 'MILK']


# The control group is composed by all states where nothing (no entry nor exit) happened.
control = milk[~np.isin(milk.guessed_store_county_fips, movements)].copy()
print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


# The treatment group is composed by the states where one entry took place in august 2016 and where this entry is the only movement
count = movements.groupby('County_fips').count()
count = count[count.Opening_date + count.Closing_date == 1] # No more than one movement in the treatement group
treatment_movements = movements[(np.isin(movements.County_fips, count.index)) & (movements.Opening_date.dt.year == year) & (movements.Opening_date.dt.month == month)]

treatment = milk[np.isin(milk.guessed_store_county_fips, treatment_movements.County_fips )].copy()
print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


# We create our dummies for the regression
control['treat'] = False
control['post'] = (control.purchase_month > month) & (control.purchase_year == year)
treatment['treat'] = True
treatment['post'] = (treatment.purchase_month > month) & (treatment.purchase_year == year)


# Final dataset for the regression :

df = pd.concat((control, treatment))[['upc_price', 'treat', 'post']]

Size of the control group: 2322.
Size of the treatment group: 15.


In [12]:
reg = smf.ols(formula='upc_price ~ treat * post', data=df)
results = reg.fit()

In [13]:
results.summary()

0,1,2,3
Dep. Variable:,upc_price,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.019
Method:,Least Squares,F-statistic:,505.4
Date:,"Wed, 26 Oct 2022",Prob (F-statistic):,0.0
Time:,09:46:08,Log-Likelihood:,-71742.0
No. Observations:,77028,AIC:,143500.0
Df Residuals:,77024,BIC:,143500.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.8089,0.003,930.455,0.000,2.803,2.815
treat[T.True],-0.1182,0.032,-3.716,0.000,-0.181,-0.056
post[T.True],-0.1704,0.004,-38.184,0.000,-0.179,-0.162
treat[T.True]:post[T.True],-0.0485,0.047,-1.032,0.302,-0.141,0.044

0,1,2,3
Omnibus:,47992.647,Durbin-Watson:,0.975
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4283563.24
Skew:,2.166,Prob(JB):,0.0
Kurtosis:,39.275,Cond. No.,26.8


## SECOND MODEL : all months

Regression model :
$$ Price_{i, t} = \alpha + \beta \cdot treat_i + \delta \cdot treat_i \cdot post_{t} + \varepsilon_{i, t}$$

In [14]:
# We choose to focus on milk prices
milk = nielsen[nielsen.product_group_descr == 'MILK']


# The control group is composed by all states where nothing (no entry nor exit) happened.
control = milk[~np.isin(milk.guessed_store_county_fips, movements)].copy()
print(f"Size of the control group: {len(control.guessed_store_county_fips.unique())}.")


# The treatment group is composed by the states where one entry took place in 2016 and where this entry is the only movement
count = movements.groupby('County_fips').count()
count = count[count.Opening_date + count.Closing_date == 1] # No more than one movement in the treatement group
treatment_movements = movements[(np.isin(movements.County_fips, count.index))]

treatment = milk[np.isin(milk.guessed_store_county_fips, treatment_movements.County_fips )].copy()
treatment = treatment.merge(treatment_movements, left_on='guessed_store_county_fips', right_on='County_fips')
print(f"Size of the treatment group: {len(treatment.guessed_store_county_fips.unique())}.")


# We create our dummies for the regression
control['treat'] = False
control['interaction'] = False
treatment['treat'] = True
treatment['interaction'] = (treatment.purchase_month > treatment.Opening_date.dt.month) & (treatment.purchase_year >= treatment.Opening_date.dt.year)


# Final dataset for the regression :

df = pd.concat((control, treatment))[['upc_price', 'treat', 'interaction']]

Size of the control group: 2322.
Size of the treatment group: 89.


In [15]:
reg = smf.ols(formula='upc_price ~ treat + interaction', data=df)
results = reg.fit()

In [16]:
results.summary()

0,1,2,3
Dep. Variable:,upc_price,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,25.32
Date:,"Wed, 26 Oct 2022",Prob (F-statistic):,1.02e-11
Time:,09:47:31,Log-Likelihood:,-74860.0
No. Observations:,80314,AIC:,149700.0
Df Residuals:,80311,BIC:,149800.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.7309,0.002,1227.711,0.000,2.727,2.735
treat[T.True],0.0308,0.012,2.529,0.011,0.007,0.055
interaction[T.True],-0.1419,0.021,-6.900,0.000,-0.182,-0.102

0,1,2,3
Omnibus:,47829.939,Durbin-Watson:,0.939
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3886434.153
Skew:,2.039,Prob(JB):,0.0
Kurtosis:,36.834,Cond. No.,10.2
