## Estimating Demand function

In [1]:
import pandas as pd
data = pd.read_csv('Data-GP1.csv')
data

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,911202,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,911203,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,911204,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,911205,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,911206,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,920504,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,920505,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,920506,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,920507,0,1,0.223143,8.764053,0,0,2.813411


In [2]:
#Defining variables
y = data[['q']] # Quantity estimate for demand function
y1 = data[['p']] # Price

### Equation that we are estimating is Qd = beta0 + beta1 x price + sumprod(Days x gamma) + u1

In [3]:
z1 = data[['Mon', 'Tue', 'Wed', 'Thu']] #Exogenous variables
print(z1)

     Mon  Tue  Wed  Thu
0      1    0    0    0
1      0    1    0    0
2      0    0    1    0
3      0    0    0    1
4      0    0    0    0
..   ...  ...  ...  ...
106    1    0    0    0
107    0    1    0    0
108    0    0    1    0
109    0    0    0    1
110    0    0    0    0

[111 rows x 4 columns]


In [4]:
# Assuming that price is the endogenous variable
# We can assume that price is related to certain weather conditions. Here considering - Stormy, Mixed, Rainy, Cold, Wind
# Hence we consider these as the instrument variables
listz = ['Mon','Tue','Wed','Thu','Stormy','Wind','Rainy']

In [5]:
import statsmodels.api as sm
z = data[listz]
z = sm.add_constant(z)
reduced_form = sm.OLS(y1, z).fit()
print(reduced_form.summary())

                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.217
Model:                            OLS   Adj. R-squared:                  0.164
Method:                 Least Squares   F-statistic:                     4.087
Date:                Sun, 18 Sep 2022   Prob (F-statistic):           0.000537
Time:                        23:42:51   Log-Likelihood:                -36.557
No. Observations:                 111   AIC:                             89.11
Df Residuals:                     103   BIC:                             110.8
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.8101      0.689     -2.626      0.0

In [6]:
# Here, p values of Wind is less than 0.05. So price is heavily dependant on this instrument variable
# Their F- statistic is significant or they are highly correlated to the price.

In [7]:
hat_y1_arr = reduced_form.fittedvalues
hat_y1 = pd.DataFrame(hat_y1_arr, columns = ['hat_y1'])
hat_y1

Unnamed: 0,hat_y1
0,-0.131785
1,-0.049562
2,-0.265101
3,0.110907
4,0.061365
...,...
106,-0.350704
107,-0.254665
108,-0.225070
109,-0.171106


In [8]:
#Updating structural model using 2nd stage linear regression

X = pd.concat([hat_y1, z1], axis=1)
X = sm.add_constant(X)
structuralModel = sm.OLS(y, X).fit()

print(structuralModel.summary())
print('-------------------------------')

                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.207
Model:                            OLS   Adj. R-squared:                  0.169
Method:                 Least Squares   F-statistic:                     5.479
Date:                Sun, 18 Sep 2022   Prob (F-statistic):           0.000161
Time:                        23:42:51   Log-Likelihood:                -110.96
No. Observations:                 111   AIC:                             233.9
Df Residuals:                     105   BIC:                             250.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.5048      0.156     54.446      0.0

In [9]:
# Hausman test
hat_v = reduced_form.resid
hat_v = pd.DataFrame(hat_v, columns = ['hat_v'])
print(hat_v)
struc_var = pd.concat([y1, z1, hat_v], axis=1)
struc_var = sm.add_constant(struc_var)
print(struc_var)

        hat_v
0   -0.298998
1    0.049562
2    0.337422
3    0.136232
4    0.602962
..        ...
106 -0.447804
107  0.167654
108  0.409993
109  0.394250
110  0.754202

[111 rows x 1 columns]
     const         p  Mon  Tue  Wed  Thu     hat_v
0      1.0 -0.430783    1    0    0    0 -0.298998
1      1.0  0.000000    0    1    0    0  0.049562
2      1.0  0.072321    0    0    1    0  0.337422
3      1.0  0.247139    0    0    0    1  0.136232
4      1.0  0.664327    0    0    0    0  0.602962
..     ...       ...  ...  ...  ...  ...       ...
106    1.0 -0.798508    1    0    0    0 -0.447804
107    1.0 -0.087011    0    1    0    0  0.167654
108    1.0  0.184922    0    0    1    0  0.409993
109    1.0  0.223143    0    0    0    1  0.394250
110    1.0  0.561118    0    0    0    0  0.754202

[111 rows x 7 columns]


In [10]:
# Hausman test
structural_equation = sm.OLS(y, struc_var).fit()
print(structural_equation.summary())

                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.242
Model:                            OLS   Adj. R-squared:                  0.199
Method:                 Least Squares   F-statistic:                     5.547
Date:                Sun, 18 Sep 2022   Prob (F-statistic):           5.07e-05
Time:                        23:42:51   Log-Likelihood:                -108.42
No. Observations:                 111   AIC:                             230.8
Df Residuals:                     104   BIC:                             249.8
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.5048      0.153     55.442      0.0

In [16]:
# Here p value of the coef of residual terms is more than 0.05
# So we can not reject the null hypothesis that the price is indeed endogenous

In [12]:
# Sargan Test
hat_u = structuralModel.resid
SarganModel = sm.OLS(hat_u, z).fit()
Rsquared = SarganModel.rsquared
print(Rsquared)

0.0019090428770270895


In [13]:
from scipy import stats
N = 111 # n is number of sample points
pval = 1 - stats.chi2.cdf(N*Rsquared,1)
print(pval)

0.6452793315635366


In [17]:
# p value is big, i.e. more than 0.05. So we can consider Stormy, Wind and Rainy as instrument variables

In [21]:
structuralModel.params

const     8.504826
hat_y1   -1.125404
Mon      -0.025829
Tue      -0.530926
Wed      -0.566469
Thu       0.109565
dtype: float64

### The Demand function can be considered as Qd = 8.5 - 1.12price - 0.02Mon - 0.53Tue - 0.57Wed + 0.11Thu