# Hypothesis Testing

## Importing useful libraries

In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.iolib.summary2 import summary_col
import seaborn as sn
# Turn off pandas' chained-assignment check (the pandas default is 'warn').
pd.set_option("mode.chained_assignment", None)

## Importing data

In [3]:
# Load the resale-flat dataset that every regression below is fitted on.
# The path is relative to the notebook's working directory.
resale_data = pd.read_csv("resale-flat-prices-updated.csv")

## Multivariable regression models

### **Hypothesis 1:** Proximity to central Singapore has an effect on the resale price of 4-room HDB flats from 2017 onwards.

In [58]:
# Hypothesis 1 model: OLS of log(resale_price) on proximity, floor_area_sqm
# and remain_lease_m.
lm1 = smf.ols(
    formula="np.log(resale_price) ~ proximity + floor_area_sqm + remain_lease_m",
    data=resale_data,
).fit()

# Full statsmodels report first, then the compact starred coefficient table.
print(lm1.summary())
print(
    summary_col(
        results=lm1,
        stars=True,
        model_names=["log(resale_price)"],
    )
)

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.555
Model:                              OLS   Adj. R-squared:                  0.555
Method:                   Least Squares   F-statistic:                 2.143e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:21:40   Log-Likelihood:                 19520.
No. Observations:                 51638   AIC:                        -3.903e+04
Df Residuals:                     51634   BIC:                        -3.900e+04
Df Model:                             3                                         
Covariance Type:              nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         12.237

In [31]:
# Alternative hypothesis 1 model: OLS of log(resale_price) on town,
# floor_area_sqm and remain_lease_m.
lm1b = smf.ols(
    formula="np.log(resale_price) ~ town + floor_area_sqm + remain_lease_m",
    data=resale_data,
).fit()

# Full statsmodels report first, then the compact starred coefficient table.
print(lm1b.summary())
print(
    summary_col(
        results=lm1b,
        stars=True,
        model_names=["log(resale_price)"],
    )
)

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.722
Model:                              OLS   Adj. R-squared:                  0.721
Method:                   Least Squares   F-statistic:                     4952.
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          20:55:16   Log-Likelihood:                 31642.
No. Observations:                 51638   AIC:                        -6.323e+04
Df Residuals:                     51610   BIC:                        -6.298e+04
Df Model:                            27                                         
Covariance Type:              nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Interc

### **Hypothesis 2:** The effect of the number of adjacent hospitals on a flat's resale price depends on its location.

#### Linear regression model without interaction term

In [47]:
# Hypothesis 2 baseline: main effects only (central, n_hospitals) plus the
# floor_area_sqm, remain_lease_m and average_level regressors; no interaction.
lm2a = smf.ols(
    formula="np.log(resale_price) ~ central + n_hospitals + floor_area_sqm + remain_lease_m + average_level ",
    data=resale_data,
).fit()
print(lm2a.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.567
Model:                              OLS   Adj. R-squared:                  0.567
Method:                   Least Squares   F-statistic:                 1.354e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:12:50   Log-Likelihood:                 20265.
No. Observations:                 51638   AIC:                        -4.052e+04
Df Residuals:                     51632   BIC:                        -4.046e+04
Df Model:                             5                                         
Covariance Type:              nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         11.828

#### Linear regression model with interaction term `central * n_hospitals`

In [48]:
# Hypothesis 2 model with the central x n_hospitals interaction.
# In a formula, `central * n_hospitals` already expands to
# `central + n_hospitals + central:n_hospitals`, so the main effects are not
# listed a second time. The set and order of terms, and therefore the fitted
# model, are the same as with the main effects spelled out.
lm2b = smf.ols(
    formula="np.log(resale_price) ~ central * n_hospitals + floor_area_sqm + remain_lease_m + average_level",
    data=resale_data,
).fit()
print(lm2b.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.568
Model:                              OLS   Adj. R-squared:                  0.568
Method:                   Least Squares   F-statistic:                 1.132e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:13:04   Log-Likelihood:                 20307.
No. Observations:                 51638   AIC:                        -4.060e+04
Df Residuals:                     51631   BIC:                        -4.054e+04
Df Model:                             6                                         
Covariance Type:              nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept     

<u><b>Note:</b></u> Since the independent variables are not time-varying, fixed effects are not applied to this model.

In [49]:
# Side-by-side coefficient table for the two hypothesis 2 models
# (lm2a without, lm2b with the interaction term).
print(
    summary_col(
        results=[lm2a, lm2b],
        stars=True,
        model_names=["W/o interaction term", "With interaction term"],
    )
)


                    W/o interaction term With interaction term
--------------------------------------------------------------
Intercept           11.8286***           11.8370***           
                    (0.0118)             (0.0118)             
R-squared           0.5673               0.5680               
R-squared Adj.      0.5673               0.5680               
average_level       0.0178***            0.0177***            
                    (0.0002)             (0.0002)             
central             0.3784***            0.3624***            
                    (0.0020)             (0.0027)             
central:n_hospitals                      0.0133***            
                                         (0.0014)             
floor_area_sqm      0.0046***            0.0045***            
                    (0.0001)             (0.0001)             
n_hospitals         0.0257***            0.0228***            
                    (0.0006)             (0.0007)     

### **Hypothesis 3:** The effect of the number of adjacent MRTs on a flat's resale price depends on its location.

#### Linear regression model without interaction term

In [59]:
# Hypothesis 3 baseline: main effects only (central, n_mrt) plus the
# floor_area_sqm, remain_lease_m and average_level regressors; no interaction.
lm3a = smf.ols(
    formula="np.log(resale_price) ~ central + n_mrt + floor_area_sqm + remain_lease_m + average_level ",
    data=resale_data,
).fit()
print(lm3a.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.566
Model:                              OLS   Adj. R-squared:                  0.566
Method:                   Least Squares   F-statistic:                 1.344e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:23:28   Log-Likelihood:                 20162.
No. Observations:                 51638   AIC:                        -4.031e+04
Df Residuals:                     51632   BIC:                        -4.026e+04
Df Model:                             5                                         
Covariance Type:              nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         11.802

#### Linear regression model with interaction term `central * n_mrt`

In [60]:
# Hypothesis 3 model with the central x n_mrt interaction.
# In a formula, `central * n_mrt` already expands to
# `central + n_mrt + central:n_mrt`, so the main effects are not listed a
# second time. The set and order of terms, and therefore the fitted model,
# are the same as with the main effects spelled out.
lm3b = smf.ols(
    formula="np.log(resale_price) ~ central * n_mrt + floor_area_sqm + remain_lease_m + average_level",
    data=resale_data,
).fit()
print(lm3b.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.567
Model:                              OLS   Adj. R-squared:                  0.567
Method:                   Least Squares   F-statistic:                 1.126e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:23:37   Log-Likelihood:                 20235.
No. Observations:                 51638   AIC:                        -4.046e+04
Df Residuals:                     51631   BIC:                        -4.039e+04
Df Model:                             6                                         
Covariance Type:              nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         11.778

In [61]:
# Side-by-side coefficient table for the two hypothesis 3 models
# (lm3a without, lm3b with the interaction term).
print(
    summary_col(
        results=[lm3a, lm3b],
        stars=True,
        model_names=["W/o interaction term", "With interaction term"],
    )
)


               W/o interaction term With interaction term
---------------------------------------------------------
Intercept      11.8024***           11.7781***           
               (0.0119)             (0.0120)             
R-squared      0.5656               0.5668               
R-squared Adj. 0.5656               0.5668               
average_level  0.0172***            0.0174***            
               (0.0002)             (0.0002)             
central        0.3214***            0.3563***            
               (0.0024)             (0.0038)             
central:n_mrt                       -0.0080***           
                                    (0.0007)             
floor_area_sqm 0.0040***            0.0040***            
               (0.0001)             (0.0001)             
n_mrt          0.0127***            0.0174***            
               (0.0003)             (0.0005)             
remain_lease_m 0.0006***            0.0007***            
             

### **Hypothesis 4:** The effect of a flat's floor level (`average_level`) on its resale price depends on its location.

#### Linear regression model without interaction term

In [62]:
# Hypothesis 4 baseline: main effects only (central, average_level) plus the
# floor_area_sqm and remain_lease_m regressors; no interaction.
lm4a = smf.ols(
    formula="np.log(resale_price) ~ central + average_level + floor_area_sqm + remain_lease_m ",
    data=resale_data,
).fit()
print(lm4a.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.552
Model:                              OLS   Adj. R-squared:                  0.552
Method:                   Least Squares   F-statistic:                 1.592e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:35:57   Log-Likelihood:                 19376.
No. Observations:                 51638   AIC:                        -3.874e+04
Df Residuals:                     51633   BIC:                        -3.870e+04
Df Model:                             4                                         
Covariance Type:              nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         11.861

#### Linear regression model with interaction term `central * average_level`

In [63]:
# Hypothesis 4 model with the central x average_level interaction.
# In a formula, `central * average_level` already expands to
# `central + average_level + central:average_level`, so the main effects are
# not listed a second time. The set and order of terms, and therefore the
# fitted model, are the same as with the main effects spelled out.
lm4b = smf.ols(
    formula="np.log(resale_price) ~ central * average_level + floor_area_sqm + remain_lease_m",
    data=resale_data,
).fit()
print(lm4b.summary())

                             OLS Regression Results                             
Dep. Variable:     np.log(resale_price)   R-squared:                       0.554
Model:                              OLS   Adj. R-squared:                  0.554
Method:                   Least Squares   F-statistic:                 1.283e+04
Date:                  Fri, 13 Oct 2023   Prob (F-statistic):               0.00
Time:                          21:36:30   Log-Likelihood:                 19485.
No. Observations:                 51638   AIC:                        -3.896e+04
Df Residuals:                     51632   BIC:                        -3.890e+04
Df Model:                             5                                         
Covariance Type:              nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept 

In [64]:
# Side-by-side coefficient table for the two hypothesis 4 models
# (lm4a without, lm4b with the interaction term).
print(
    summary_col(
        results=[lm4a, lm4b],
        stars=True,
        model_names=["W/o interaction term", "With interaction term"],
    )
)


                      W/o interaction term With interaction term
----------------------------------------------------------------
Intercept             11.8617***           11.8803***           
                      (0.0120)             (0.0120)             
R-squared             0.5522               0.5540               
R-squared Adj.        0.5521               0.5540               
average_level         0.0179***            0.0155***            
                      (0.0002)             (0.0003)             
central               0.3760***            0.3241***            
                      (0.0020)             (0.0041)             
central:average_level                      0.0069***            
                                           (0.0005)             
floor_area_sqm        0.0041***            0.0041***            
                      (0.0001)             (0.0001)             
remain_lease_m        0.0006***            0.0006***            
                      (0