In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf       # to perform the econometric tests

In [3]:
# Directory that holds the workbook.
# NOTE(review): this is an absolute, machine-specific location; it has to be
# adjusted before the notebook can run on another machine.
abspath = 'D:/OneDrive/Data/Brooks4e/'
# Read the workbook and use its first column as the row index.
data = pd.read_excel(io=abspath + 'SandPhedge.xls', index_col=0)
data.head()

Unnamed: 0_level_0,Spot,Futures
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1997-09-01,947.280029,954.5
1997-10-01,914.619995,924.0
1997-11-01,955.400024,955.0
1997-12-01,970.429993,979.25
1998-01-01,980.280029,987.75


In [4]:
# OLS of the spot price level on the futures price level.
# An intercept is estimated alongside the slope (see the Intercept row
# in the printed summary).
formula = 'Spot ~ Futures'
results = smf.ols(formula=formula, data=data).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                   Spot   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.005e+06
Date:                Thu, 23 Dec 2021   Prob (F-statistic):               0.00
Time:                        21:48:06   Log-Likelihood:                -826.86
No. Observations:                 247   AIC:                             1658.
Df Residuals:                     245   BIC:                             1665.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.8378      1.489     -1.906      0.0

In [5]:
# F-test of the single restriction H0: the coefficient on Futures equals 1,
# evaluated on the levels regression fitted above (`results`).
hypotheses = 'Futures = 1'
f_test = results.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=array([[2.58463856]]), p=0.10919294686216163, df_denom=245, df_num=1>


In [6]:
# Spot-on-futures regression in levels with the constant term suppressed:
# the trailing "- 1" in the formula removes the intercept.
formula1 = 'Spot ~ Futures - 1'
results1 = smf.ols(formula=formula1, data=data).fit()
print(results1.summary())

                                 OLS Regression Results                                
Dep. Variable:                   Spot   R-squared (uncentered):                   1.000
Model:                            OLS   Adj. R-squared (uncentered):              1.000
Method:                 Least Squares   F-statistic:                          1.136e+07
Date:                Thu, 23 Dec 2021   Prob (F-statistic):                        0.00
Time:                        21:48:18   Log-Likelihood:                         -828.68
No. Observations:                 247   AIC:                                      1659.
Df Residuals:                     246   BIC:                                      1663.
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [7]:
# Create a user-defined function to calculate logarithmic returns
def LogDiff(x):
    """Compute percentage log changes between consecutive observations.

    Each value is divided by the value one position earlier (``x.shift(1)``),
    the natural logarithm of that ratio is taken, and the result is scaled
    by 100. Missing entries -- including the first position, which has no
    predecessor -- are removed with ``dropna()`` before returning.

    Parameters
    ----------
    x : pandas object providing ``shift`` and element-wise division
        (this notebook passes price Series).

    Returns
    -------
    A pandas object holding ``100 * ln(x_t / x_{t-1})`` with missing
    entries dropped.
    """
    lagged = x.shift(1)
    pct_log_change = np.log(x / lagged) * 100
    return pct_log_change.dropna()

In [8]:
# Build a frame of percentage log returns for the spot and futures series.
# NOTE(review): this rebinds `data`, replacing the price-level frame. After
# this cell has run, the level regressions above ('Spot ~ Futures') and this
# cell itself cannot be re-run without reloading the workbook first.
data = pd.DataFrame(
    data={
        'ret_spot': LogDiff(data['Spot']),
        'ret_future': LogDiff(data['Futures']),
    }
)
data.head()

Unnamed: 0_level_0,ret_spot,ret_future
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1997-10-01,-3.508608,-3.247557
1997-11-01,4.362145,3.299927
1997-12-01,1.560914,2.507563
1998-01-01,1.009901,0.864266
1998-02-01,6.807837,6.159189


In [9]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each
# column of the returns frame currently bound to `data`.
data.describe()

Unnamed: 0,ret_spot,ret_future
count,246.0,246.0
mean,0.416776,0.414017
std,4.333323,4.419049
min,-18.563647,-18.944697
25%,-1.831388,-1.9314
50%,0.918522,0.997641
75%,3.276468,3.133588
max,10.230659,10.387184


In [10]:
# OLS of spot returns on futures returns, with an intercept
# (see the Intercept row in the printed summary).
formula2 = 'ret_spot ~ ret_future'
results2 = smf.ols(formula=formula2, data=data).fit()
print(results2.summary())

                            OLS Regression Results                            
Dep. Variable:               ret_spot   R-squared:                       0.989
Model:                            OLS   Adj. R-squared:                  0.989
Method:                 Least Squares   F-statistic:                 2.147e+04
Date:                Thu, 23 Dec 2021   Prob (F-statistic):          7.54e-240
Time:                        21:48:42   Log-Likelihood:                -157.16
No. Observations:                 246   AIC:                             318.3
Df Residuals:                     244   BIC:                             325.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0131      0.029      0.444      0.6

In [11]:
# F-test of the single restriction H0: the coefficient on ret_future equals 1,
# evaluated on the returns regression fitted above (`results2`).
hypotheses2 = 'ret_future = 1'
f_test2 = results2.f_test(r_matrix=hypotheses2)
print(f_test2)

<F test: F=array([[14.02980961]]), p=0.00022456631728795146, df_denom=244, df_num=1>
