# 2_Extract_8_Macroeconomic_Variable

The 8 macroeconomic variables come from the paper *A Comprehensive Look at the Empirical Performance of Equity Premium Prediction*; the original dataset was downloaded from Amit Goyal’s website (https://sites.google.com/view/agoyal145).

(1) dp : Dividend-price ratio : The Dividend Price Ratio (d/p) is the difference between the log of dividends and the log of prices.

month["dp"] = np.log(month["D12"]) - np.log(month["Index"])

(2) ep Earnings-price ratio: Earnings Price Ratio (e/p) is the difference between the log of earnings and the log of prices.

month["ep"] = np.log(month["E12"]) - np.log(month["Index"])

(3) b/m : The Book-to-Market Ratio (b/m) is the ratio of book value to market value for the Dow Jones Industrial Average. For the months from March to December, this is computed by dividing book value at the end of the previous year by the price at the end of the current month. For the months of January and February, this is computed by dividing book value at the end of two years ago by the price at the end of the current month.

(4) ntis : `Net Equity Expansion (ntis)` is the ratio of the 12-month moving sum of net issues by NYSE-listed stocks to the total end-of-year market capitalization of NYSE stocks.

(5) tbl: `Treasury Bills (tbl)` : Treasury-bill rates from 1920 to 1933 are the U.S. Yields On Short-Term United States Securities, Three-Six Month Treasury Notes and Certificates, Three Month Treasury series in the NBER Macrohistory database. Treasury-bill rates from 1934 to 2005 are the 3-Month Treasury Bill: Secondary Market Rate from the economic research database at the Federal Reserve Bank at St. Louis (FRED).

(6) tms: `The Term Spread (tms)` is the difference between the long-term yield on government bonds and the Treasury-bill rate.

month['tms'] = month['lty'] - month['tbl']

where `lty` is the Long Term Yield (lty), i.e. the long-term government bond yield series in the dataset.

(7) dfy: The `Default Yield Spread (dfy)` is the difference between BAA and AAA-rated corporate bond yields. 

month['dfy'] = month['BAA'] - month['AAA']

(8) svar: `Stock Variance (svar)` : Stock Variance is computed as sum of squared daily returns on the S&P 500.

In [1]:
import pandas as pd
import numpy as np

# Load the predictor workbook; its `yyyymm` column becomes the row index.
month = pd.read_excel('PredictorData2021.xlsx', index_col = 'yyyymm')
# Bare expression so the notebook renders the loaded frame.
month

  warn("""Cannot parse header or footer so it will be ignored""")


Unnamed: 0_level_0,Index,D12,E12,b/m,tbl,AAA,BAA,lty,ntis,Rfree,infl,ltr,corpr,svar,csp,CRSP_SPvw,CRSP_SPvwx
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
187101,4.44,0.260000,0.400000,,,,,,,,,,,,,,
187102,4.50,0.260000,0.400000,,,,,,,0.004967,,,,,,,
187103,4.61,0.260000,0.400000,,,,,,,0.004525,,,,,,,
187104,4.74,0.260000,0.400000,,,,,,,0.004252,,,,,,,
187105,4.86,0.260000,0.400000,,,,,,,0.004643,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202108,4522.68,58.791336,169.833333,0.184756,0.0005,0.0255,0.0324,0.0128,0.014846,0.000000,0.002066,-0.0035,-0.0045,0.000602,,0.030600,0.029205
202109,4307.54,59.254483,175.370000,0.193036,0.0004,0.0253,0.0323,0.0137,0.015598,0.000000,0.002716,-0.0250,-0.0194,0.001393,,-0.046076,-0.047152
202110,4605.38,59.635361,182.860000,0.182389,0.0005,0.0268,0.0335,0.0158,0.013368,0.000000,0.008308,0.0051,0.0159,0.001151,,0.070510,0.069627
202111,4567.00,60.016239,190.350000,0.189455,0.0005,0.0262,0.0328,0.0156,0.015640,0.000100,0.004913,0.0210,0.0094,0.001327,,-0.007256,-0.008665


In [2]:
# Predictor columns to keep, in the order used by the rest of the notebook.
variable = ['dp', 'ep', 'b/m', 'ntis', 'tbl', 'tms', 'dfy', 'svar']

# Transformation code for each entry of `variable`, matched by position.
# The codes are interpreted by `transxf`.
tcodes = [
    2,  # dp
    2,  # ep
    5,  # b/m
    2,  # ntis
    2,  # tbl
    1,  # tms
    2,  # dfy
    5,  # svar
]

In [3]:
# Append the four predictors that are computed from other columns of `month`.
month = month.assign(
    dp=np.log(month["D12"]) - np.log(month["Index"]),  # log dividends minus log prices
    ep=np.log(month["E12"]) - np.log(month["Index"]),  # log earnings minus log prices
    tms=month['lty'] - month['tbl'],                    # long-term yield minus T-bill rate
    dfy=month['BAA'] - month['AAA'],                    # BAA yield minus AAA yield
)

In [4]:
# Keep only the eight predictor columns, ordered as in `variable`, and show them.
month_new = month.loc[:, variable]
month_new

Unnamed: 0_level_0,dp,ep,b/m,ntis,tbl,tms,dfy,svar
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
187101,-2.837728,-2.406945,,,,,,
187102,-2.851151,-2.420368,,,,,,
187103,-2.875302,-2.444519,,,,,,
187104,-2.903111,-2.472328,,,,,,
187105,-2.928112,-2.497329,,,,,,
...,...,...,...,...,...,...,...,...
202108,-4.342866,-3.282042,0.184756,0.014846,0.0005,0.0123,0.0069,0.000602
202109,-4.286281,-3.201224,0.193036,0.015598,0.0004,0.0133,0.0070,0.001393
202110,-4.346732,-3.226260,0.182389,0.013368,0.0005,0.0153,0.0067,0.001151
202111,-4.331997,-3.177747,0.189455,0.015640,0.0005,0.0151,0.0066,0.001327


In [5]:
def transxf(x, tcode):
    """Apply one of seven stationarity transformations to a series.

    Parameters
    ----------
    x : pandas.Series
        Series to transform. The differencing codes rely on
        ``Series.diff`` / ``Series.shift``.
    tcode : int
        Transformation code:

        1. level, ``x(t)`` (returned unchanged)
        2. first difference, ``x(t) - x(t-1)``
        3. second difference, ``(x(t) - x(t-1)) - (x(t-1) - x(t-2))``
        4. natural log, ``ln(x(t))``
        5. first difference of the log, ``ln(x(t)) - ln(x(t-1))``
        6. second difference of the log
        7. first difference of the percent change,
           ``(x(t)/x(t-1) - 1) - (x(t-1)/x(t-2) - 1)``

    Returns
    -------
    pandas.Series
        Transformed series on the same index as ``x``. Observations lost to
        differencing are NaN. For ``tcode == 4`` the result is all-NaN when
        the smallest non-missing value of ``x`` is below ``1e-6``.

    Raises
    ------
    ValueError
        If ``tcode`` is not one of 1..7.
    """
    small = 1e-6  # values below this are treated as unusable for a plain log

    if tcode == 1:
        return x
    if tcode == 2:
        return x.diff()
    if tcode == 3:
        # Difference the first difference; squaring it is not a second difference.
        return x.diff().diff()
    if tcode == 4:
        # Series.min() skips NaN, so the guard does not depend on where
        # missing values sit in the series.
        if x.min() < small:
            # Keep the index so the result still lines up with the other series.
            return pd.Series(np.nan, index=x.index, name=x.name)
        return np.log(x)
    if tcode == 5:
        return np.log(x) - np.log(x.shift())
    if tcode == 6:
        ln_diff1 = np.log(x) - np.log(x.shift())
        ln_diff2 = np.log(x.shift()) - np.log(x.shift(periods=2))
        return ln_diff1 - ln_diff2
    if tcode == 7:
        return (x / x.shift(1) - 1) - (x.shift(1) / x.shift(2) - 1)

    raise ValueError(f"unknown transformation code: {tcode!r} (expected 1-7)")

In [6]:
# Transform every kept series with the code at the same position in `tcodes`.
N = month_new.shape[1]  # number of series kept
yt = [transxf(month_new.iloc[:, pos], tcodes[pos]) for pos in range(N)]
# Each list entry becomes one row, so transpose to put the series back in columns.
trans_macro = pd.DataFrame(yt).T

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [17]:
# Save the transformed series for the whole sample; this file is read back in the merge step.
trans_macro.to_csv('macro8_transformed.csv')

In [7]:
# Independent copy of the transformed panel limited to the 196612-201611 labels,
# written to disk and then displayed.
trans_macro8 = trans_macro.loc['196612': '201611'].copy()
trans_macro8.to_csv('trans_macro8.csv')
trans_macro8

Unnamed: 0_level_0,dp,ep,b/m,ntis,tbl,tms,dfy,svar
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
196612,-0.000829,0.003897,0.007481,-0.000342,-0.0036,-0.0041,1.000000e-04,-0.211255
196701,-0.071794,-0.081296,-0.078545,-0.001510,-0.0024,-0.0024,-2.000000e-04,0.156957
196702,0.001505,-0.008023,0.012455,0.002003,-0.0016,0.0009,2.000000e-04,-0.575808
196703,-0.035199,-0.044750,0.017443,-0.002780,-0.0030,0.0029,-7.000000e-04,0.194875
196704,-0.041372,-0.048738,-0.035250,0.000125,-0.0042,0.0093,-6.938894e-18,0.513062
...,...,...,...,...,...,...,...,...
201607,-0.030757,-0.026703,-0.027627,-0.002990,0.0003,0.0145,-9.000000e-04,-1.791211
201608,0.005435,0.009439,0.001703,0.000893,0.0000,0.0156,-2.000000e-04,-0.537110
201609,0.005433,0.009388,0.005052,-0.001821,-0.0001,0.0167,-2.000000e-04,1.790645
201610,0.024601,0.039840,0.009093,0.003569,0.0004,0.0187,-3.000000e-04,-1.525144


In [8]:
# Write the summary statistics of the sub-sample to CSV.
trans_macro8.describe().to_csv('trans_macro8_desc.csv')

In [9]:
# Panel read from Macro.csv, indexed by its `Date` column and cut down to the
# same eight predictors; it is compared against `trans_macro8` further down.
Macro_markus = pd.read_csv('Macro.csv', index_col = 'Date').loc[:, variable]

In [10]:
# Display the panel loaded from Macro.csv.
Macro_markus

Unnamed: 0_level_0,dp,ep,b/m,ntis,tbl,tms,dfy,svar
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1/1/67,-0.000829,0.003897,0.007481,-0.000341,-0.0036,-0.0041,0.0001,-0.211208
2/1/67,-0.071794,-0.081296,-0.078545,-0.001510,-0.0024,-0.0024,-0.0002,0.156913
3/1/67,0.001505,-0.008023,0.012455,0.002004,-0.0016,0.0009,0.0002,-0.575802
4/1/67,-0.035199,-0.044750,0.017443,-0.002780,-0.0030,0.0029,-0.0007,0.194894
5/1/67,-0.041372,-0.048738,-0.035250,0.000124,-0.0042,0.0093,0.0000,0.513076
...,...,...,...,...,...,...,...,...
8/1/16,-0.030757,-0.026703,-0.027627,-0.002984,0.0003,0.0145,-0.0009,-1.791255
9/1/16,0.005435,0.009439,0.001703,0.000941,0.0000,0.0156,-0.0002,-0.537058
10/1/16,0.005433,0.009388,0.005052,-0.001885,-0.0001,0.0167,-0.0002,1.790622
11/1/16,0.024601,0.039840,0.009093,0.003613,0.0004,0.0187,-0.0003,-1.525089


In [11]:
# Write the summary statistics of the Macro.csv panel to CSV.
Macro_markus.describe().to_csv('Macro_markus_desc.csv')

In [12]:
# Turn the date labels of both frames into ordinary columns, leaving each frame
# with a default positional index.
Macro_markus = Macro_markus.reset_index()
trans_macro8 = trans_macro8.reset_index()

In [13]:
# Display the Macro.csv panel after its index was reset.
Macro_markus

Unnamed: 0,Date,dp,ep,b/m,ntis,tbl,tms,dfy,svar
0,1/1/67,-0.000829,0.003897,0.007481,-0.000341,-0.0036,-0.0041,0.0001,-0.211208
1,2/1/67,-0.071794,-0.081296,-0.078545,-0.001510,-0.0024,-0.0024,-0.0002,0.156913
2,3/1/67,0.001505,-0.008023,0.012455,0.002004,-0.0016,0.0009,0.0002,-0.575802
3,4/1/67,-0.035199,-0.044750,0.017443,-0.002780,-0.0030,0.0029,-0.0007,0.194894
4,5/1/67,-0.041372,-0.048738,-0.035250,0.000124,-0.0042,0.0093,0.0000,0.513076
...,...,...,...,...,...,...,...,...,...
595,8/1/16,-0.030757,-0.026703,-0.027627,-0.002984,0.0003,0.0145,-0.0009,-1.791255
596,9/1/16,0.005435,0.009439,0.001703,0.000941,0.0000,0.0156,-0.0002,-0.537058
597,10/1/16,0.005433,0.009388,0.005052,-0.001885,-0.0001,0.0167,-0.0002,1.790622
598,11/1/16,0.024601,0.039840,0.009093,0.003613,0.0004,0.0187,-0.0003,-1.525089


In [14]:
# Display the recomputed panel after its index was reset.
trans_macro8

Unnamed: 0,yyyymm,dp,ep,b/m,ntis,tbl,tms,dfy,svar
0,196612,-0.000829,0.003897,0.007481,-0.000342,-0.0036,-0.0041,1.000000e-04,-0.211255
1,196701,-0.071794,-0.081296,-0.078545,-0.001510,-0.0024,-0.0024,-2.000000e-04,0.156957
2,196702,0.001505,-0.008023,0.012455,0.002003,-0.0016,0.0009,2.000000e-04,-0.575808
3,196703,-0.035199,-0.044750,0.017443,-0.002780,-0.0030,0.0029,-7.000000e-04,0.194875
4,196704,-0.041372,-0.048738,-0.035250,0.000125,-0.0042,0.0093,-6.938894e-18,0.513062
...,...,...,...,...,...,...,...,...,...
595,201607,-0.030757,-0.026703,-0.027627,-0.002990,0.0003,0.0145,-9.000000e-04,-1.791211
596,201608,0.005435,0.009439,0.001703,0.000893,0.0000,0.0156,-2.000000e-04,-0.537110
597,201609,0.005433,0.009388,0.005052,-0.001821,-0.0001,0.0167,-2.000000e-04,1.790645
598,201610,0.024601,0.039840,0.009093,0.003569,0.0004,0.0187,-3.000000e-04,-1.525144


In [19]:
# Print the correlation of each predictor between the two panels.
# The first column of `Macro_markus` is skipped.
for col in Macro_markus.columns[1:]:
    print(Macro_markus[col].corr(trans_macro8[col]))

0.9995474693697404
0.9997966571584396
0.9999999999999991
0.9997256637755889
0.9999984092084159
0.9999999261109785
1.0
0.9999999993189495


---

In [18]:
# merge dataset
# Load the two transformed panels to be combined, each indexed by its own date column.
current_trans = pd.read_csv('current_transformed.csv', index_col = 'sasdate')
macro8_trans = pd.read_csv('macro8_transformed.csv', index_col ='yyyymm')

In [24]:
# Cut both panels to their respective label ranges.
current_trans = current_trans.loc['02/01/1980':'12/01/2021']
# The macro rows take over `current_trans`'s index so the two frames line up in
# the column-wise concat.
# NOTE(review): this assumes both slices hold the same periods in the same order;
# nothing here checks that.
macro8_trans = macro8_trans.loc['198002':'202112'].set_index(current_trans.index)
total_macro = pd.concat([current_trans, macro8_trans], axis=1)

In [33]:
# Save the combined panel to CSV.
total_macro.to_csv('total_macro.csv')