In [29]:
%matplotlib inline
%load_ext rpy2.ipython


import datetime as dt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import statsmodels.api as sm
from pykalman import KalmanFilter
import numpy.ma as ma

# Limit how many rows pandas prints for a frame. The fully qualified key is
# used because the bare pattern 'max_rows' can match more than one option.
pd.set_option('display.max_rows', 20)
# pd.set_option('display.max_rows', 1000)

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [30]:
%%R

# Attach the MARSS package in the embedded R session; the later %%R cells call MARSS().
library(MARSS)

# Data

## Regressors

In [31]:
# Read the raw "Valores" sheet. `sheet_name` is the supported keyword
# (the old `sheetname` spelling is no longer accepted by read_excel).
data = pd.read_excel("Nowcasting.xlsx", sheet_name="Valores")

# Row 0 carries the series codes; its non-null entries are the series to extract.
header = data.iloc[0]
series = header.dropna().tolist()

# Relabel the columns with that row so each series can be located by its code.
data.columns = header

In [32]:
# Break the sheet into one single-column frame per series, then merge them
# side by side on their index.
dfsList = []

for serie in series:
    # A series spans two adjacent columns: the one labelled with its code
    # (used as the index -- presumably the dates, confirm against the workbook)
    # and the column immediately to its right (the values).
    pos = data.columns.get_loc(serie)
    block = data.iloc[1:, pos:pos + 2]
    df = block.set_index(serie)
    df.columns = [serie]

    # Keep only the first row for any repeated index label.
    is_repeat = df.index.duplicated(keep='first')
    df = df.loc[~is_repeat]

    if df[serie].isnull().all():
        continue  # nothing but missing values: leave this series out
    dfsList.append(df.apply(pd.to_numeric))

data = pd.concat(dfsList, axis=1).sort_index()

# Discard the last row when it holds no observation at all.
last_label = data.index[-1]
if not data.loc[last_label].notnull().any():
    data = data.drop(last_label)
data.tail()

  self.values[0] < other_diff[0]


Unnamed: 0,BZGDINDX Index,BZIPTLYo Index,MPMIBRMA Index,BZGDGNDI Index,BZUETOTN Index,BFOETTOD Index,BZGDFNCE Index,BZREELHT Index,BPPICM Index,BZPIIPCM Index,...,BZRTFBSA INDEX,BZFGCCSA INDEX,ibov index,CL1 Comdty,BCSWLPD CURNCY,bcswfpd curncy,BZMOTRFB INDEX,BZJCGTOT INDEX,BZEANSA INDEX,OEBRD002 INDEX
2017-01-26,,,,,,,,,,,...,,,66190.63,53.78,10.6,10.9178,475.0,,,
2017-01-27,,,,,,,,,,,...,,,66033.98,53.17,10.5096,10.8762,-474.0,,,
2017-01-30,,,,,,,,,,,...,,,64301.73,52.63,10.5724,10.8627,,,,
2017-01-31,,,44.0,,,,,,,,...,,79.3,64670.78,52.81,10.5262,10.8029,,,,
2017-02-01,,,,,,,,,,,...,,,65084.65,53.74,10.4928,10.8046,,,,


In [33]:
# Sanity check: does the final row carry at least one observation?
last_label = data.index[-1]
data.loc[last_label].notnull().any()

True

### Transformations


In [34]:
# List the series codes that survived the merge; these are the labels used to index `data` below.
data.columns

Index(['BZGDINDX Index', 'BZIPTLYo Index', 'MPMIBRMA Index', 'BZGDGNDI Index',
       'BZUETOTN Index', 'BFOETTOD Index', 'BZGDFNCE Index', 'BZREELHT Index',
       'BPPICM Index', 'BZPIIPCM Index', 'BZEXTOT$ Index', 'BZTBBALY INDEX',
       'BZBXPBCM INDEX', 'BZRTFBSA INDEX', 'BZFGCCSA INDEX', 'ibov index',
       'CL1 Comdty', 'BCSWLPD CURNCY', 'bcswfpd curncy', 'BZMOTRFB INDEX',
       'BZJCGTOT INDEX', 'BZEANSA INDEX', 'OEBRD002 INDEX'],
      dtype='object')

In [35]:
# Table of per-series flags, keyed by series code ("Code" column).
# `sheet_name` is the supported keyword (the old `sheetname` spelling is no
# longer accepted by read_excel).
transf = pd.read_excel("Nowcasting.xlsx", sheet_name="Plan2").set_index("Code")

In [27]:
# NOTE: this cell rewrites `data` in place, so it is NOT idempotent. Run it
# exactly once after the load cells; running it again compounds the logs and
# differences.

# Natural log of every series flagged in the "Log" column.
for s in transf.index[transf["Log"] == True]:
    data[s] = np.log(data[s])

# Aggregate to month-end frequency by summing within each month.
# min_count=1 keeps a month with no observation as NaN; without it the sum of
# an empty month is 0, which would make every row look complete to the
# balanced-panel check that relies on NaN.
# NOTE(review): for series with several observations per month this adds the
# (possibly logged) values together -- confirm a sum, rather than the last
# value or the mean, is what is wanted.
data = data.resample("M").sum(min_count=1)

# First difference of every series flagged in the "Diff" column.
for s in transf.index[transf["Diff"] == True]:
    data[s] = data[s].diff()

In [28]:
# Show the non-missing values of the "BZGDINDX Index" column after the transformation cell.
d = data["BZGDINDX Index"].dropna()
# d[d.notnull()]
# print(d.last_valid_index())
d

Series([], Freq: M, Name: BZGDINDX Index, dtype: float64)

### Stationarity

In [None]:
# Augmented Dickey-Fuller test on each column. Every column name is printed;
# the p-value is reported only when it exceeds 0.05.
# The loop variable is deliberately not called `series`, so the list of series
# names built in the load cell is left intact.
for col in data.columns:
    obs = data[col].dropna()
    print(col)
    if obs.empty:
        # adfuller cannot run on an empty sample; report it instead of crashing.
        print(col + " has no observations; skipping ADF test")
        continue
    try:
        pv = sm.tsa.stattools.adfuller(obs)[1]  # element 1 is the p-value
    except ValueError as err:
        # e.g. sample too short for the lag length chosen by adfuller
        print(col + " could not be tested: " + str(err))
        continue
    if pv > 0.05:
        print(col + " 's p-value is: " + str(pv))

## Response Variable

In [None]:
# Load the response variable. read_excel has no day-first option, so the
# "Date" column is parsed explicitly; values that are already datetimes pass
# through unchanged.
gdp = pd.read_excel('GDP.xlsx')
gdp["Date"] = pd.to_datetime(gdp["Date"], dayfirst=True)
gdp = gdp.set_index("Date")

# Log difference: log(GDP_t) - log(GDP_{t-1}). The first row becomes NaN.
gdp["GDP"] = np.log(gdp["GDP"]).diff()

# Balanced Panel

In [None]:
# Boolean mask over the rows of `data`: True where every series has a value.
balancedIndex = data.notnull().all(axis=1)
data.loc[balancedIndex]

In [None]:
# Principal components of the balanced panel (a single component here).
n_components = 1
pca = PCA(n_components=n_components)

panel = data[balancedIndex]
scores = pca.fit_transform(panel)

# Wrap the scores in a frame that shares the panel's index.
components = pd.DataFrame(scores).set_index(panel.index)

print(pca.explained_variance_ratio_)

## MARSS R-Package

In [None]:
# Plain arrays for the %%R cell: one row per series / component, one column
# per row of the balanced panel. `.values` replaces `as_matrix()`, which is no
# longer available on DataFrame.
balancedPanel = data[balancedIndex].transpose().values
initial = components.transpose().values

# Parameter labels 'z0' .. 'z25'.
zs = ['z' + str(i) for i in range(0, 26)]

In [None]:
%%R -i balancedPanel,zs,initial -o states

# One-state model fitted with MARSS:
#   observation equation uses Z (loadings), state equation uses B.
# balancedPanel comes from Python with one row per series; `states` is sent
# back to Python through -o.

# Number of observed series, taken from the data instead of being hard-coded,
# so Z always has one row per series.
n_series = nrow(balancedPanel)

# State transition matrix: every element estimated.
# Alternatives tried earlier: B="diagonal and unequal", B="identity"
B = "unconstrained"

# Loadings: one separately named free parameter per series ("z1", "z2", ...).
Z = matrix(as.list(paste0("z", seq_len(n_series))), n_series, 1)

# A, U and the initial state x0 are fixed at zero.
model.gen = list(Z=Z, B=B, A="zero", x0="zero", U="zero")
kemfit = MARSS(balancedPanel, model=model.gen,
               control=list(conv.test.slope.tol=0.00001, abstol=0.00001))
states = kemfit$states

# Diagnostics, if needed: print(kemfit, what="model"), what="start",
# what="states", what="ytT", what="states.se", what="kfs"; print(kemfit$par$Z)

In [None]:
# Turn the states returned from R into a frame with one row per balanced-panel
# row and one column per state, then plot it.
panelDates = data[balancedIndex].index
factor = pd.DataFrame(states).T.set_index(panelDates)
factor.plot()

In [None]:
# balancedIndex is a boolean mask indexed by the rows of `data`, not of `gdp`.
# Align it to gdp's index first; gdp rows absent from the mask count as False.
gdpMask = balancedIndex.reindex(gdp.index, fill_value=False).astype(bool)
gdp[gdpMask].plot()

# OLS - Nowcast

In [None]:
# Rolling sum of the factor over 3 rows. Stored under its own name so that
# re-running this cell does not roll an already-rolled factor.
factorRolled = factor.rolling(window=3).sum()

# Keep only the dates present in both frames, and drop rows where either the
# GDP growth or the rolled factor is missing (the first rows of each are NaN
# by construction) so the regression sample is complete.
olsSample = gdp.join(factorRolled, how="inner").dropna(subset=["GDP", 0])
gdpComp = olsSample[0]

In [None]:
# Regress GDP growth on the factor.
# NOTE: no intercept is included; use X = sm.add_constant(gdpComp) to add one.
X = gdpComp

# missing="drop" removes rows with NaN in either side instead of failing on them.
model = sm.OLS(olsSample["GDP"], X, missing="drop")
results = model.fit()
print(results.summary())

The model is not a good fit. We believe this could be due to not specifying initial values for the parameters. We will therefore try to find initial values by running a VAR on the PCA components, as in Banbura et al. (2013).

# PCA to retrieve factors from balanced panel

In [None]:
# Principal components of the balanced panel, this time keeping five.
n_components = 5
pca = PCA(n_components=n_components)

panel = data[balancedIndex]
scores = pca.fit_transform(panel)

# Wrap the scores in a frame that shares the panel's index.
components = pd.DataFrame(scores).set_index(panel.index)

print(pca.explained_variance_ratio_)

In [None]:
# Add up the per-component ratios printed in the previous cell.
sum(pca.explained_variance_ratio_)

In [None]:
# Plot every component against the panel index.
components.plot()

# Initial Parameters Estimation 
## VAR - for B-Matrix Estimation

In [None]:
# Vector autoregression with one lag on the PCA components.
varModel = sm.tsa.VAR(components)
results = varModel.fit(maxlags=1)
results.summary()

In [None]:
# First coefficient matrix of the fitted VAR, kept as the starting value for B.
# Relies on `results` still being the VAR fit from the previous cell; later cells rebind `results`.
initialB = pd.DataFrame(results.coefs[0])

## OLS For Z-Matrix estimation

In [None]:
# Example loading regression for a single series: regress it on the components.
# The first column of the panel is used because the previously hard-coded
# 'ConsumerConfidence' label is not one of the columns of `data`.
exampleSeries = data.columns[0]
results = sm.OLS(data[balancedIndex][exampleSeries], components).fit()
results.params

In [None]:
# Regress each series of the balanced panel on the components; the fitted
# coefficients of series i become row i of initialZ.
panel = data[balancedIndex]

dfList = []
for col in panel.columns:
    results = sm.OLS(panel[col], components).fit()
    dfList.append(results.params)

initialZ = pd.concat(dfList, axis=1).T
initialZ

# Re-Estimation of Factor Model by EM Algo

In [None]:
# Dimensions of the B starting values before they are flattened in the next cell.
initialB.shape

In [None]:
# Flatten the starting values into single-column arrays for MARSS.
# - np.asarray works on both a DataFrame and an already-flattened array, so the
#   cell can be re-run; `as_matrix()` is no longer available on DataFrame.
# - reshape(-1, 1) derives the length from the data instead of hard-coding it.
# - order="F" flattens column by column. R's matrix(zs, rows, cols) also fills
#   column by column, so element k of the flattened vector lines up with
#   parameter label zs[k]; a row-by-row flatten would pair values with the
#   wrong labels.
#   NOTE(review): confirm against the MARSS `inits` documentation that the
#   parameter vector for an unconstrained B is ordered the same way.
initialZ = np.asarray(initialZ).reshape((-1, 1), order="F")
initialB = np.asarray(initialB).reshape((-1, 1), order="F")
print(initialZ.shape)
print(initialB.shape)

In [None]:
# Number of latent factors, and one parameter label per (series, factor)
# pair: 'z0', 'z1', ... for 13 series.
numberOfFactors = 5
zs = ['z' + str(i) for i in range(0, 13 * numberOfFactors)]
    


In [None]:
%%R -i balancedPanel,zs,initialB,initialZ,numberOfFactors -o states

# Multi-state model fitted with MARSS, started from the supplied initial
# values for Z and B. Every name used here arrives through -i, so the cell
# does not depend on objects left in the R session by earlier cells.

# Number of observed series, taken from the data instead of being hard-coded.
n_series = nrow(balancedPanel)

# The label vector built in Python must supply exactly one name per element
# of Z; otherwise matrix() would silently recycle or truncate it.
stopifnot(length(zs) == n_series * numberOfFactors)

print(dim(initialZ))

# State transition matrix: every element estimated.
# Alternatives tried earlier: B="diagonal and unequal", B="identity"
B = "unconstrained"

# Loadings: n_series x numberOfFactors, filled column by column with the labels in zs.
Z = matrix(zs, n_series, numberOfFactors)

# Starting values for the estimated parameters.
inits = list(Z=initialZ, B=initialB)

# A, U and the initial state x0 are fixed at zero.
model.gen = list(Z=Z, B=B, A="zero", x0="zero", U="zero")
kemfit = MARSS(balancedPanel, model=model.gen, inits=inits)
states = kemfit$states

# Diagnostics, if needed: print(kemfit, what="model"), what="start",
# what="states", what="ytT", what="states.se", what="kfs"
print(dim(kemfit$par$Z))

In [None]:
# Turn the states returned from R into a frame with one row per balanced-panel
# row and one column per state, then plot it.
panelDates = data[balancedIndex].index
factor = pd.DataFrame(states).T.set_index(panelDates)
factor.plot()

In [None]:
# balancedIndex is a boolean mask indexed by the rows of `data`, not of `gdp`.
# Align it to gdp's index first; gdp rows absent from the mask count as False.
gdpMask = balancedIndex.reindex(gdp.index, fill_value=False).astype(bool)
gdp[gdpMask].plot()

# OLS - Nowcast

In [None]:
# Regression sample: GDP growth joined with the factors on shared dates.
# (The 3-row rolling sum of the factor is switched off in this section.)
olsSample = gdp.join(factor, how="inner")

# Factor columns are labelled 0..4.
factorColumns = list(range(5))
gdpComp = olsSample[factorColumns]

In [None]:
# Peek at the first rows of the regressor columns selected in the previous cell.
gdpComp.head()

In [None]:
# Regress GDP growth on the factors.
# NOTE: no intercept is included; use X = sm.add_constant(gdpComp) to add one.
X = gdpComp

# missing="drop" removes rows with NaN in either side (the first GDP growth
# value is NaN by construction) instead of failing on them.
model = sm.OLS(olsSample["GDP"], X, missing="drop")
results = model.fit()
print(results.summary())

Next Steps:

* Compare PCA Factors to Smoothed Factors
* Compare Predicted GDP to actual