In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt
import statsmodels.api as sm
import datetime

In [2]:
# Load the raw table; its 'year' column is converted to integer years in the cells below.
df = pd.read_csv('Dataset.csv')
# Unused alternative kept for reference: parse 'year' into full datetimes instead.
# df.year = pd.to_datetime(df.year,format='%Y-%m-%d')

In [3]:
def timestampToYear(s):
    """Return the calendar year of a ``'YYYY-MM-DD'`` date string as an int.

    Raises ``ValueError`` (from ``strptime``) when ``s`` does not match the
    ``%Y-%m-%d`` format.
    """
    parsed = datetime.datetime.strptime(s, '%Y-%m-%d')
    return parsed.year

In [4]:
# Replace each 'YYYY-MM-DD' string in the 'year' column with its integer year.
df['year'] = df['year'].map(timestampToYear)

In [5]:
# Use the integer year as the row index so the regressions below can read it from series.index.
df = df.set_index('year')

In [6]:
def olsEstimate(series, value, country):
    """Fit an OLS regression of ``series`` on a constant plus its own index.

    Parameters
    ----------
    series : pandas.Series
        Dependent variable; its index is used as the single regressor.
    value : str
        Label stored in the ``value`` column of the result.
    country : str
        Label stored in the ``country`` column of the result.

    Returns
    -------
    pandas.DataFrame
        Two rows labelled ``constant`` and ``trend`` (held in the ``index``
        column after the reset), with columns ``coef``, ``p-value``,
        ``country`` and ``value``.
    """
    design = sm.add_constant(series.index)
    fitted = sm.OLS(series, design).fit()
    summary = pd.DataFrame({
        'coef': fitted.params,
        'p-value': fitted.pvalues,
        'country': country,
        'value': value,
    })
    # The fit has exactly two parameters; relabel them intercept first, slope second.
    summary.index = ['constant', 'trend']
    return summary.reset_index()

In [7]:
# Fit the trend regression once per column; each result keeps the name of the
# column it came from so later cells can refer to it directly.
LOGRLP, LOGRM, LOGRX, dif_LOGRLP, dif_LOGRM, dif_LOGRX = (
    olsEstimate(df[column], column, 'Japan')
    for column in (
        'LOGRLP',
        'LOGRM',
        'LOGRX',
        'dif_LOGRLP',
        'dif_LOGRM',
        'dif_LOGRX',
    )
)

In [8]:
# Preview the first rows of the year-indexed frame.
df.head()

Unnamed: 0_level_0,RLP,RM,RX,LOGRLP,LOGRM,LOGRX,dif_LOGRLP,dif_LOGRM,dif_LOGRX
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1961,49.686283,110826600.0,268381300.0,3.905729,18.523477,19.407919,0.026884,0.013281,0.009853
1962,53.506872,110501200.0,321374500.0,3.97981,18.520537,19.588118,0.018967,-0.000159,0.009285
1963,57.561585,130621000.0,364208000.0,4.052855,18.687811,19.713236,0.018354,0.009032,0.006387
1964,63.503722,148529000.0,416622100.0,4.151099,18.816291,19.84769,0.02424,0.006875,0.006821
1965,66.112662,159807900.0,486170300.0,4.19136,18.889483,20.00207,0.009699,0.00389,0.007778


In [None]:
##http://datatopics.worldbank.org/world-development-indicators/themes/economy.html

In [11]:
# Render every float in displayed frames with three decimal places.
pd.set_option('display.float_format', '{:.3f}'.format)

In [12]:
# Stack the six per-series OLS tables, then pivot so that every (value, index)
# pair becomes a column and the coef / p-value rows sit under each country.
(
    pd.concat([LOGRLP, LOGRM, LOGRX, dif_LOGRLP, dif_LOGRM, dif_LOGRX])
    .set_index(['country', 'value', 'index'])
    .stack()
    .unstack(level=[1, 2])
)

Unnamed: 0_level_0,value,LOGRLP,LOGRLP,LOGRM,LOGRM,LOGRX,LOGRX,dif_LOGRLP,dif_LOGRLP,dif_LOGRM,dif_LOGRM,dif_LOGRX,dif_LOGRX
Unnamed: 0_level_1,index,constant,trend,constant,trend,constant,trend,constant,trend,constant,trend,constant,trend
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Japan,coef,-146.755,0.077,-189.209,0.106,-130.795,0.077,0.691,-0.0,0.148,-0.0,0.281,-0.0
Japan,p-value,0.0,0.0,0.0,0.0,0.0,0.0,0.249,0.26,0.493,0.51,0.006,0.007


In [25]:
##Unit Root Test
from statsmodels.tsa.stattools import adfuller, kpss

In [105]:
def check_for_stationarity(X, reg, value):
    """Run an augmented Dickey-Fuller test on ``X`` and tabulate the outcome.

    Parameters
    ----------
    X : array-like
        Series to test.
    reg : str
        Passed to ``adfuller`` as its ``regression`` argument.
    value : str
        Row label of the returned frame.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``value`` with columns ``ADF``, ``p-value``,
        ``lag``, ``Num of Data`` and ``1%`` (the 1% critical value rounded to
        three decimals).
    """
    # The null hypothesis of adfuller is that a unit root exists (non-stationary),
    # so only a significant p-value supports treating the series as stationary.
    outcome = adfuller(X, regression=reg, maxlag=1)
    statistic, pvalue, used_lag, n_obs, critical_values = outcome[:5]
    row = {
        'ADF': statistic,
        'p-value': pvalue,
        'lag': used_lag,
        'Num of Data': n_obs,
        '1%': round(critical_values['1%'], 3),
    }
    return pd.DataFrame(row, index=[value])

In [106]:
# Each column is paired with the adfuller `regression` code used for it: 'ct'
# for the level series, 'nc' for the differenced ones except dif_LOGRX ('c').
# NOTE(review): newer statsmodels releases spell the 'nc' option as 'n' —
# confirm against the installed version before re-running.
a, b, c, d, e, f = (
    check_for_stationarity(df[column], reg, column)
    for column, reg in (
        ('LOGRLP', 'ct'),
        ('LOGRM', 'ct'),
        ('LOGRX', 'ct'),
        ('dif_LOGRLP', 'nc'),
        ('dif_LOGRM', 'nc'),
        ('dif_LOGRX', 'c'),
    )
)

In [107]:
# Combine the six single-row ADF results and flip them: one column per series,
# one row per reported statistic.
pd.concat([a, b, c, d, e, f]).transpose()

Unnamed: 0,LOGRLP,LOGRM,LOGRX,dif_LOGRLP,dif_LOGRM,dif_LOGRX
ADF,-3.707,-2.757,-2.891,-3.407,-3.508,-3.577
p-value,0.022,0.213,0.165,0.001,0.0,0.006
lag,1.0,1.0,1.0,0.0,0.0,0.0
Num of Data,34.0,34.0,34.0,35.0,35.0,35.0
1%,-4.253,-4.253,-4.253,-2.633,-2.633,-3.633
