In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt

In [2]:
df = pd.read_csv('export_price_index.csv', encoding='cp932')

In [3]:
df = df[df['Time period'].isin(np.arange(1960,2014).astype('str'))].drop(columns=['Series','Time period','Flag Codes','Flags'])

In [4]:
# df.SERIES.unique()

In [5]:
country_dict ={'JPN-EXP-PRI-TOTAL':'XPI',
               'JPN-EXP-VAL-TOTAL':'XVAL',
               'JPN-EXP-VOL-TOTAL':'XVOL',
               'JPN-IMP-PRI-TOTAL':'MPI',
               'JPN-IMP-VAL-TOTAL':'MVAL',
               'JPN-IMP-VOL-TOTAL':'MVOL'}
index = ['JPN-EXP-PRI-TOTAL', 'JPN-EXP-VAL-TOTAL','JPN-EXP-VOL-TOTAL', 'JPN-IMP-PRI-TOTAL', 'JPN-IMP-VAL-TOTAL','JPN-IMP-VOL-TOTAL']

In [6]:
df = df[df.SERIES.isin(index)&df.TIME.isin(np.arange(1960,1997).astype('str'))]
df = df.rename(columns={'TIME':'year'})
df.year = pd.to_datetime(df.year,format='%Y')

In [7]:
df['SERIES'] = df['SERIES'].replace(country_dict)

In [8]:
df = df.set_index(['year','SERIES'])

In [9]:
df = df.unstack()
df.columns = df.columns.droplevel(level=0)

In [10]:
###よくわからんからTrade VolumeとValueは削除してほかのリソースを使う
df = df.drop(columns=['MVOL','MVAL','XVOL','XVAL'])

In [11]:
df.head()

SERIES,MPI,XPI
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1960-01-01,60.612896,108.634902
1961-01-01,60.846707,104.272264
1962-01-01,59.020749,101.006817
1963-01-01,60.746502,103.384062
1964-01-01,61.659481,104.833921


In [12]:
japan = pd.read_csv('Japan_data.csv')
japan = japan.drop(columns='country').set_index('year')

In [13]:
labor = pd.read_csv('labor_force_japan.csv')
labor.columns = ['year','Labor']
labor = labor.set_index(['year'])
labor = labor*1000

In [14]:
df = pd.merge(df, japan, right_index=True, left_index=True).drop(columns=['IND'])
df = pd.merge(df, labor, right_index=True, left_index=True,how='left')

In [15]:
df.head()

Unnamed: 0_level_0,MPI,XPI,GDP,M,X,DEFLATOR,Labor
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1960-01-01,60.612896,108.634902,44307340000.0,4558322000.0,18603240000.0,22.822009,45103330.0
1961-01-01,60.846707,104.272264,53508620000.0,5833317000.0,21578910000.0,24.598866,45623330.0
1962-01-01,59.020749,101.006817,60723020000.0,5641652000.0,25030560000.0,25.631915,46140000.0
1963-01-01,60.746502,103.384062,69498130000.0,6863869000.0,29034310000.0,27.044353,46524170.0
1964-01-01,61.659481,104.833921,81749010000.0,7922199000.0,33678490000.0,28.485468,47095000.0


In [16]:
def adjustment(series):
    return series / series['1987-01-01'] *100

In [17]:
df.XPI = adjustment(df.XPI)
df.MPI = adjustment(df.MPI)
df.DEFLATOR = adjustment(df.DEFLATOR)

In [18]:
##見やすいように並べ替え
df = df.reset_index().set_index(['GDP','M','X','Labor','DEFLATOR','MPI']).reset_index().set_index(['year'])

In [19]:
df['RLP'] = df['GDP'] / df['DEFLATOR'] / df['Labor']
df['RM'] = df['M'] / df['MPI']
df['RX'] = df['X'] / df['XPI']
df['LOGRLP'] = np.log(df.RLP)
df['LOGRM'] = np.log(df.RM)
df['LOGRX'] = np.log(df.RX)
df['dif_LOGRLP'] =df['LOGRLP'].pct_change()
df['dif_LOGRM'] = df['LOGRM'].pct_change()
df['dif_LOGRX'] = df['LOGRX'].pct_change()

In [20]:
##分析対象のデータのみにする。
dataset = pd.DataFrame({'RLP':df.RLP,'RM':df.RM,'RX':df.RX,
                        'LOGRLP':df['LOGRLP'],'LOGRM':df['LOGRM'],'LOGRX':df['LOGRX'],
                       'dif_LOGRLP':df['dif_LOGRLP'],'dif_LOGRM':df['dif_LOGRM'],'dif_LOGRX':df['dif_LOGRX']},
                       index=df.index)

In [21]:
##Logとってないバージョン
dataset.reset_index(inplace=True)
dataset.head()

Unnamed: 0,year,RLP,RM,RX,LOGRLP,LOGRM,LOGRX,dif_LOGRLP,dif_LOGRM,dif_LOGRX
0,1960-01-01,44.856778,86937130.0,222080600.0,3.803475,18.280696,19.218551,,,
1,1961-01-01,49.686283,110826600.0,268381300.0,3.905729,18.523477,19.407919,0.026884,0.013281,0.009853
2,1962-01-01,53.506872,110501200.0,321374500.0,3.97981,18.520537,19.588118,0.018967,-0.000159,0.009285
3,1963-01-01,57.561585,130621000.0,364208000.0,4.052855,18.687811,19.713236,0.018354,0.009032,0.006387
4,1964-01-01,63.503722,148529000.0,416622100.0,4.151099,18.816291,19.84769,0.02424,0.006875,0.006821


In [28]:
# multiple line plot
year = dataset.year
plt.plot(year, dataset.RLP, color='black', label='Labor Productivity')
plt.plot(year, dataset.RM, color='orange', label='Import')
plt.plot(year, dataset.RX, color='skyblue', label='Export')
plt.legend()
plt.show()

In [31]:
# multiple line plot
year = dataset.year
plt.plot(year, dataset.LOGRLP, color='black', label='Log Labor Productivity')
plt.plot(year, dataset.LOGRM, color='orange', label='Log Import')
plt.plot(year, dataset.LOGRX, color='skyblue', label='Log Export')
plt.legend()
plt.show()

In [32]:
# multiple line plot
year = dataset.year
plt.plot(year, dataset.dif_LOGRLP, color='black', label='dif Log Labor Productivity')
plt.plot(year, dataset.dif_LOGRM, color='orange', label='dif Log Import')
plt.plot(year, dataset.dif_LOGRX, color='skyblue', label='dif Log Export')
plt.legend()
plt.show()

In [26]:
dataset.dropna(axis=0,inplace=True)
dataset.to_csv('Dataset.csv',index=None)

In [None]:
model = sm.OLS(np.arange(),nstant(series.index)).fit()
result = pd.DataFrame({'coef':model.params,'p-value':model.pvalues,'country':country,'value':value})
result.index = ['constant','trend']
result = result.reset_index()