In [1]:
import pandas as pd
from pandas import DataFrame, Series

from datetime import datetime
import os.path as path

In [2]:
# Read the comma-separated sample file with read_csv's default options.
pd.read_csv('data/geeks.csv')

Unnamed: 0,이름,직업,국적,트위터,생년월일
0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
1,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
2,Turing,컴퓨터과학자,영국,,1912-06-23


문서를 읽어올 때는 종종 인코딩을 설정해야 한다.

In [4]:
# State the file's encoding explicitly; this file is decoded as cp949.
pd.read_csv('data/geeks_cp949.txt', encoding='cp949')

Unnamed: 0,이름,기술,국적,트위터,생년월일
0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
1,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
2,Turing,컴퓨터과학자,영국,,1912-06-23


파이썬 실행 환경의 인코딩과 문서의 인코딩이 일치하지 않으면, 문제가 생긴다.

In [5]:
pd.read_csv('data/geeks_cp949.txt') # raises UnicodeDecodeError (no encoding specified)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc0 in position 0: invalid start byte

데이터에 열 제목이 없는 경우

In [7]:
# With no header option, the first data row is consumed as the column names.
pd.read_csv('data/geeks_no_header.csv')

Unnamed: 0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
0,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
1,Turing,컴퓨터과학자,영국,,1912-06-23


In [8]:
# header=None keeps every row as data and numbers the columns instead.
pd.read_csv('data/geeks_no_header.csv', header=None)

Unnamed: 0,0,1,2,3,4
0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
1,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
2,Turing,컴퓨터과학자,영국,,1912-06-23


In [9]:
# names= supplies the column titles for a file that has no header row.
pd.read_csv('data/geeks_no_header.csv',
           names=['이름', '직업', '국적', '트위터', '생년월일'])

Unnamed: 0,이름,직업,국적,트위터,생년월일
0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
1,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
2,Turing,컴퓨터과학자,영국,,1912-06-23


In [19]:
# Load the sample again with an explicit encoding and keep it for the column lookups below.
프로그래머 = pd.read_csv('data/geeks.csv', encoding='utf-8')
프로그래머

Unnamed: 0,이름,직업,국적,트위터,생년월일
0,이성주,컴퓨터 프로그래머/데이터 과학자,대한민국,@LeeSeongjoo,1982-12-27
1,Rossum,컴퓨터 프로그래머,네덜란드,@gvanrossum,1956-01-31
2,Turing,컴퓨터과학자,영국,,1912-06-23


열을 왜 조회할 수 없을까?

In [20]:
프로그래머['직업'] # raises KeyError

KeyError: '직업'

파일을 살펴보니 쉼표 뒤에 공백이 있었다! 그래서 열 이름이 '직업'이 아니라 ' 직업'으로 읽혔다.

In [21]:
# The parsed column name carries a leading space, so the lookup key must include it.
프로그래머[' 직업']

0     컴퓨터 프로그래머/데이터 과학자
1             컴퓨터 프로그래머
2                컴퓨터과학자
Name:  직업, dtype: object

텍스트 파일은 구분자 앞 또는 뒤에 공백이 삽입되어 있을 수 있다. 그런 공백을 제거하는 것이 바람직하다.

In [17]:
# skipinitialspace=True drops the whitespace that follows each delimiter,
# so the column can be addressed by its plain name.
프로그래머 = pd.read_csv('data/geeks.csv', skipinitialspace=True, encoding='utf-8')
프로그래머['직업']

0    컴퓨터 프로그래머/데이터 과학자
1            컴퓨터 프로그래머
2               컴퓨터과학자
Name: 직업, dtype: object

## 엑셀 파일 읽기

### 예제: 한국수출입은행 (공공데이터) 엑셀 파일 처리

In [29]:
# Path template for the daily workbooks; {} is replaced by a date suffix such as '0803'.
파일양식 = 'data/한국수출입은행/국제금융시장동향/Global+Market+Daily+Table%28%EA%B0%92%EB%B3%B5%EC%82%AC%29_{}.xls'

In [30]:
# Fill the template with the '0803' suffix and show the resulting path.
파일경로 = 파일양식.format('0803')
파일경로

'data/한국수출입은행/국제금융시장동향/Global+Market+Daily+Table%28%EA%B0%92%EB%B3%B5%EC%82%AC%29_0803.xls'

파일 경로에 포함된 이상한 기호들의 정체: URL 인코딩(퍼센트 인코딩)된 문자들이다. 디코딩하면 원래 파일 이름을 확인할 수 있다.

In [31]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.parse` has been loaded.
import urllib.parse

# Decode the percent-escapes and '+' signs to reveal the readable file name.
urllib.parse.unquote_plus(파일경로)

'data/한국수출입은행/국제금융시장동향/Global Market Daily Table(값복사)_0803.xls'

엑셀 파일을 읽어 보니 위쪽에 빈 줄이 많다.

In [32]:
# Read the workbook with default options to inspect its raw layout.
pd.read_excel(파일경로)

Unnamed: 0,Unnamed: 1.1,Global Market Daily Update,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,As of:,2014-10-20 00:00:00,2014-10-18 00:00:00,Unnamed: 9,2015-08-03 00:00:00
,,,,,,,,,,,,
,,,,,,,,,,,,
,,Equity,,,,,,,,,,
,,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,Low-high,
,MXWD Index,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13,,
,SPX Index,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49,,
,SX5E Index,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65,,
,DAX Index,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95,,
,NKY Index,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5,,
,SHCOMP Index,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67,,


위의 3줄은 건너뛰기로 한다.

In [34]:
# Skip the first three rows of the sheet.
pd.read_excel(파일경로, skiprows=3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Equity,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12
0,,,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,Low-high,
1,,MXWD Index,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13,,
2,,SPX Index,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49,,
3,,SX5E Index,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65,,
4,,DAX Index,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95,,
5,,NKY Index,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5,,
6,,SHCOMP Index,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67,,
7,,,,,,,,,,,,,
8,,,Fixed Income,,,,,,,,,,
9,,,,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,,


열은 엑셀의 C열부터 K열까지 가져오도록 한다.

In [35]:
# Restrict the read to Excel columns C through K.
# `usecols` replaces the `parse_cols` keyword, which was deprecated and later removed from read_excel.
pd.read_excel(파일경로, skiprows=3, usecols="C:K")

Unnamed: 0,Equity,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
1,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13
2,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49
3,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65
4,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95
5,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5
6,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67
7,,,,,,,,,
8,Fixed Income,,,,,,,,
9,,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low


표가 여러 개라 열제목은 일단 없음으로 설정해야겠다.

In [37]:
# header=None: the sheet stacks several tables, so no single row is used as the header.
# `usecols` replaces the `parse_cols` keyword, which was deprecated and later removed from read_excel.
국제_0803 = pd.read_excel(파일경로, skiprows=3, usecols="C:K", header=None)
국제_0803

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Equity,,,,,,,,
1,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
2,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13
3,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49
4,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65
5,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95
6,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5
7,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67
8,,,,,,,,,
9,Fixed Income,,,,,,,,


빈 줄은 삭제하자

In [42]:
# Drop only the rows in which every cell is missing; partially filled rows are kept.
국제_0803 = 국제_0803.dropna(how='all')
국제_0803

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Equity,,,,,,,,
1,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
2,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13
3,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49
4,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65
5,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95
6,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5
7,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67
9,Fixed Income,,,,,,,,
10,,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low


빈 줄이 삭제되면서 색인 번호가 고르지 않아졌다.

In [51]:
# Label 8 no longer exists after dropna (the index jumps from 7 to 9), so this lookup raises KeyError.
# `.loc` is the label-based replacement for the removed `.ix` indexer.
국제_0803.loc[8]

0    Fixed Income
1             NaN
2             NaN
3             NaN
4             NaN
5             NaN
6             NaN
7             NaN
8             NaN
Name: 8, dtype: object

색인을 재설정하자

In [53]:
# Renumber the rows consecutively; drop=True discards the old index instead of keeping it as a column.
국제_0803 = 국제_0803.reset_index(drop=True)
국제_0803

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Equity,,,,,,,,
1,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
2,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13
3,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49
4,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65
5,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95
6,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5
7,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67
8,Fixed Income,,,,,,,,
9,,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low


각 표를 각각의 데이터프레임으로 잘라내자

In [68]:
# Label-based slice; with .loc both end labels (0 and 8) are included.
# `.loc` replaces the removed `.ix` indexer.
국제_0803.loc[0:8]

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Equity,,,,,,,,
1,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
2,MSCI AC World,425.32,-0.00342097,0.0141638,-0.00342097,-0.00561115,0.0196586,442.7,392.13
3,S&P 500,2098.04,-0.00275686,0.0147028,-0.00275686,0.0150366,0.0190102,2130.82,1862.49
4,Euro Stoxx 50,3635.4,0.00963982,0.0348126,0.00963982,-0.0214978,0.155405,3828.78,2874.65
5,DAX,11443.7,0.0119135,0.0350313,0.0119135,-0.0437581,0.167066,12374.7,8571.95
6,Nikkei 225,20548.1,-0.00180372,0.00973017,-0.00180372,0.0572691,0.17749,20868,14532.5
7,상해,3622.91,-0.0111417,-0.0275537,-0.0111417,-0.062378,0.120021,5166.35,2187.67
8,Fixed Income,,,,,,,,


범위 목록을 만들어서 한번에 처리하자

In [74]:
# (start, end) row-label pairs used to slice 국제_0803, keyed by indicator group.
색인범위목록 = dict(
    [
        ('equity', (0, 7)),
        ('fixed income', (8, 15)),
        ('fx', (16, 22)),
        ('commodity', (24, 28)),
        ('cds', (29, 43)),
    ]
)

In [75]:
# Cut one sub-frame per indicator group out of the combined frame.
# `.loc` replaces the removed `.ix` indexer; its label slices include both end points.
국제0803_지표 = {}
for 지표, (시작, 끝) in 색인범위목록.items():
    국제0803_지표[지표] = 국제_0803.loc[시작:끝]

In [80]:
# Inspect the slice stored under the 'cds' key.
국제0803_지표['cds']

Unnamed: 0,0,1,2,3,4,5,6,7,8
29,CDS,,,,,,,,
30,,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
31,China,101,0.831,-0.501,0.831,8.333,14.013,104.336,66.838
32,Indonesia,186.757,3.249,5.545,3.249,30.208,26.445,183.748,127.162
33,Malaysia,152.159,4.173,10.277,4.173,24.706,45.332,151.325,73.677
34,Thailand,109,2,1,2,5.5,6,119.511,80.84
35,Russia,345.773,6.039,3.628,6.039,-27.167,-126.441,628.765,221.835
36,South Africa,219.178,-0.717,-0.887,-0.717,11.862,29.295,240.794,159
37,Turkey,236.839,-1.569,6.529,-1.569,20.735,60.005,238.408,156
38,Brazil,292.362,4.642,-4.678,4.642,27.912,95.564,306.576,123.25


색인범위를 자동으로 구할 수도 있을까?

In [91]:
# Rows holding exactly one non-null cell are taken as table starts.
색인시작목록 = 국제_0803.index[국제_0803.notnull().sum(axis=1) == 1]
# Close the list with one past the last row label so the final table gets an end too.
색인시작목록 = list(색인시작목록) + [국제_0803.index[-1] + 1]
색인시작목록

[0, 8, 16, 23, 24, 29, 44]

In [92]:
# Pair each start label with the label just before the next start.
# zip over neighbouring items replaces the manual index counter.
색인범위목록 = [
    (시작, 다음시작 - 1)
    for 시작, 다음시작 in zip(색인시작목록[:-1], 색인시작목록[1:])
]
색인범위목록

[(0, 7), (8, 15), (16, 22), (23, 23), (24, 28), (29, 43)]

알아낸 것을 함수로 정리하자

In [101]:
def 데이터프레임추출(데이터프레임):
    """Split a frame that stacks several tables into one DataFrame per table.

    A row with exactly one non-null cell is treated as the start of a table.
    Each table runs from its start row up to the row just before the next
    start row (or up to the last row of the frame).

    Parameters
    ----------
    데이터프레임 : pandas.DataFrame
        Frame to split. One is added to its last index label, so the index
        labels must be numeric.

    Returns
    -------
    list of pandas.DataFrame
        Label-based slices of the input, in order of appearance. Ranges that
        span fewer than three rows are skipped. An empty input gives ``[]``.
    """
    # Nothing to split; also avoids indexing the last label of an empty index.
    if len(데이터프레임.index) == 0:
        return []

    # Work on the argument itself rather than on a notebook-level global.
    제목행 = (데이터프레임.notnull().sum(axis=1) == 1).values
    색인시작목록 = list(데이터프레임.index[제목행])
    # One past the last label closes the final table.
    색인시작목록.append(데이터프레임.index[-1] + 1)

    데이터프레임목록 = []
    for 시작, 다음시작 in zip(색인시작목록[:-1], 색인시작목록[1:]):
        끝 = 다음시작 - 1
        # Skip ranges spanning fewer than three rows (end - start < 2).
        if 끝 - 시작 < 2:
            continue
        # .loc slices by label and includes both end points.
        데이터프레임목록.append(데이터프레임.loc[시작:끝])

    return 데이터프레임목록

이제 데이터프레임을 추출하자

In [103]:
# Run the extraction on the cleaned frame and display the resulting list of frames.
프레임목록 = 데이터프레임추출(국제_0803)
프레임목록

[               0           1           2           3           4           5  \
 0         Equity         NaN         NaN         NaN         NaN         NaN   
 1            NaN  Last Price         D-1         D-7         MTD         QTD   
 2  MSCI AC World      425.32 -0.00342097   0.0141638 -0.00342097 -0.00561115   
 3        S&P 500     2098.04 -0.00275686   0.0147028 -0.00275686   0.0150366   
 4  Euro Stoxx 50      3635.4  0.00963982   0.0348126  0.00963982  -0.0214978   
 5            DAX     11443.7   0.0119135   0.0350313   0.0119135  -0.0437581   
 6     Nikkei 225     20548.1 -0.00180372  0.00973017 -0.00180372   0.0572691   
 7             상해     3622.91  -0.0111417  -0.0275537  -0.0111417   -0.062378   
 
            6        7        8  
 0        NaN      NaN      NaN  
 1        YTD  Yr High   Yr Low  
 2  0.0196586    442.7   392.13  
 3  0.0190102  2130.82  1862.49  
 4   0.155405  3828.78  2874.65  
 5   0.167066  12374.7  8571.95  
 6    0.17749    20868  14532.5

각 데이터에 대해 인덱스와 열 제목 설정

In [306]:
# Give every extracted table a proper row index and column header.
# Iterates over 프레임목록 (built by the extraction cell); the name `frames`
# used previously is not defined by any cell of this notebook.
market_index_i18n_20150803 = []
for mi in 프레임목록:
    # First row: the table title in its first cell. Second row: the column names.
    index_name = mi.iloc[0, 0]
    columns = mi.iloc[1, 1:]
    # The remaining rows are data; column 0 becomes the row index.
    mi = mi.iloc[2:].set_index(0)
    # Drop rows whose index label is NaN. A boolean mask also works when
    # labels repeat, where reindex would fail.
    mi = mi[pd.notnull(mi.index)].copy()
    mi.index.name = index_name
    mi.columns = columns
    mi.columns.name = 'price'
    market_index_i18n_20150803.append(mi)

In [307]:
# Show the first processed table.
market_index_i18n_20150803[0]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
Equity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MSCI AC World,425.32,-0.003420966,0.01416377,-0.003420966,-0.005611147,0.01965861,442.7,392.13
S&P 500,2098.04,-0.002756864,0.01470275,-0.002756864,0.01503658,0.01901015,2130.82,1862.49
Euro Stoxx 50,3635.4,0.009639819,0.03481256,0.009639819,-0.02149776,0.1554047,3828.78,2874.65
DAX,11443.72,0.01191353,0.03503129,0.01191353,-0.04375808,0.1670656,12374.73,8571.95
Nikkei 225,20548.11,-0.00180372,0.009730173,-0.00180372,0.05726912,0.1774902,20868.03,14532.51
상해,3622.905,-0.01114167,-0.02755372,-0.01114167,-0.06237796,0.1200206,5166.35,2187.669


In [308]:
# Show the second processed table.
market_index_i18n_20150803[1]

price,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
Fixed Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
UST 10Y,2.148,-3,-6.95,-3.21,30.91,-2.32,2.6198,1.6407
UST 2Y,0.6646,0,1.51,0.4,18.84,0.01,0.7393,0.3076
Spread(10Y-2Y),1.4834,-3,-8.46,-3.61,12.07,-2.33,2.0562,1.1919
Bund 10Y,0.628,-1,-6.3,-1.6,43.4,8.7,1.167,0.075
Bund 2Y,-0.244,0,-1.8,-0.9,1.3,-13.4,0.027,-0.283
Spread(10Y-2Y),0.872,-1,-4.5,-0.7,42.1,22.1,1.157,0.342


In [309]:
# Show the third processed table.
market_index_i18n_20150803[2]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
FX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Dollar Index,97.494,0.001623243,0.01027958,0.001623243,0.009829613,0.08003855,100.33,81.328
EUR,1.095,-0.003095412,-0.01244589,-0.003095412,-0.001732154,-0.09489172,1.3422,1.0496
GBP,1.5587,-0.00224043,0.001799602,-0.00224043,0.04470509,0.0006419721,1.6886,1.4632
JPY,124.03,-0.001128759,-0.006288801,-0.001128759,-0.04079658,-0.0342659,102.0304,125.6124
AUD,0.7286,-0.0030104,0.002338699,-0.0030104,-0.0454605,-0.1087462,0.9378,0.7269


In [310]:
# Show the fourth processed table.
market_index_i18n_20150803[3]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
Commodity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
WTI,45.17,-0.0413837,-0.04684533,-0.0413837,-0.1542782,-0.1588454,93.38,47.12
Gold,1089.4,-0.005205004,-0.006837451,-0.005205004,-0.09473159,-0.08059752,1321.0,1086.0
Corn,376.5,-0.01245902,-0.01825293,-0.01245902,-0.08282582,-0.07208872,451.75,363.5


In [311]:
# Show the fifth processed table.
market_index_i18n_20150803[4]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low
CDS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
China,101.0,0.831,-0.501,0.831,8.333,14.013,104.336,66.838
Indonesia,186.757,3.249,5.545,3.249,30.208,26.445,183.748,127.162
Malaysia,152.159,4.173,10.277,4.173,24.706,45.332,151.325,73.677
Thailand,109.0,2.0,1.0,2.0,5.5,6.0,119.511,80.84
Russia,345.773,6.039,3.628,6.039,-27.167,-126.441,628.765,221.835
South Africa,219.178,-0.717,-0.887,-0.717,11.862,29.295,240.794,159.0
Turkey,236.839,-1.569,6.529,-1.569,20.735,60.005,238.408,156.0
Brazil,292.362,4.642,-4.678,4.642,27.912,95.564,306.576,123.25
Korea,54.165,0.331,-0.751,0.331,2.08,-0.326,67.609,45.491
Japan,40.0,0.0,-0.713,0.0,4.511,-27.5,73.833,31.335


처리 과정을 단일 함수화

In [359]:
def extract_global_index(filename, year=2015):
    """Read one daily workbook and return its tables as separate DataFrames.

    Parameters
    ----------
    filename : str
        Path of the Excel file. The last four characters of the base name
        (before the first '.') are parsed as month and day ('%m%d').
    year : int, optional
        Year prepended to the month/day taken from the file name.

    Returns
    -------
    list of pandas.DataFrame
        One frame per table. The row index is named after the table's title
        cell, the columns axis is named 'price', and a 'date' column holds
        the date parsed from the file name.

    Notes
    -----
    Relies on ``to_header_range``, which is defined outside this function.
    It presumably turns the list of title-row labels into (start, end)
    pairs -- TODO confirm. Here ``end`` is treated as exclusive, because
    each slice runs to ``end - 1``.
    """
    # Date information comes from the file name, not from the sheet.
    stem = path.basename(filename).split('.')[0]
    date = datetime.strptime(str(year) + stem[-4:], '%Y%m%d')

    # Read columns C..K below the first three rows, then drop fully blank rows.
    # `usecols` replaces the removed `parse_cols` keyword.
    data = pd.read_excel(filename, usecols="C:K", skiprows=3, header=None)
    data = data.dropna(how='all')

    # A row with exactly one non-null cell marks the start of a table.
    is_title_row = (data.notnull().sum(axis=1) == 1).values
    header_idx = list(data.index[is_title_row])
    # One past the last label closes the final table.
    header_idx.append(data.index[-1] + 1)
    header_range = to_header_range(header_idx)

    # Cut out one frame per range; .loc slices by label, end point included.
    frames = [data.loc[s:e - 1] for s, e in header_range]

    # Set the index and the column header of every frame.
    market_indice = []
    for mi in frames:
        # First row: the table title in its first cell. Second row: the column names.
        index_name = mi.iloc[0, 0]
        columns = mi.iloc[1, 1:]
        mi = mi.iloc[2:].set_index(0)
        # Drop rows whose index label is NaN. A boolean mask also works when
        # labels repeat, where reindex would fail.
        mi = mi[pd.notnull(mi.index)].copy()
        mi.index.name = index_name
        mi.columns = columns
        mi.columns.name = 'price'
        mi['date'] = date
        market_indice.append(mi)

    return market_indice

In [360]:
# Run the whole pipeline on the '0804' workbook; year keeps its default of 2015.
market_indice_20150804 = extract_global_index('data/한국수출입은행/국제금융시장동향/Global+Market+Daily+Table%28%EA%B0%92%EB%B3%B5%EC%82%AC%29_0804.xls')

In [361]:
# Show the first table extracted from the '0804' workbook.
market_indice_20150804[0]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,date
Equity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MSCI AC World,424.92,-0.0009404684,0.006323268,-0.004358217,-0.006546339,0.01869965,442.7,392.13,2015-08-04
S&P 500,2093.32,-0.002249719,3.344082e-05,-0.00500038,0.01275303,0.01671766,2130.82,1862.49,2015-08-04
Euro Stoxx 50,3619.31,-0.004425923,0.01834496,0.005171231,-0.02582854,0.150291,3828.78,2874.65,2015-08-04
DAX,11456.07,0.001079195,0.02525168,0.01300558,-0.04272611,0.1683251,12374.73,8571.95,2015-08-04
Nikkei 225,20520.36,-0.001350489,0.009418616,-0.003151773,0.05584129,0.1759,20868.03,14532.51,2015-08-04
상해,3756.545,0.03688753,0.02553725,0.02533487,-0.0277914,0.1613354,5166.35,2187.669,2015-08-04


In [354]:
# Show the second table extracted from the '0804' workbook.
market_indice_20150804[1]

price,Last Price(%),D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,date
Fixed Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
UST 10Y,2.2213,7.0,-2.86,4.12,38.24,5.01,2.6198,1.6407,2015-08-04
UST 2Y,0.7322,7.0,6.42,7.16,25.6,6.77,0.7393,0.3076,2015-08-04
Spread(10Y-2Y),1.4891,3.375078e-14,-9.28,-3.04,12.64,-1.76,2.0562,1.1919,2015-08-04
Bund 10Y,0.638,1.0,-5.1,-0.6,44.4,9.7,1.101,0.075,2015-08-04
Bund 2Y,-0.252,-1.0,-1.6,-1.7,0.5,-14.2,0.009,-0.283,2015-08-04
Spread(10Y-2Y),0.89,2.0,-3.5,1.1,43.9,23.9,1.157,0.342,2015-08-04


In [355]:
# Show the third table extracted from the '0804' workbook.
market_indice_20150804[2]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,date
FX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Dollar Index,97.931,0.004482327,0.0119766,0.006112846,0.014356,0.08487964,100.33,81.389,2015-08-04
EUR,1.0881,-0.00630137,-0.01618445,-0.009377276,-0.008022609,-0.1005951,1.341,1.0496,2015-08-04
GBP,1.5564,-0.001475589,-0.00313841,-0.003712713,0.04316354,-0.0008345638,1.6853,1.4632,2015-08-04
JPY,124.38,-0.002813957,-0.0065927,-0.00393954,-0.04349574,-0.03698344,102.0304,125.6124,2015-08-04
AUD,0.738,0.01290145,0.00572363,0.009852217,-0.03314555,-0.09724771,0.9378,0.7269,2015-08-04


In [356]:
# Show the fourth table extracted from the '0804' workbook.
market_indice_20150804[3]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,date
Commodity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
WTI,45.74,0.01261899,-0.04668612,-0.02928693,-0.1436061,-0.1482309,93.38,45.17,2015-08-04
Gold,1090.7,0.001193317,-0.005470958,-0.004017898,-0.09365132,-0.07950038,1321.0,1086.0,2015-08-04
Corn,378.75,0.005976096,-0.01750973,-0.006557377,-0.0773447,-0.06654344,451.75,363.5,2015-08-04


In [357]:
# Show the fifth table extracted from the '0804' workbook.
market_indice_20150804[4]

price,Last Price,D-1,D-7,MTD,QTD,YTD,Yr High,Yr Low,date
CDS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
China,102.003,1.003,4.493,1.834,9.336,15.016,104.336,66.838,2015-08-04
Indonesia,189.048,2.291,6.705,5.54,32.499,28.736,186.757,127.162,2015-08-04
Malaysia,153.209,1.05,12.278,5.223,25.756,46.382,152.159,73.677,2015-08-04
Thailand,112.0,3.0,6.5,5.0,8.5,9.0,119.511,80.84,2015-08-04
Russia,350.04,4.267,7.374,10.306,-22.9,-122.174,628.765,221.835,2015-08-04
South Africa,219.371,0.193,-1.504,-0.524,12.055,29.488,240.794,159.0,2015-08-04
Turkey,239.152,2.313,3.753,0.744,23.048,62.318,238.408,156.0,2015-08-04
Brazil,300.21,7.848,0.986,12.49,35.76,103.412,306.576,123.25,2015-08-04
Korea,55.001,0.836,2.67,1.167,2.916,0.51,67.609,45.491,2015-08-04
Japan,40.0,0.0,-0.81,0.0,4.511,-27.5,73.833,31.335,2015-08-04
