In [36]:
import requests
import bs4
import pandas as pd
import numpy as np

def _clean_cell(text):
    """Return cell text cut at the first '?' with newlines and tabs removed."""
    return text.split('?')[0].replace('\n', '').replace('\t', '')


def _parse_rows(soup):
    """Return the rows of the first <tbody> as lists of cleaned cell strings.

    A cell carrying a ``rowspan`` attribute only appears in the first <tr> it
    covers, so its text is re-inserted at the same position into each of the
    following rows it spans.
    """
    all_rows = soup.find('tbody').find_all('tr')

    results = []
    rowspans = []  # (row index, cell index, span length, cleaned text)
    for tr_no, tr in enumerate(all_rows):
        cells = tr.find_all('td')
        results.append([_clean_cell(td.text) for td in cells])
        for td_no, td in enumerate(cells):
            if td.has_attr('rowspan'):
                rowspans.append(
                    (tr_no, td_no, int(td['rowspan']), _clean_cell(td.get_text()))
                )

    for tr_no, td_no, span, text in rowspans:
        for offset in range(1, span):
            results[tr_no + offset].insert(td_no, text)
    return results


def _parse_headers(soup):
    """Return the text of the <thead> cells th_result2..th_result10, skipping 6."""
    thead = soup.find('thead')
    headers = []
    for i in range(2, 11):
        if i == 6:
            continue
        css_class = 'th_result' + str(i)
        if i == 9:
            # Column 9 is looked up under the '_on' variant of its class name
            # (presumably the currently highlighted column -- verify on the page).
            css_class += '_on'
        headers.append(thead.find('th', {'class': css_class}).text)
    return headers


def _to_numeric_if_possible(column):
    """Convert a column with ``pd.to_numeric``; return it unchanged if that fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def extractDf(url: str) -> pd.DataFrame:
    """Scrape the fund table at *url* and return it with derived risk columns.

    The first <tbody>/<thead> of the page is parsed into a DataFrame, rows
    containing 'N/A' or missing cells are dropped, and three columns are added:

    * ``MBY``  -- average one-month return implied by the 1-year return (12th root)
    * ``MBQ``  -- average one-month return implied by the 3-month return (cube root)
    * ``Risk`` -- row-wise std of ('1개월', 'MBY', 'MBQ'), min-max scaled to [0, 1]

    Rows with a negative MBY or MBQ, or with '설정액' below 1000, are removed.

    Args:
        url: Address of the page holding the table.

    Returns:
        The cleaned DataFrame with a fresh 0..n-1 index.

    Raises:
        requests.RequestException: If the request fails, times out, or returns
            an HTTP error status.
    """
    # A timeout keeps a stalled server from hanging the caller forever.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    df = pd.DataFrame(data=_parse_rows(soup))
    df = df.drop(0, axis=1)  # the first scraped column is not part of the headers
    df.columns = _parse_headers(soup)

    # Drop incomplete rows *before* the float conversion so that an 'N/A' in an
    # amount column removes the row instead of raising in astype(float).
    df = df.replace('N/A', np.nan).dropna()
    for amount_col in ('설정액', '1개월 증감액'):
        df[amount_col] = df[amount_col].str.replace(',', '', regex=False).astype(float)

    # Convert every column that parses as numeric; text columns stay as they are.
    df = df.apply(_to_numeric_if_possible)

    # MBY / MBQ express the longer-period returns as an average one-month
    # return so they can be compared with the actual one-month return.
    df['MBY'] = ((((100 + df['1년']) / 100) ** (1 / 12)) - 1) * 100
    df['MBQ'] = ((((100 + df['3개월']) / 100) ** (1 / 3)) - 1) * 100

    # If the 1-year or 1-quarter return is negative, MBY / MBQ falls below 0,
    # so those rows are dropped; small funds ('설정액' < 1000) are dropped too.
    discard = (df.MBY < 0) | (df.MBQ < 0) | (df['설정액'] < 1000)
    df = df[~discard].copy()

    row_std = df[['1개월', 'MBY', 'MBQ']].std(axis=1)
    lowest = row_std.min()
    spread = row_std.max() - lowest
    if spread == 0:
        # All remaining rows have the same dispersion: avoid 0/0 -> NaN.
        df['Risk'] = 0.0
    else:
        df['Risk'] = (row_std - lowest) / spread

    df.reset_index(inplace=True, drop=True)
    return df

# Build the table for the ivstAreaWorldYn=N listing; this performs a live
# HTTP request every time the line runs.
domesticDf=extractDf('http://info.finance.naver.com/fund/fundTypeEarningRate.nhn?ivstAreaWorldYn=N')
# Same call for the ivstAreaWorldYn=Y listing, currently disabled:
#overseasDf=extractDf('http://info.finance.naver.com/fund/fundTypeEarningRate.nhn?ivstAreaWorldYn=Y')

# Bare expression: has no effect in a plain script; in an interactive
# session it displays the DataFrame.
domesticDf

소유형        K200인덱스
펀드수             69
설정액          67730
1개월 증감액      -4658
올해            8.33
1개월           4.75
3개월           8.33
1년           15.92
MBY        1.23869
MBQ        2.70295
Name: 0, dtype: object
arrStd:  1.44005158804
STd.min:  0.00175605888401
Std.max:  1.76369579699
arrStd-min:  1.43829552915
max-min:  1.7619397381
risk k200index:  0.816313689991
1.76369579699


Unnamed: 0,소유형,펀드수,설정액,1개월 증감액,올해,1개월,3개월,1년,MBY,MBQ,Risk
0,K200인덱스,69,67730.0,-4658.0,8.33,4.75,8.33,15.92,1.238693,2.702949,1.0
1,일반주식,320,133551.0,-5013.0,4.51,2.58,4.51,1.88,0.155333,1.481283,0.688077
2,배당주식,41,23719.0,-1313.0,4.22,3.03,4.22,3.41,0.27982,1.387331,0.784354
3,일반주식혼합,84,26162.0,639.0,2.96,2.06,2.96,4.12,0.337016,0.977089,0.493302
4,공격적자산배분,25,1838.0,307.0,2.7,1.78,2.7,3.22,0.264453,0.892019,0.431194
5,일반채권,67,43601.0,-2522.0,0.41,0.07,0.41,1.2,0.099454,0.13648,0.01791
6,초단기채권,21,51063.0,172.0,0.41,0.12,0.41,1.39,0.115102,0.13648,0.00536
7,우량채권,27,13485.0,-539.0,0.25,0.04,0.25,0.36,0.029951,0.083264,0.015081
8,MMF,125,966357.0,-16077.0,0.32,0.11,0.32,1.3,0.107693,0.106553,0.0
9,공모주하이일드,6,2103.0,-106.0,0.92,0.29,0.92,2.95,0.242571,0.305731,0.017664
