In [1]:
import requests
import bs4
import pandas as pd
import numpy as np

def _clean_cell_text(cell):
    """Return the cell's text up to the first '?', without newlines or tabs."""
    return cell.get_text().split('?')[0].replace('\n', '').replace('\t', '')


def _parse_table_rows(soup):
    """Return the <tbody> rows as lists of cleaned cell strings.

    A cell carrying a ``rowspan`` attribute is repeated into the rows below
    it, so that every row ends up with a value in that position.
    """
    body = soup.find('tbody')
    if body is None:
        raise ValueError('no <tbody> element found in the page')

    rows = []
    spans = []  # (row index, cell index, rowspan, cleaned text)
    for row_no, tr in enumerate(body.find_all('tr')):
        texts = []
        for cell_no, td in enumerate(tr.find_all('td')):
            text = _clean_cell_text(td)
            texts.append(text)
            if td.has_attr('rowspan'):
                spans.append((row_no, cell_no, int(td['rowspan']), text))
        rows.append(texts)

    # Insert only after every row has been read, so the cell indices recorded
    # above still refer to the rows as they appear in the HTML.
    for row_no, cell_no, span, text in spans:
        for offset in range(1, span):
            rows[row_no + offset].insert(cell_no, text)
    return rows


def _parse_headers(soup):
    """Return the header texts of the th_result2..th_result10 cells (6 skipped)."""
    head = soup.find('thead')
    if head is None:
        raise ValueError('no <thead> element found in the page')

    headers = []
    for i in range(2, 11):
        if i == 6:
            continue
        css_class = 'th_result' + str(i)
        if i == 9:
            # Column 9 is looked up with an '_on' suffix
            # (presumably the currently selected/sorted column -- confirm).
            css_class += '_on'
        th = head.find('th', {'class': css_class})
        if th is None:
            raise ValueError('header cell with class %r not found' % css_class)
        headers.append(th.text)
    return headers


def _to_numeric_if_possible(column):
    """Convert a column with pd.to_numeric, returning it unchanged on failure."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def extractDf(url, timeout=10, min_fund_size=1000):
    """Scrape the fund table at ``url`` into a filtered DataFrame with a risk score.

    The page's <tbody> rows become the data and selected <thead> cells become
    the column names.  Rows containing 'N/A' or missing cells are dropped, and
    numeric-looking columns are converted to numbers.  Two derived columns are
    added:

    * ``MBY`` -- monthly return (percent) implied by the 1-year return,
      i.e. the 12th root of the yearly growth factor.
    * ``MBQ`` -- monthly return (percent) implied by the 3-month return,
      i.e. the cube root of the quarterly growth factor.

    Rows with a negative ``MBY`` or ``MBQ``, or with '설정액' below
    ``min_fund_size``, are removed.  ``Risk`` is the row-wise standard
    deviation of ('1개월', 'MBY', 'MBQ'), min-max scaled to [0, 1] over the
    remaining rows.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the HTTP response before giving up.
        min_fund_size: Rows whose '설정액' is below this value are dropped.

    Returns:
        A pandas DataFrame with a fresh 0..n-1 index.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        ValueError: If the page lacks the expected table elements.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    df = pd.DataFrame(data=_parse_table_rows(soup))
    # The first table column has no matching header in _parse_headers, so it
    # is discarded.  `axis` is passed by keyword: pandas 2.0 no longer accepts
    # it positionally.
    df = df.drop(0, axis=1)
    df.columns = _parse_headers(soup)

    # Turn 'N/A' into NaN *before* the float conversion so such rows are
    # dropped below instead of making astype(float) raise.
    df = df.replace('N/A', np.nan)
    for amount_col in ('설정액', '1개월 증감액'):
        df[amount_col] = (
            df[amount_col].str.replace(',', '', regex=False).astype(float)
        )
    df = df.dropna()

    # Columns that cannot be parsed as numbers (e.g. names) are left as-is.
    df = df.apply(_to_numeric_if_possible)

    # Average monthly returns implied by the yearly and quarterly returns, so
    # they can be compared with the 1-month return on the same scale.
    df['MBY'] = (((100 + df['1년']) / 100) ** (1.0 / 12) - 1) * 100
    df['MBQ'] = (((100 + df['3개월']) / 100) ** (1.0 / 3) - 1) * 100

    # A negative 1-year or 3-month return makes MBY/MBQ negative; such rows
    # are dropped, together with funds below the size threshold.
    unwanted = (
        (df['MBY'] < 0)
        | (df['MBQ'] < 0)
        | (df['설정액'] < min_fund_size)
    )
    df = df.loc[~unwanted].copy()

    spread = df[['1개월', 'MBY', 'MBQ']].std(axis=1)
    spread_range = spread.max() - spread.min()
    if spread_range > 0:
        df['Risk'] = (spread - spread.min()) / spread_range
    else:
        # No rows, one row, or identical spreads: min-max scaling would be
        # 0/0, so report the lowest risk score instead of NaN.
        df['Risk'] = 0.0

    df.reset_index(inplace=True, drop=True)
    return df

# Download and process the fund-type earning-rate table requested with
# ivstAreaWorldYn=N (stored as the "domestic" frame).
domesticDf = extractDf(
    'http://info.finance.naver.com/fund/fundTypeEarningRate.nhn'
    '?ivstAreaWorldYn=N'
)
# The ivstAreaWorldYn=Y variant of the page can be loaded the same way
# (currently disabled):
# overseasDf = extractDf('http://info.finance.naver.com/fund/fundTypeEarningRate.nhn?ivstAreaWorldYn=Y')

# Bare expression so the notebook cell displays the resulting frame.
domesticDf

Unnamed: 0,소유형,펀드수,설정액,1개월 증감액,올해,1개월,3개월,1년,MBY,MBQ,Risk
0,K200인덱스,69,67661.0,-4493.0,7.96,3.0,7.81,17.53,1.355128,2.538356,0.829295
1,배당주식,41,23693.0,-1344.0,4.41,2.42,4.72,4.68,0.381876,1.549209,1.0
2,일반주식,320,133430.0,-5068.0,4.58,2.17,4.47,3.09,0.253924,1.468334,0.947836
3,일반주식혼합,84,26205.0,618.0,2.95,1.47,2.86,4.86,0.396249,0.944387,0.524136
4,공격적자산배분,25,1845.0,303.0,2.66,1.23,2.53,4.26,0.348252,0.83632,0.430871
5,일반채권혼합,230,84213.0,-2344.0,1.28,0.72,1.22,0.18,0.014988,0.405024,0.344127
6,보수적자산배분,41,6681.0,-204.0,0.56,0.75,0.64,0.2,0.016651,0.21288,0.370067
7,초단기채권,21,52575.0,1789.0,0.42,0.13,0.41,1.39,0.115102,0.13648,0.008873
8,일반채권,67,43581.0,-2543.0,0.42,0.13,0.37,1.18,0.097805,0.123182,0.014754
9,우량채권,27,13473.0,-541.0,0.25,0.17,0.15,0.34,0.028289,0.049975,0.072913
