In [1]:
from IPython.display import HTML

# Inject a small script plus a button so readers of the rendered notebook can
# hide or show every code input cell (`div.input`). The script relies on the
# hosting page providing jQuery (`$`). Because `code_toggle` is also run once
# when the document is ready, the inputs start out hidden.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, SoupStrainer
import httplib2
from urllib.request import urlopen
import numpy as np
import pandas as pd
import configparser

import requests
import sys
import re

import sklearn
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [3]:
# Load the notebook settings from MF.ini (looked up relative to the working
# directory). The 'FUND_URL' section is read later to map each fund key to
# its page path.
config = configparser.ConfigParser()
# read() returns the list of files it managed to parse and silently skips
# files it cannot open, so an empty list in the cell output means MF.ini was
# not found.
config.read('MF.ini')

['MF.ini']

In [4]:
def parse_html_table(table):
    """Turn an HTML table element into a pandas DataFrame.

    Parameters
    ----------
    table : object exposing ``find_all`` (e.g. a BeautifulSoup tag)
        The element whose ``<tr>`` rows are scanned.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` that contains at least one ``<td>``. The
        column labels come from the first row that has ``<th>`` cells, or
        are ``0..n-1`` when no header row exists. The width is fixed by
        the first data row. Columns whose values all parse as floats are
        converted to float; the others keep their text.

    Raises
    ------
    Exception
        If a header row was found but its length differs from the number
        of cells in the first data row.
    """
    header_labels = []
    body_rows = []  # cell texts of every row that holds at least one <td>

    # Single scan: collect the data rows and the first header row found.
    for tr in table.find_all('tr'):
        data_cells = tr.find_all('td')
        if data_cells:
            body_rows.append([cell.get_text() for cell in data_cells])

        header_cells = tr.find_all('th')
        if header_cells and not header_labels:
            header_labels = [cell.get_text() for cell in header_cells]

    # The first data row decides how wide the table is.
    width = len(body_rows[0]) if body_rows else 0

    # Safeguard on column titles
    if header_labels and len(header_labels) != width:
        raise Exception("Column titles do not match the number of columns")

    labels = header_labels if header_labels else range(0, width)
    frame = pd.DataFrame(columns=labels, index=range(0, len(body_rows)))

    # Fill the pre-sized frame position by position; rows shorter than
    # `width` simply leave their trailing cells as NaN.
    for row_pos, texts in enumerate(body_rows):
        for col_pos, text in enumerate(texts):
            frame.iat[row_pos, col_pos] = text

    # Convert to float wherever the whole column allows it
    for label in frame:
        try:
            frame[label] = frame[label].astype(float)
        except ValueError:
            pass

    return frame

def parse_for_riskratios(url):
    """Scrape the risk-ratio figures of a fund page into a DataFrame.

    Every ``<div class="percentage">`` on the page is paired, in document
    order, with a label from ``risk_ratios``. The text of the div's first
    and third ``<span>`` become the "Fund" and "Category Average" values.

    Parameters
    ----------
    url : str
        Address of the fund page to download.

    Returns
    -------
    pandas.DataFrame
        One row per risk measure, indexed by its label, with the text
        values in columns "Fund" and "Category Average". When the page
        has no matching divs the frame is empty but still carries both
        columns.

    Raises
    ------
    IndexError
        If a matched div holds fewer than three ``<span>`` elements.
    """
    # NOTE(review): 'Jension Alpha' looks like a misspelling of "Jensen's
    # Alpha", but the label is used as an index key by callers, so it is
    # kept as is.
    risk_ratios = ['Standard Deviation', 'Beta', 'Sharpe Ratio', 'Treynor Ratio', 'Jension Alpha']

    # The context manager closes the HTTP response once parsing is done.
    with urlopen(url) as html_risk:
        soup_risk = BeautifulSoup(html_risk, 'html')

    risk_tab = soup_risk.find_all("div", {"class": "percentage"})

    risk_dict = {}
    # zip() stops at the shorter sequence: a page with fewer divs yields
    # fewer rows, and divs beyond the known labels are ignored instead of
    # raising IndexError.
    for label, risk_measure in zip(risk_ratios, risk_tab):
        spans = risk_measure.find_all('span')
        risk_dict[label] = [spans[0].get_text(), spans[2].get_text()]

    # orient='index' makes each label a row; naming the columns here also
    # keeps them present when no measure was found.
    return pd.DataFrame.from_dict(risk_dict, orient='index',
                                  columns=['Fund', 'Category Average'])

def make_density(comp_mod, use_fund_name, per, color, x_label, y_label, ax):
    """Plot the peer-group density of one column and mark the selected fund.

    Parameters
    ----------
    comp_mod : pandas.DataFrame
        Peer table. Column ``per`` is plotted; columns 'Scheme Name' and
        'Plan' are used to locate the selected fund's row.
    use_fund_name : str
        Text searched for, literally, inside 'Scheme Name'.
    per : str
        Name of the column to plot; also used as the subplot title.
    color
        Line colour handed to ``seaborn.kdeplot``.
    x_label, y_label : str
        Axis labels.
    ax : matplotlib axes
        Axes to draw on.

    Notes
    -----
    The fund's own value is taken from its "Regular" plan row, or from the
    "Direct" row when the former does not resolve to exactly one numeric
    value. If neither does, the marker is drawn at 0.
    """
    fund_per = 0
    for plan in ("Regular", "Direct"):
        try:
            # regex=False: fund names are plain text, and characters such
            # as "(" or ")" must not be read as a pattern. na=False:
            # missing cells never match.
            is_fund = comp_mod['Scheme Name'].str.contains(use_fund_name, regex=False, na=False)
            is_plan = comp_mod['Plan'].str.contains(plan, regex=False, na=False)
            # .item() raises ValueError unless exactly one row was selected.
            fund_per = pd.to_numeric(comp_mod.loc[is_fund & is_plan, per]).item()
            break
        except (KeyError, ValueError, TypeError, AttributeError):
            # Missing column, non-text column, unparsable value, or not
            # exactly one match: try the next plan / keep the fallback.
            continue

    # Density of the whole peer group; unparsable values are dropped.
    sns.kdeplot(data=pd.to_numeric(comp_mod[per], errors='coerce').dropna(), color=color, ax=ax)

    # Vertical marker at the selected fund's own value.
    ax.axvline(fund_per, color='r', linestyle='dashed', linewidth=2)

    # Plot formatting
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(per)

def highlight_riskmetrics(row):
    """Choose the background colour for one row of the risk-metrics table.

    A row is shaded 'mistyrose' when the fund compares unfavourably with
    its category: a positive ``Difference`` on a 'Risk' measure, or a
    negative ``Difference`` on a 'Risk_Adj_Return' measure. Every other
    row is shaded 'honeydew'.

    Parameters
    ----------
    row : pandas.Series
        Row exposing ``Difference`` and ``Interpret`` as attributes.

    Returns
    -------
    list of str
        One CSS declaration per cell of ``row``, so the result fits a
        row-wise ``Styler.apply`` whatever the number of columns.
    """
    unfavourable = (
        (row.Interpret == 'Risk' and row.Difference > 0)
        or (row.Interpret == 'Risk_Adj_Return' and row.Difference < 0)
    )
    shade = 'mistyrose' if unfavourable else 'honeydew'
    # Sized from the row itself instead of a hard-coded 4.
    return ['background-color: ' + shade] * len(row)

In [5]:
# Plain nested-dict snapshot of the parsed configuration:
# {section name: {option name: option value}}.
dictionary = {
    section: {
        option: config.get(section, option)
        for option in config.options(section)
    }
    for section in config.sections()
}

def _fetch_soup(url):
    """Download ``url`` and return it parsed by BeautifulSoup, closing the response."""
    with urlopen(url) as response:
        return BeautifulSoup(response, 'html')


def _clean_peer_returns(comp_df):
    """Return a copy of the peer table whose trailing columns are ready for numeric parsing.

    Columns from position 5 onward (presumably the per-period return
    columns; confirm against the page layout) have their '%' signs removed
    and a cell consisting only of '-' replaced by '0'.
    """
    comp_mod = comp_df.copy(deep=True)
    for col in comp_mod.columns[5:]:
        # A column that is already numeric has nothing to strip, and the
        # .str accessor would raise on it.
        if pd.api.types.is_numeric_dtype(comp_mod[col]):
            continue
        without_pct = comp_mod[col].str.replace('%', '', regex=False)
        # Only a cell that is nothing but "-" is zeroed. Replacing every
        # hyphen would also rewrite the minus sign of negative values
        # ("-5.2" -> "05.2") and flip them to positive.
        comp_mod[col] = without_pct.str.replace(r'^\s*-\s*$', '0', regex=True)
    return comp_mod


def f(Fund):
    """Display the report for one fund: returns table, risk ratios and peer densities.

    Parameters
    ----------
    Fund : str
        Option name in the ``FUND_URL`` section of ``config``. Its value
        is the page path appended to the moneycontrol NAV base URL and is
        expected to contain at least one '/'.

    Notes
    -----
    Output is produced through ``print``, ``display`` (assumed to be the
    function IPython provides in notebooks) and matplotlib; nothing is
    returned. Several pages are downloaded on every call.
    """
    url = config['FUND_URL'][Fund]
    base_url = "https://www.moneycontrol.com/mutual-funds/nav/"
    app_url = base_url + url

    soup = _fetch_soup(app_url)

    # Page title up to " [" is the full fund name; the part before " -"
    # is what gets matched against the peer table.
    fund_name = soup.title.get_text().split(" [")[0]
    use_fund_name = fund_name.split(" -")[0]

    print(fund_name)
    print("\n")

    # The returns page is addressed as <first segment>/returns/<second segment>.
    return_str = url.split("/")
    url_returns = base_url + return_str[0] + '/returns/' + return_str[1]
    soup_returns = _fetch_soup(url_returns)
    res_ret = parse_html_table(soup_returns.find_all('table')[3])

    print('Fund Performance Summary:')
    display(res_ret)
    print("\n")

    res_risk = parse_for_riskratios(app_url)
    print('Risk Ratios for Fund:')
    res_risk = res_risk.astype(float)

    # Whether a higher value of each measure means more risk or a better
    # risk-adjusted return; read by highlight_riskmetrics.
    dict_interpret = {'Standard Deviation': 'Risk', 'Beta': 'Risk',
                      'Sharpe Ratio': 'Risk_Adj_Return', 'Treynor Ratio': 'Risk_Adj_Return',
                      'Jension Alpha': 'Risk_Adj_Return'}
    df_int = pd.DataFrame(dict_interpret, index=["Interpret"]).T

    res_risk['Difference'] = res_risk['Fund'] - res_risk['Category Average']
    res_risk = res_risk.join(df_int, how='left')

    styled_risk = res_risk.style.apply(highlight_riskmetrics, axis=1)
    display(styled_risk)
    print("\n")

    # Follow the first link of the "view more" block to the peer comparison page.
    new_page = soup.find("div", {"class": "clearfix viewmore hidden-xs hidden-sm"})
    new_url = new_page.find_all("a")[0]['href']

    new_soup = _fetch_soup(new_url)
    comp_df = parse_html_table(new_soup.find_all('table')[1])

    # Keep only the text before the first newline of each column title.
    comp_df = comp_df.rename(columns={col: col.split("\n")[0] for col in comp_df.columns})

    comp_mod = _clean_peer_returns(comp_df)

    print("Comparison of Fund performance in peer group")

    periods = ['6M', '1Y', '3Y', '5Y']

    num_subplots = len(periods)
    ncols = 2
    nrows = (num_subplots + ncols - 1) // ncols
    fig, axes = plt.subplots(ncols=ncols, nrows=nrows, figsize=(ncols * 6, nrows * 5))
    colors = plt.cm.tab10.colors

    for ax, per, color in zip(np.ravel(axes), periods, colors):
        make_density(comp_mod, use_fund_name, per, color, '% return', '', ax)

    for ax in np.ravel(axes)[num_subplots:]:  # remove possible empty subplots at the end
        ax.remove()

    plt.show()

In [6]:
# Build the UI: a dropdown listing every option name of the FUND_URL section;
# interact() calls f with the selected name.
interact(f, Fund=dictionary['FUND_URL'].keys())

interactive(children=(Dropdown(description='Fund', options=('aditya_digitalindia', 'axis_longterm_equity', 'ax…

<function __main__.f(Fund)>