In [142]:
import pandas as pd
import urllib.request, json
import plotly_express as px
import plotly.figure_factory as ff

In [3]:
# Download the master list of mutual-fund schemes from mfapi.in and load it
# into a DataFrame (the listing shown below has schemeCode / schemeName columns).
# The timeout makes an unresponsive server raise an error instead of blocking
# the kernel indefinitely.
with urllib.request.urlopen('https://api.mfapi.in/mf', timeout=60) as url:
    data = json.load(url)
df_mfs = pd.DataFrame(data)

In [10]:
# List every scheme whose name begins with "Parag".
df_mfs.loc[df_mfs['schemeName'].str.startswith('Parag')]

Unnamed: 0,schemeCode,schemeName
20226,122639,Parag Parikh Long Term Equity Fund - Direct Pl...
20227,122639,Parag Parikh Flexi Cap Fund - Direct Plan - Gr...
20228,122640,Parag Parikh Long Term Equity Fund - Regular P...
20229,122640,Parag Parikh Flexi Cap Fund - Regular Plan - G...
35210,143260,Parag Parikh Liquid Fund- Regular Plan- Growth
35211,143261,Parag Parikh Liquid Fund- Regular Plan- Monthl...
35212,143261,Parag Parikh Liquid Fund- Regular Plan- Monthl...
35213,143262,Parag Parikh Liquid Fund- Direct Plan- Monthly...
35214,143262,Parag Parikh Liquid Fund- Direct Plan- Monthly...
35215,143263,Parag Parikh Liquid Fund- Direct Plan- Daily D...


In [12]:
# 122639 is the code listed in the scheme table above for the Parag Parikh
# Flexi Cap / Long Term Equity Fund - Direct Plan.
scheme_code = '122639'
mf_url = f'https://api.mfapi.in/mf/{scheme_code}'
# Fail with a timeout error rather than hang forever if the API stops responding.
with urllib.request.urlopen(mf_url, timeout=60) as url:
    data = json.load(url)

In [63]:
# Tidy the raw NAV records: parse dd-mm-yyyy dates, make NAV numeric and
# order the rows chronologically with the date as index.
df_navs = (
    pd.DataFrame(data['data'])
    .assign(
        date=lambda raw: pd.to_datetime(raw['date'], format='%d-%m-%Y'),
        nav=lambda raw: raw['nav'].astype(float),
    )
    .sort_values('date')
    .set_index('date')
)
# One row per calendar day between the first and the last NAV date.
df_dates = pd.DataFrame(
    {'date': pd.date_range(start=df_navs.index.min(), end=df_navs.index.max())}
).set_index('date')
# Outer-join onto the full calendar and carry the last known NAV forward
# over the days that have no published value.
df_navs = df_navs.join(df_dates, how='outer').ffill().reset_index()

In [64]:
# NAV history over time, drawn on a logarithmic y-axis.
px.line(
    df_navs,
    x='date',
    y='nav',
    log_y=True,
)

In [None]:
# NOTE(review): this cell is written in R (data.table-style syntax), not Python,
# so it cannot run under a Python kernel; the Python get_cagr defined further
# down in this notebook performs the same steps.
#
# get_cagr: rolling annualised return (CAGR, in percent) over num_years.
#   dt_navs   - table with at least `date` and `nav` columns; assumed to hold
#               one row per calendar day in date order -- TODO confirm
#   num_years - look-back horizon in years (default 1)
# Returns a table with the columns date, years and cagr.
get_cagr <- function(dt_navs, num_years=1){
    # NAV from 365 * num_years rows earlier
    dt_navs[, prev_nav := shift(nav, 365*num_years)]
    # Drop rows containing NA (e.g. the leading rows that have no earlier NAV)
    dt_cagr <- na.omit(dt_navs)
    # Total return over the look-back window
    dt_cagr[, returns := nav/prev_nav - 1]
    # Annualise the total return and express it in percent
    dt_cagr[, cagr := 100 * ((1 + returns) ^ (1/num_years) - 1)]
    # Tag every row with its horizon, stored as a factor
    dt_cagr[, years := as.factor(num_years)]
    dt_cagr <- dt_cagr[, c('date', 'years', 'cagr')]
    dt_navs[, prev_nav := NULL]     # Remove the extra column added in dt_navs
    return (dt_cagr)
}


In [71]:
def get_cagr(df_navs_orig, num_years = 1):
    """Compute the rolling CAGR (in percent) over a look-back of ``num_years``.

    Each row's NAV is compared with the NAV ``365 * num_years`` rows earlier, so
    the input is expected to hold one row per calendar day in ascending date
    order (leap days are not compensated for).

    Parameters
    ----------
    df_navs_orig : pandas.DataFrame
        Frame with at least ``date`` and ``nav`` columns. It is not modified.
    num_years : int or float, default 1
        Look-back horizon in years; also the root used to annualise the return.
        A fractional horizon is rounded to a whole number of rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``years`` and ``cagr``, keeping the input's row labels.
        Rows lacking a date, a NAV or a look-back NAV are omitted.

    Raises
    ------
    ZeroDivisionError
        If ``num_years`` is 0 (the annualising exponent is ``1 / num_years``).
    """
    # shift() needs a whole number of rows, even when num_years is fractional.
    lag_rows = int(round(365 * num_years))
    df_navs = df_navs_orig.assign(prev_nav=df_navs_orig['nav'].shift(lag_rows))
    # Only the columns used below decide whether a row is dropped; a missing
    # value in an unrelated column must not shrink the result.
    df_navs = df_navs.dropna(subset=['date', 'nav', 'prev_nav'])
    returns = df_navs['nav'] / df_navs['prev_nav'] - 1
    cagr = 100 * ((1 + returns) ** (1 / num_years) - 1)
    df_cagr = df_navs.assign(years=num_years, cagr=cagr)[['date', 'years', 'cagr']]
    return df_cagr

In [75]:
# Rolling CAGR for each look-back horizon, stacked into one long frame.
# NOTE(review): concat keeps every horizon's own row labels, so index labels
# repeat across horizons in df_cagrs.
years = [1, 3, 5, 7, 9]
list_cagr = [get_cagr(df_navs, horizon) for horizon in years]
df_cagrs = pd.concat(list_cagr)

In [76]:
# Inspect the stacked CAGR table (one row per date and look-back horizon).
df_cagrs

Unnamed: 0,date,years,cagr
365,2014-05-28,1,36.716937
366,2014-05-29,1,35.736411
367,2014-05-30,1,34.179234
368,2014-05-31,1,34.411007
369,2014-06-01,1,34.411007
...,...,...,...
3412,2022-09-30,9,19.304131
3413,2022-10-01,9,19.285022
3414,2022-10-02,9,19.351844
3415,2022-10-03,9,19.296241


In [80]:
# Rolling CAGR over time, one line per look-back horizon.
px.line(
    df_cagrs,
    x='date',
    y='cagr',
    color='years',
)

In [86]:
# Per-horizon summary statistics (count, mean, std, min, quartiles, max),
# as shown in the table below.
df_cagrs.groupby('years').describe()

Unnamed: 0_level_0,cagr,cagr,cagr,cagr,cagr,cagr,cagr,cagr
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
years,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,3052.0,21.991841,19.465892,-21.26517,6.630183,17.868491,32.475757,100.056922
3,2322.0,17.396759,5.975078,0.742026,13.543036,15.189254,21.57844,32.407358
5,1592.0,16.851364,4.402621,4.180631,12.869002,17.43174,20.241094,24.852087
7,862.0,18.148775,1.328453,14.195246,17.382972,18.24207,19.026482,21.068401
9,132.0,19.561056,0.626029,18.304229,19.060604,19.408051,20.208583,20.683921


In [94]:
# Density contours of the CAGR values, coloured by look-back horizon.
px.density_contour(
    df_cagrs.set_index('date'),
    x='cagr',
    color='years',
)

In [100]:
# Histogram of the CAGR values, coloured by look-back horizon.
px.histogram(
    df_cagrs,
    x='cagr',
    color='years',
)

In [137]:
# Width of each CAGR bucket, in percentage points. Change it here only: every
# CAGR is labelled with the nearest multiple of this width.
BUCKET_WIDTH = 1
df_cagrs['cagr_bucket'] = (df_cagrs['cagr'] / BUCKET_WIDTH).round() * BUCKET_WIDTH

In [138]:
# Number of CAGR observations available for each look-back horizon.
df_total = df_cagrs.groupby('years')['cagr'].count().reset_index(name='total')

In [139]:
# Number of CAGR observations falling in each (horizon, bucket) pair.
df_counts = (
    df_cagrs
    .groupby(['years', 'cagr_bucket'])['cagr']
    .count()
    .reset_index(name='count')
)

In [140]:
# Attach each horizon's total to its bucket counts, then express every bucket
# as a share of that horizon's observations.
df_x = pd.merge(df_counts, df_total, on='years', how='left')
df_x['pct'] = df_x['count'].div(df_x['total'])

In [141]:
# Years are cast to strings, presumably so the bars are coloured as discrete
# categories rather than on a continuous scale.
df_x = df_x.astype({'years': str})
px.bar(
    df_x,
    x='cagr_bucket',
    y='pct',
    color='years',
    barmode='overlay',
)

In [134]:
# Inspect the per-horizon bucket counts, totals and shares.
df_x

Unnamed: 0,years,cagr_bucket,count,total,pct
0,1,-21.0,2,3052,0.000655
1,1,-18.0,8,3052,0.002621
2,1,-15.0,9,3052,0.002949
3,1,-12.0,4,3052,0.001311
4,1,-9.0,16,3052,0.005242
...,...,...,...,...,...
60,7,15.0,121,862,0.140371
61,7,18.0,619,862,0.718097
62,7,21.0,122,862,0.141531
63,9,18.0,71,132,0.537879


In [146]:
# CAGR series for the one-year look-back horizon only.
df_cagrs.loc[df_cagrs['years'] == 1, 'cagr']

365     36.716937
366     35.736411
367     34.179234
368     34.411007
369     34.411007
          ...    
3412    -4.706970
3413    -4.034099
3414    -4.034099
3415    -4.435720
3416    -3.915336
Name: cagr, Length: 3052, dtype: float64

In [None]:
# Next steps
# Analyse SIP (Systematic Investment Plan), SWP (Systematic Withdrawal Plan) and STP (Systematic Transfer Plan) scenarios