In [23]:
pip install mftool -q

In [24]:
pip install deprecated -q

In [25]:
import ast
import json
import logging
import re
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import pandas as pd
from mftool import Mftool

In [26]:
def latest_nav_with_returns(nav_list, fund_name):
    """
    Build a single-row DataFrame with a fund's latest NAV and its trailing monthly returns.

    For every whole month ``m`` between the oldest and the latest record, the
    return is measured against the most recent NAV dated on or before
    ``latest_date - m months``.

    Parameters:
    - nav_list: list of dicts [{"date":"dd-mm-yyyy","nav":"value"}, ...]
    - fund_name: string, name of the fund

    Returns:
    - pandas DataFrame with 1 row:
      [fund_name, date, nav, 1_month_return, 2_month_return, ... N_month_return].
      Returns are percentages. A return is None when no NAV exists on or before
      the reference date, or when the reference NAV is zero.

    Raises:
    - ValueError: if nav_list contains no usable record (empty input, missing
      "date"/"nav" keys, or no NAV that parses as a number).
    """
    df = pd.DataFrame(nav_list)
    if df.empty or not {"date", "nav"}.issubset(df.columns):
        raise ValueError(f"No NAV records available for '{fund_name}'.")

    df['date'] = pd.to_datetime(df['date'], format="%d-%m-%Y")
    # Placeholder NAVs that are not numbers become NaN and are dropped, so one
    # bad record does not discard the whole fund.
    df['nav'] = pd.to_numeric(df['nav'], errors="coerce")
    df = df.dropna(subset=['date', 'nav']).sort_values('date').reset_index(drop=True)  # oldest to newest
    if df.empty:
        raise ValueError(f"No numeric NAV values available for '{fund_name}'.")

    dates = df['date']
    navs = df['nav'].to_numpy()
    first_date = dates.iloc[0]
    latest_date = dates.iloc[-1]
    latest_nav = float(navs[-1])

    result = {"fund_name": fund_name, "date": latest_date, "nav": latest_nav}

    # Number of calendar-month boundaries spanned by the data.
    total_months = (latest_date.year - first_date.year) * 12 + \
                   (latest_date.month - first_date.month)

    for m in range(1, total_months + 1):
        past_date = latest_date - pd.DateOffset(months=m)
        # Position of the last record dated on or before past_date (-1 if none).
        pos = int(dates.searchsorted(past_date, side="right")) - 1
        past_nav = float(navs[pos]) if pos >= 0 else None
        if past_nav:
            result[f"{m}_month_return"] = (latest_nav - past_nav) / past_nav * 100
        else:
            # No reference NAV, or a zero NAV that would make the ratio undefined.
            result[f"{m}_month_return"] = None

    return pd.DataFrame([result])

def process_scheme(scheme):
    """
    Fetch one scheme's historical NAVs and summarise them as a single-row DataFrame.

    Relies on the notebook-level globals `mf` (the client), `past` and `present`
    (the date window, as 'dd-mm-YYYY' strings).

    Parameters:
    - scheme: scheme code to look up. The literal "Scheme Code" is skipped —
      presumably the header entry of the scheme-code mapping; verify against
      what `mf.get_scheme_codes()` returns.

    Returns:
    - the DataFrame produced by latest_nav_with_returns, or None when the scheme
      is skipped or anything fails while fetching/parsing it (best effort).
    """
    if scheme == "Scheme Code":
        return None

    try:
        response = mf.get_scheme_historical_nav_for_dates(
            scheme,
            start_date=past,
            end_date=present,
            as_json=True
        )
        data = json.loads(response)
        return latest_nav_with_returns(data["data"], data["scheme_name"])
    except Exception as exc:
        # Still best effort, but leave a trace: DEBUG stays silent by default and
        # can be enabled to find out why schemes are being dropped.
        logging.getLogger(__name__).debug(
            "Skipping scheme %s: %s: %s", scheme, type(exc).__name__, exc
        )
        return None

def get_top_rows(df: pd.DataFrame, sort_column: str, n: int = 10,
                 filter_value: list = None,
                 ascending: bool = False,
                 filter_column: str = "fund_name") -> pd.DataFrame:
    """
    Return the top n rows of a DataFrame sorted by a given column.
    Optionally keep only rows whose filter_column contains any of the
    filter_value substrings (case-insensitive, matched literally).

    Parameters:
        df (pd.DataFrame): Input dataframe
        sort_column (str): Column name to sort by
        n (int): Number of rows to return (default=10)
        filter_value (list | str): Substring(s) to search for in filter_column;
            a single string is treated as one term. None or empty disables
            filtering (default=None)
        ascending (bool): Sort ascending or descending (default=False -> descending)
        filter_column (str): Column to filter on (default="fund_name")

    Returns:
        pd.DataFrame: Top n rows after filtering and sorting

    Raises:
        ValueError: If sort_column is missing, or if filtering is requested and
            filter_column is missing.
    """
    if sort_column not in df.columns:
        raise ValueError(f"Column '{sort_column}' not found in DataFrame.")

    filtered_df = df

    # Apply filter if specified
    if filter_value:
        if filter_column not in df.columns:
            raise ValueError(f"Column '{filter_column}' not found in DataFrame.")
        # A bare string would otherwise be iterated character by character.
        terms = [filter_value] if isinstance(filter_value, str) else filter_value
        # Escape each term so characters such as "+" or "(" match literally
        # instead of being interpreted as regular-expression syntax.
        pattern = "|".join(re.escape(str(term)) for term in terms)
        mask = df[filter_column].astype(str).str.contains(pattern, case=False, na=False)
        filtered_df = df[mask]

    return filtered_df.sort_values(by=sort_column, ascending=ascending).head(n)

In [27]:

# Suppress every warning for the rest of the session (applies globally).
warnings.filterwarnings("ignore")

# Client used by process_scheme and the scheme-code lookup.
mf = Mftool()

# Date window for the NAV history, formatted as dd-mm-YYYY strings.
# 210 days is roughly seven months of history.
LOOKBACK_DAYS = 210
DATE_FORMAT = '%d-%m-%Y'

# Read the clock once so both ends of the window come from the same instant
# (two separate now() calls could straddle midnight).
today = datetime.now()
present = today.strftime(DATE_FORMAT)
past = (today - timedelta(days=LOOKBACK_DAYS)).strftime(DATE_FORMAT)
print(present)
print(past)

04-10-2025
08-03-2025


In [28]:
# Fetch the available scheme codes from the mftool client; iterated later to
# submit one process_scheme task per entry.
# NOTE(review): presumably a mapping of scheme code -> scheme name that also
# contains a "Scheme Code" header entry (process_scheme skips it) — confirm.
schemes=mf.get_scheme_codes()

In [None]:

# Run process_scheme for every scheme on a pool of worker threads.
# Tune max_workers (5-20) depending on API/server limits.
result = []
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(process_scheme, scheme) for scheme in schemes]

    # Gather frames in completion order; schemes that produced nothing (None) are dropped.
    completed = (future.result() for future in as_completed(futures))
    result.extend(frame for frame in completed if frame is not None)



In [None]:
# Stack the per-scheme single-row frames into one table.
if not result:
    # pd.concat would fail with the less helpful "No objects to concatenate".
    raise ValueError(
        "No scheme produced NAV data; check network access and the date window."
    )
df = pd.concat(result, ignore_index=True)

# Last expression of the cell, so the number of funds is actually displayed.
len(df)

In [None]:
# Top 10 funds by 3-month return (descending), restricted to funds whose name
# contains any of these keywords (case-insensitive).
name_keywords = ["SBI", "HDFC", "ICICI", "NIPPON"]
get_top_rows(df, sort_column="3_month_return", filter_value=name_keywords)