In [1]:
import pandas as pd
import numpy as np 
import joblib

In [2]:
# Read the mutual-fund workbook (path is relative to the notebook's working
# directory) into a DataFrame and preview its first five rows.
df = pd.read_excel("../Excel/top_30_mutual_funds_excel.xlsx")
df.head(n=5)


Unnamed: 0,scheme_name,min_sip,min_lumpsum,expense_ratio,fund_size_cr,fund_age_yr,fund_manager,sortino_ratio,alpha,standard_deviation,...,category,sub_category,returns_1yr,returns_3yr,returns_5yr,risk-adjusted return score,cost efficiency score,consistency score,fund stability,composite_score
0,Quant Active Fund,1000,5000,0.58,3531,10,Sanjeev Sharma,3.33,13.82,18.235557,...,Equity,Multi Cap Funds,1.0,45.5,19.9,0.070188,1.724138,23.25,2047.98,7.684121
1,Kotak Multi Asset Allocator FoF – Dynamic – Di...,1000,5000,0.13,783,10,Devender Singhal,4.33,8.04,5.786382,...,Other,FoFs Domestic,11.2,25.0,15.3,0.087065,7.692308,18.1,101.79,7.233346
2,Tata Digital India Fund,150,5000,0.31,6765,7,Meeta Shetty,2.29,6.11,23.002947,...,Equity,Sectoral / Thematic Mutual Funds,-16.0,39.0,22.1,0.150573,3.225806,11.5,2097.15,4.378371
3,Kotak India Growth Fund,0,5000,0.34,90,5,Devender Singhal,3.71,7.67,12.715432,...,Equity,Multi Cap Funds,4.8,35.5,15.6,0.123859,2.941176,20.15,30.6,6.690676
4,ICICI Pru Thematic Advantage Fund,1000,5000,0.25,958,10,Dharmesh Kakkad,4.13,6.63,12.548926,...,Other,FoFs Domestic,5.4,35.4,14.6,0.147813,4.0,20.4,239.5,6.8645


In [3]:
# Keep only the fund name, the three inputs used for scoring
# (5-year return, Sharpe ratio, standard deviation) and the risk label
# used for filtering. A missing column raises KeyError.
df = df.loc[
    :,
    ["scheme_name", "returns_5yr", "sharpe", "standard_deviation", "risk_bucket"],
]


In [4]:
# Discard every row that has a missing value in any retained column, then
# renumber the index from zero (the old index is not kept as a column).
df = df.dropna(axis="index", how="any")
df = df.reset_index(drop=True)


In [5]:
from scipy.stats import zscore

# Standardise each scoring input across the funds that survived the NaN drop.
# The volatility z-score is negated so that, like the other two features,
# a larger value means "better" (lower risk).
df = df.assign(
    z_returns_5yr=zscore(df["returns_5yr"]),
    z_sharpe=zscore(df["sharpe"]),
    z_std_dev=-zscore(df["standard_deviation"]),
)


In [6]:
# Weighted blend of the standardised inputs: 50% five-year return,
# 30% Sharpe ratio, 20% (negated) volatility. Terms are added in the same
# left-to-right order as a plain `a + b + c` expression.
df["raw_target_score"] = (
    df["z_returns_5yr"].mul(0.5)
    .add(df["z_sharpe"].mul(0.3))
    .add(df["z_std_dev"].mul(0.2))
)


In [7]:
from sklearn.preprocessing import StandardScaler

# Re-standardise the blended score; the scaler expects a 2-D input, so the
# single column is passed as a one-column frame and flattened afterwards.
df["target_score_z"] = (
    StandardScaler()
    .fit_transform(df["raw_target_score"].to_frame())
    .reshape(-1)
)


In [8]:
# Model input columns, in the order they are passed to fit/predict.
FEATURES = ["z_returns_5yr", "z_sharpe", "z_std_dev"]


In [9]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
from xgboost import XGBRegressor

# Feature matrix and regression target. The target is itself derived from
# these same three standardised columns (weighted blend, then re-scaled).
X = df.loc[:, FEATURES]
y = df.loc[:, "target_score_z"]

# Shallow boosted trees with a small learning rate; the seed is fixed so
# repeated runs build the same model.
model = XGBRegressor(
    max_depth=3,
    n_estimators=100,
    learning_rate=0.05,
    random_state=42,
)

# Fit on every available row (no hold-out split is made in this notebook).
model.fit(X, y)


In [11]:
# Maps a user's risk preference to the `risk_bucket` labels considered
# acceptable for it. Adjacent preferences deliberately share a bucket
# ("Moderately Low" appears under both Low and Moderate, "Moderate" under
# both Moderate and High).
RISK_MAP = dict(
    Low=["Low Risk", "Moderately Low"],
    Moderate=["Moderate", "Moderately Low"],
    High=["High Risk", "Moderate"],
)


In [12]:
def filter_by_risk(df, user_risk):
    """Return the rows of ``df`` whose ``risk_bucket`` suits ``user_risk``.

    ``user_risk`` must be a key of ``RISK_MAP``; any other value raises
    ``KeyError``. The input frame is not modified.
    """
    bucket_labels = df["risk_bucket"]
    acceptable = RISK_MAP[user_risk]
    keep = bucket_labels.isin(acceptable)
    return df.loc[keep]


In [13]:
def recommend_top_funds(df, user_risk, top_n=5):
    """Score the funds matching ``user_risk`` and return the best ``top_n``.

    Funds are first narrowed with ``filter_by_risk``, then scored by the
    fitted module-level ``model`` on the ``FEATURES`` columns. The result is
    sorted by ``final_score`` (highest first) and contains only the fund name,
    score, 5-year return and risk bucket. ``df`` itself is left untouched.

    Note: a function with this same name is defined again later in the
    notebook (with an extra ``return_band`` argument) and replaces this one
    once that cell has run.
    """
    candidates = filter_by_risk(df, user_risk)
    predicted = model.predict(candidates[FEATURES])

    # assign() works on a copy, so the caller's frame never gains the column.
    ranked = candidates.assign(final_score=predicted).sort_values(
        "final_score", ascending=False
    )

    report_columns = ["scheme_name", "final_score", "returns_5yr", "risk_bucket"]
    return ranked.iloc[:top_n][report_columns]


In [14]:
# Rank the funds acceptable for a "Moderate" risk preference and keep the
# five highest-scoring ones.
top_funds = recommend_top_funds(df, user_risk="Moderate", top_n=5)

top_funds


Unnamed: 0,scheme_name,final_score,returns_5yr,risk_bucket
1,Kotak Multi Asset Allocator FoF – Dynamic – Di...,0.148546,15.3,Moderate
14,ICICI Pru Asset Allocator Fund,-0.042217,12.8,Moderately Low


In [15]:
def project_sip(monthly_amount, annual_return, years):
    """Project the future value of a monthly SIP.

    Uses the annuity-due closed form ``P * ((1 + r)**n - 1) / r * (1 + r)``
    with ``r`` the monthly rate (``annual_return`` percent / 12) and ``n`` the
    number of monthly contributions.

    Parameters
    ----------
    monthly_amount : number
        Amount contributed every month.
    annual_return : number or array-like (ndarray / Series)
        Annual return in percent (e.g. ``12`` for 12%).
    years : number
        Investment horizon in years.

    Returns
    -------
    Projected value; a scalar for scalar input, otherwise the same container
    type as ``annual_return``. A 0% return yields the plain sum of
    contributions (``monthly_amount * n``) instead of dividing by zero.
    """
    r = annual_return / 100 / 12
    n = years * 12

    if np.ndim(r) == 0:
        if r == 0:
            # Limit of the closed form as r -> 0: no growth, just the deposits.
            return monthly_amount * n * 1.0
        return monthly_amount * ((1 + r) ** n - 1) / r * (1 + r)

    # Vectorised path: divide by a placeholder where the rate is zero, then
    # overwrite those positions with the no-growth total.
    zero_rate = r == 0
    safe_r = np.where(zero_rate, 1.0, r)
    projected = monthly_amount * ((1 + r) ** n - 1) / safe_r * (1 + r)
    projected[zero_rate] = monthly_amount * n
    return projected


In [16]:
def attach_projection(df, investment_type, amount, years):
    """Return a copy of ``df`` with a projected future value for each fund.

    Each row's ``returns_5yr`` is treated as a constant annual return in
    percent over the whole horizon.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``returns_5yr`` column. It is not modified.
    investment_type : str
        ``"SIP"``: ``amount`` is contributed monthly and compounded monthly
        (annuity-due form). ``"LUMPSUM"``: ``amount`` is invested once and
        compounded annually.
    amount : number
        Monthly contribution (SIP) or one-off investment (LUMPSUM).
    years : number
        Investment horizon in years.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``projected_value``, ``investment_type`` and
        ``investment_years`` columns added.

    Raises
    ------
    ValueError
        If ``investment_type`` is neither ``"SIP"`` nor ``"LUMPSUM"``
        (previously this silently produced no ``projected_value`` column).
    """
    df = df.copy()

    if investment_type == "SIP":
        r = df["returns_5yr"] / 100 / 12
        n = years * 12
        zero_rate = r == 0
        # A 0% return would give 0/0 = NaN in the closed form. Divide by a
        # placeholder there and substitute the plain sum of contributions.
        safe_r = r.mask(zero_rate, 1.0)
        projected = amount * (((1 + r) ** n - 1) / safe_r) * (1 + r)
        df["projected_value"] = projected.mask(zero_rate, float(amount * n))

    elif investment_type == "LUMPSUM":
        r = df["returns_5yr"] / 100
        df["projected_value"] = amount * ((1 + r) ** years)

    else:
        raise ValueError(
            f"Unsupported investment_type {investment_type!r}; "
            "expected 'SIP' or 'LUMPSUM'"
        )

    df["investment_type"] = investment_type
    df["investment_years"] = years

    return df


In [17]:
# Project a monthly SIP of 5000 held for 10 years onto each recommended fund.
final_output = attach_projection(top_funds, investment_type="SIP", amount=5000, years=10)

final_output


Unnamed: 0,scheme_name,final_score,returns_5yr,risk_bucket,projected_value,investment_type,investment_years
1,Kotak Multi Asset Allocator FoF – Dynamic – Di...,0.148546,15.3,Moderate,1419330.0,SIP,10
14,ICICI Pru Asset Allocator Fund,-0.042217,12.8,Moderately Low,1218647.0,SIP,10


In [18]:
# Named 5-year-return ranges as (lower, upper) percent bounds.
# Neighbouring bands share their boundary value (10 and 15), and nothing
# below 0 or above 25 belongs to any band.
RETURN_BANDS = dict(
    Low=(0, 10),
    Moderate=(10, 15),
    High=(15, 25),
)


In [19]:
def filter_by_return_band(df, band):
    """Return the rows of ``df`` whose ``returns_5yr`` lies inside ``band``.

    ``band`` must be a key of ``RETURN_BANDS`` (otherwise ``KeyError``).
    Both bounds are inclusive, so a fund sitting exactly on a boundary shared
    by two bands is matched by both.
    """
    lower, upper = RETURN_BANDS[band]
    five_year = df["returns_5yr"]
    at_least_lower = five_year >= lower
    at_most_upper = five_year <= upper
    return df.loc[at_least_lower & at_most_upper]


In [20]:
def recommend_top_funds(df, user_risk, return_band=None, top_n=5):
    """Score the funds matching the user's preferences and return the best ones.

    Parameters
    ----------
    df : pandas.DataFrame
        Fund table containing ``scheme_name``, ``returns_5yr``,
        ``risk_bucket`` and the ``FEATURES`` columns. It is not modified.
    user_risk : str
        Key of ``RISK_MAP`` selecting the acceptable risk buckets.
    return_band : str, optional
        Key of ``RETURN_BANDS`` restricting the 5-year return range. When
        omitted (``None``) no return filter is applied, which makes this
        function a drop-in replacement for the earlier three-argument
        definition of the same name.
    top_n : int, default 5
        Maximum number of funds to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows ordered by ``final_score`` (highest first) with
        the columns ``scheme_name``, ``final_score``, ``returns_5yr`` and
        ``risk_bucket``. Empty when no fund passes the filters.
    """
    report_columns = ["scheme_name", "final_score", "returns_5yr", "risk_bucket"]

    filtered = filter_by_risk(df, user_risk)
    if return_band is not None:
        filtered = filter_by_return_band(filtered, return_band)

    if len(filtered) == 0:
        # Nothing to score: skip the model call (zero-row input is not
        # guaranteed to be accepted by every estimator version) and return
        # an empty result with the usual columns.
        return filtered.assign(final_score=np.array([], dtype=float))[report_columns]

    # Work on a copy so the caller's frame never gains the score column.
    filtered = filtered.copy()
    filtered["final_score"] = model.predict(filtered[FEATURES])

    return (
        filtered
        .sort_values("final_score", ascending=False)
        .head(top_n)
        [report_columns]
    )


In [21]:
# Best five funds for a "High" risk preference whose 5-year return falls in
# the "High" return band.
top_funds = recommend_top_funds(df, user_risk="High", return_band="High", top_n=5)

top_funds


Unnamed: 0,scheme_name,final_score,returns_5yr,risk_bucket
17,Quant Infrastructure Fund,1.52462,21.1,High Risk
21,Quant Absolute Fund,1.342531,19.1,High Risk
16,Quant Small Cap Fund,1.31725,23.2,High Risk
26,AXIS Small Cap Fund,1.260418,19.4,High Risk
18,Quant Tax Plan- Direct Growth,1.235137,22.2,High Risk


In [22]:
def confidence_score(row):
    """Turn a fund's z-scores into a 0-100 "confidence" figure.

    ``row`` is any mapping-like object (e.g. a DataFrame row) exposing
    ``z_returns_5yr``, ``z_sharpe`` and ``z_std_dev``. The absolute z-scores
    are blended with weights 0.5 / 0.3 / 0.2, multiplied by 20, rounded to
    two decimals and capped at 100. Because absolute values are used, the
    figure grows with distance from the mean in either direction.
    """
    magnitude = 0.5 * abs(row["z_returns_5yr"])
    magnitude = magnitude + 0.3 * abs(row["z_sharpe"])
    magnitude = magnitude + 0.2 * abs(row["z_std_dev"])

    percent = round(magnitude * 20, 2)
    if percent > 100:
        return 100
    return percent

In [23]:
# Bring each recommended fund's z-scores back in (matched on scheme name) so
# a confidence figure can be computed per row.
z_columns = ["z_returns_5yr", "z_sharpe", "z_std_dev"]

top_funds = (
    top_funds
    # Remove columns left by a previous execution of this cell; without this,
    # re-running would merge the z-scores a second time under _x/_y suffixes
    # and confidence_score would fail with KeyError.
    .drop(columns=z_columns + ["confidence_%"], errors="ignore")
    # NOTE(review): assumes scheme_name is unique in df; duplicate names
    # would duplicate rows here.
    .merge(df[["scheme_name"] + z_columns], on="scheme_name", how="left")
)

top_funds["confidence_%"] = top_funds.apply(confidence_score, axis=1)

top_funds


Unnamed: 0,scheme_name,final_score,returns_5yr,risk_bucket,z_returns_5yr,z_sharpe,z_std_dev,confidence_%
0,Quant Infrastructure Fund,1.52462,21.1,High Risk,1.273141,1.804468,-1.597636,29.95
1,Quant Absolute Fund,1.342531,19.1,High Risk,0.74092,0.888667,0.365828,14.2
2,Quant Small Cap Fund,1.31725,23.2,High Risk,1.831972,0.837789,-2.380191,32.87
3,AXIS Small Cap Fund,1.260418,19.4,High Risk,0.820753,0.5834,0.371879,13.2
4,Quant Tax Plan- Direct Growth,1.235137,22.2,High Risk,1.565862,0.430766,-1.352001,23.65


In [24]:
# Persist the fitted regressor next to the notebook. joblib files are
# pickle-based, so only load this artifact from a trusted location.
joblib.dump(value=model, filename="xgboost_fund_ranker.pkl")

['xgboost_fund_ranker.pkl']