In [15]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the training data.
# NOTE: this CSV is written by the synthetic-data generator cell at the end of
# the notebook. On a fresh kernel that cell must be run first, otherwise
# read_csv raises FileNotFoundError.
df = pd.read_csv("synthetic_mutual_fund_data.csv")

# Monthly return a prediction must beat for the fund to be recommended.
sector_benchmarks = {
    'Tech': 0.010,
    'Finance': 0.005,
    'Healthcare': 0.007
}

# Risk-free rate (monthly)
risk_free_rate = 0.003
window_size = 6      # past monthly returns fed to the LSTM per sample
horizon = 3          # future months summarised into the two targets
test_fraction = 0.2  # share of the most recent samples held out per fund

result_columns = ['Fund', 'Sector', 'Predicted_Return', 'Predicted_Sharpe']

# Output containers
recommended_funds = []
all_predictions = []

# Train one small LSTM per (Fund, Sector) series and collect its predictions.
for (fund, sector), group in df.groupby(['Fund', 'Sector']):
    # assumes each fund's rows are stored in chronological order (the file has
    # no date column to sort on) -- TODO confirm against the data source
    returns = group['Monthly_Return'].to_numpy(dtype=float)

    # Sample i uses returns[i - window_size:i] as input and
    # returns[i:i + horizon] as the future window, so the last valid i is
    # len(returns) - horizon (inclusive).
    n_samples = len(returns) - window_size - horizon + 1
    if n_samples < 2:
        # Need at least one training and one test sample.
        continue

    starts = range(window_size, window_size + n_samples)
    X = np.array([returns[i - window_size:i] for i in starts])
    future = np.array([returns[i:i + horizon] for i in starts])

    # Targets: mean future return and its Sharpe ratio (epsilon avoids /0).
    y_return = future.mean(axis=1)
    y_sharpe = (y_return - risk_free_rate) / (future.std(axis=1) + 1e-6)
    y = np.column_stack((y_return, y_sharpe))

    # Chronological split: neighbouring windows overlap heavily, so a shuffled
    # split would put near-duplicates of test samples into the training set.
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=test_fraction, shuffle=False
    )

    # Fit the scaler on training inputs only (no look-ahead), and as a single
    # feature so a given return maps to the same value at every lag position.
    scaler = MinMaxScaler()
    scaler.fit(X_train.reshape(-1, 1))
    X_train_seq = scaler.transform(X_train.reshape(-1, 1)).reshape(-1, window_size, 1)
    X_test_seq = scaler.transform(X_test.reshape(-1, 1)).reshape(-1, window_size, 1)

    # Standardise both targets with training statistics; otherwise the Sharpe
    # column, which is orders of magnitude larger, dominates the shared MSE.
    y_mean = y_train.mean(axis=0)
    y_scale = y_train.std(axis=0) + 1e-8
    y_train_std = (y_train - y_mean) / y_scale

    # Build LSTM model
    model = Sequential()
    model.add(LSTM(32, input_shape=(window_size, 1)))
    model.add(Dropout(0.2))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2))  # [Expected Return, Sharpe Ratio]
    model.compile(optimizer='adam', loss='mse')

    # Fit model
    model.fit(X_train_seq, y_train_std, epochs=30, batch_size=4, verbose=0)

    # Predict and map the outputs back to the original target units.
    preds = model.predict(X_test_seq) * y_scale + y_mean
    for pred_ret, pred_sharpe in preds:
        record = {
            'Fund': fund,
            'Sector': sector,
            'Predicted_Return': round(float(pred_ret), 4),
            'Predicted_Sharpe': round(float(pred_sharpe), 4)
        }
        all_predictions.append(record)
        # Sectors without a benchmark fall back to a threshold of 0.
        if pred_ret > sector_benchmarks.get(sector, 0):
            recommended_funds.append(dict(record))

# Save results (explicit columns keep the CSV header even when a list is empty)
all_df = pd.DataFrame(all_predictions, columns=result_columns)
recommended_df = pd.DataFrame(recommended_funds, columns=result_columns)

all_df.to_csv("All_Funds_Prediction.csv", index=False)
recommended_df.to_csv("Recommended_Funds.csv", index=False)

print("- All_Funds_Prediction.csv saved.")
print("- Recommended_Funds.csv saved.")

FileNotFoundError: [Errno 2] No such file or directory: 'synthetic_mutual_fund_data.csv'

In [None]:
# This code generates 3000 rows of synthetic mutual fund data and writes them
# to synthetic_mutual_fund_data.csv.
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the dataset is identical on every run.
np.random.seed(42)

# Sector -> funds that belong to it.
funds = {
    'Tech': ['FundA', 'FundD', 'FundF', 'FundG', 'FundH'],
    'Finance': ['FundB', 'FundE', 'FundI', 'FundJ', 'FundK'],
    'Healthcare': ['FundC', 'FundL', 'FundM', 'FundN', 'FundO']
}

# Mean and standard deviation of the monthly return drawn for each sector.
sector_means = {'Tech': 0.01, 'Finance': 0.005, 'Healthcare': 0.007}
sector_stds = {'Tech': 0.005, 'Finance': 0.003, 'Healthcare': 0.004}

months_per_fund = 200  # 5 funds * 3 sectors * 200 months = 3000 rows

rows = []
for sector, fund_list in funds.items():
    mean, std = sector_means[sector], sector_stds[sector]
    for fund in fund_list:
        # One normal draw per month, limited to the range [-0.02, 0.03].
        draws = np.random.normal(loc=mean, scale=std, size=months_per_fund)
        returns = np.clip(draws, -0.02, 0.03)
        # One [fund, sector, return] record per month, rounded to 4 decimals.
        rows.extend([fund, sector, round(ret, 4)] for ret in returns)

df = pd.DataFrame(rows, columns=['Fund', 'Sector', 'Monthly_Return'])
df.to_csv('synthetic_mutual_fund_data.csv', index=False)
print(f"Generated {len(df)} rows of synthetic mutual fund data.")