In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global RNG: the genetic-algorithm optimizer in this notebook draws
# from np.random.* directly, so this fixed seed is what makes a fresh
# "Restart Kernel -> Run All" repeatable.
np.random.seed(42)

In [None]:
# Asset definitions (simple, illustrative)
assets = ["Stocks (ETF A)", "Bonds (Fund B)", "Cash (Savings)"]
# Annual expected returns (synthetic assumptions), ordered like `assets`
mu = np.array([0.08, 0.035, 0.01])  # stocks, bonds, cash

# Annual volatilities and pairwise correlations (synthetic, plausible).
# Naming them once avoids re-typing each magic number on both sides of the
# diagonal, which is how a covariance matrix silently becomes asymmetric.
VOL_STOCKS, VOL_BONDS, VOL_CASH = 0.18, 0.08, 0.01
CORR_STOCKS_BONDS = 0.6
CORR_STOCKS_CASH = 0.2
CORR_BONDS_CASH = 0.1

# cov(i, j) = vol_i * vol_j * corr(i, j)
cov_stocks_bonds = VOL_STOCKS * VOL_BONDS * CORR_STOCKS_BONDS
cov_stocks_cash = VOL_STOCKS * VOL_CASH * CORR_STOCKS_CASH
cov_bonds_cash = VOL_BONDS * VOL_CASH * CORR_BONDS_CASH

# Annual covariance matrix, rows/columns ordered like `assets`
Sigma = np.array([
    [VOL_STOCKS**2, cov_stocks_bonds, cov_stocks_cash],
    [cov_stocks_bonds, VOL_BONDS**2, cov_bonds_cash],
    [cov_stocks_cash, cov_bonds_cash, VOL_CASH**2]
])

# Sanity checks: dimensions line up and Sigma is a valid covariance matrix
assert mu.shape == (len(assets),), "mu must have one entry per asset"
assert Sigma.shape == (len(assets), len(assets)), "Sigma must be square with one row/column per asset"
assert np.allclose(Sigma, Sigma.T), "Sigma must be symmetric"
assert np.all(np.linalg.eigvalsh(Sigma) >= -1e-12), "Sigma must be positive semi-definite"

# Map questionnaire-based profile to numeric risk_aversion parameter (lambda)
# Larger lambda -> more penalty on variance -> more conservative allocation
risk_map = {
    "conservative": 20.0,
    "moderate": 6.0,
    "aggressive": 1.5
}
assert all(lam > 0 for lam in risk_map.values()), "risk_aversion values must be positive"


In [None]:
# Genetic Algorithm optimizer over weights
def ga_optimize(mu, Sigma, risk_aversion, pop_size=250, generations=700, retain=0.2, random_select=0.05, mutate_chance=0.2):
    """Search for long-only portfolio weights that maximize mean-variance utility.

    The objective is ``w.dot(mu) - risk_aversion * w.dot(Sigma).dot(w)``. Every
    candidate ``w`` is kept non-negative with entries summing to one: the initial
    population is drawn from a flat Dirichlet, crossover is a convex blend of two
    parents, and mutated children are clipped at zero and renormalized.

    All randomness comes from NumPy's global ``np.random`` state, so call
    ``np.random.seed(...)`` beforehand for repeatable results.

    Parameters
    ----------
    mu : array-like, shape (n,)
        Expected return per asset.
    Sigma : array-like, shape (n, n)
        Covariance matrix of asset returns.
    risk_aversion : float
        Multiplier on portfolio variance in the objective.
    pop_size : int
        Number of candidate weight vectors per generation.
    generations : int
        Number of select / crossover / mutate rounds.
    retain : float
        Fraction of the best-scoring candidates always kept as parents.
    random_select : float
        Probability that each non-retained candidate is also kept as a parent.
    mutate_chance : float
        Probability that a child receives Gaussian noise (std 0.05 per weight).

    Returns
    -------
    dict
        ``"weights"`` (best weight vector found), ``"utility"``,
        ``"expected_return"`` and ``"variance"`` of that vector.

    Raises
    ------
    ValueError
        If ``mu`` is not a non-empty 1-D vector, ``Sigma`` is not ``(n, n)``,
        or ``pop_size`` is less than 1.
    """
    mu = np.asarray(mu, dtype=float)
    Sigma = np.asarray(Sigma, dtype=float)
    if mu.ndim != 1 or mu.shape[0] == 0:
        raise ValueError("mu must be a non-empty 1-D vector of expected returns")
    n = mu.shape[0]
    if Sigma.shape != (n, n):
        raise ValueError(f"Sigma must have shape ({n}, {n}), got {Sigma.shape}")
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    # Initial population: a flat Dirichlet draw gives non-negative weights
    # that sum to one, i.e. a uniformly random point on the simplex.
    population = [np.random.dirichlet(np.ones(n)) for _ in range(pop_size)]

    def utility(w):
        # Mean-variance objective: reward expected return, penalize variance.
        exp_ret = w.dot(mu)
        var = w.dot(Sigma).dot(w)
        return exp_ret - risk_aversion * var  # higher is better

    def rank(candidates):
        # (utility, weights) pairs, best first; the sort is stable for ties.
        return sorted(((utility(ind), ind) for ind in candidates),
                      key=lambda pair: pair[0], reverse=True)

    for gen in range(generations):
        graded = rank(population)
        retain_len = int(len(graded) * retain)
        # Elitism: the top fraction always survives as parents.
        parents = [ind for _, ind in graded[:retain_len]]
        # Diversity: each remaining candidate survives with probability random_select.
        for _, ind in graded[retain_len:]:
            if random_select > np.random.rand():
                parents.append(ind)
        # With a tiny population (or retain == 0) nobody may have survived;
        # fall back to the single best candidate so crossover has a parent
        # to draw from instead of failing inside np.random.randint(0, 0).
        if not parents:
            parents.append(graded[0][1])

        # Crossover to refill the population up to pop_size.
        desired_len = pop_size - len(parents)
        children = []
        while len(children) < desired_len:
            parent_a = parents[np.random.randint(0, len(parents))]
            parent_b = parents[np.random.randint(0, len(parents))]
            # Arithmetic crossover: a random convex combination of two points
            # on the simplex is itself on the simplex.
            alpha = np.random.rand()
            child = alpha * parent_a + (1 - alpha) * parent_b
            # Mutation
            if mutate_chance > np.random.rand():
                # Gaussian noise can push weights negative or off the simplex,
                # so clip at zero and renormalize afterwards.
                child = child + np.random.normal(0, 0.05, size=n)
                child = np.clip(child, 0, None)
                if child.sum() <= 0:
                    # Everything was clipped away; replace with a fresh random draw.
                    child = np.random.dirichlet(np.ones(n))
                else:
                    child = child / child.sum()
            children.append(child)
        population = parents + children

        # Exploration: every 200th generation (not the first) overwrite the
        # last 2% of the population with fresh random candidates.
        if gen % 200 == 0 and gen > 0:
            for i in range(int(0.02 * pop_size)):
                population[-1 - i] = np.random.dirichlet(np.ones(n))

    # Final best candidate and its statistics.
    best_util, best_w = rank(population)[0]
    best_return = best_w.dot(mu)
    best_var = best_w.dot(Sigma).dot(best_w)
    return {
        "weights": best_w,
        "utility": best_util,
        "expected_return": best_return,
        "variance": best_var
    }



In [5]:
# Run the optimizer for every risk profile and report the recommended allocations.
# (pandas is already imported as `pd` in the notebook's first cell.)
from IPython.display import display

# One optimization per profile, in the order the profiles are declared in risk_map.
results = {
    profile: ga_optimize(mu, Sigma, risk_aversion)
    for profile, risk_aversion in risk_map.items()
}

# Present results as a DataFrame: one row per profile, one percentage column per asset.
rows = []
for profile, res in results.items():
    row = {"Profile": profile.capitalize()}
    # Pair each asset name with its weight instead of indexing assets[0..2],
    # so the table follows `assets` if the asset list changes length.
    row.update({name: f"{weight*100:.1f}%" for name, weight in zip(assets, res["weights"])})
    row["Exp. Annual Return"] = f"{res['expected_return']*100:.2f}%"
    row["Annual Variance"] = f"{res['variance']:.4f}"
    rows.append(row)

df = pd.DataFrame(rows)
# Show every column on one line (these options stay set for later cells too).
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Display the DataFrame in notebook
display(df)

# Also build plain-language explanations, one per profile
explanations = []
for profile, res in results.items():
    exp_ret = res["expected_return"]
    var = res["variance"]
    # "Asset A 12.3%, Asset B 45.6%, ..." for however many assets are defined
    allocation = ", ".join(f"{name} {weight*100:.1f}%" for name, weight in zip(assets, res["weights"]))
    expl = f"Profile: {profile.capitalize()}\n"
    expl += f"Recommendation: {allocation}.\n"
    expl += ("Why: You indicated a " + profile + " risk tolerance. "
             f"Our optimization balances expected return ({exp_ret*100:.2f}% p.a.) against portfolio variance ({var:.4f}). "
             "A higher risk_aversion makes variance more costly, tilting the allocation toward bonds/cash.\n")
    expl += "Plain language explanation: We suggest this split because it aims to match your stated appetite for risk while trying to maximize expected long-term returns after accounting for volatility.\n"
    explanations.append(expl)

# Print explanations, separated by a horizontal rule
for e in explanations:
    print(e)
    print("-"*80)


Unnamed: 0,Profile,Stocks (ETF A),Bonds (Fund B),Cash (Savings),Exp. Annual Return,Annual Variance
0,Conservative,3.1%,6.1%,90.8%,1.37%,0.0002
1,Moderate,13.5%,15.5%,71.0%,2.33%,0.0012
2,Aggressive,59.3%,40.7%,0.0%,6.17%,0.0166


Profile: Conservative
Recommendation: Stocks (ETF A) 3.1%, Bonds (Fund B) 6.1%, Cash (Savings) 90.8%.
Why: You indicated a conservative risk tolerance. Our optimization balances expected return (1.37% p.a.) against portfolio variance (0.0002). A higher risk_aversion makes variance more costly, tilting the allocation toward bonds/cash.
Plain language explanation: We suggest this split because it aims to match your stated appetite for risk while trying to maximize expected long-term returns after accounting for volatility.

--------------------------------------------------------------------------------
Profile: Moderate
Recommendation: Stocks (ETF A) 13.5%, Bonds (Fund B) 15.5%, Cash (Savings) 71.0%.
Why: You indicated a moderate risk tolerance. Our optimization balances expected return (2.33% p.a.) against portfolio variance (0.0012). A higher risk_aversion makes variance more costly, tilting the allocation toward bonds/cash.
Plain language explanation: We suggest this split because it