In [38]:
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms

# Load all CSV files and add source column
file_paths = [
    ("cmf2_final_results_with_ip_value.csv", "cmf2"),
    ("nb2_final_results_with_ip_value.csv", "nb2"),
    ("nb3_final_results_with_ip_value.csv", "nb3"),
    ("gibf1_final_results_with_ip_value.csv", "gibf1")
]

# Read data and combine into one dataframe
dfs = []
for file_path, source in file_paths:
    df = pd.read_csv(file_path)
    df["Source"] = source  # Add source information
    df["Asset_Class"] = f"Asset_{file_paths.index((file_path, source)) + 1}"  # Asset_1, Asset_2, ...
    dfs.append(df)

data = pd.concat(dfs, ignore_index=True)

# Extract relevant columns
data = data[["Source", "Asset_Class", "Index", "Actual_ClosePrice", "Predicted_ClosePrice", "ip_value"]]

# Compute returns
data["Return"] = (data["Predicted_ClosePrice"] - data["Actual_ClosePrice"]) / data["Actual_ClosePrice"]

# Genetic Algorithm Parameters
POP_SIZE = 200  # Population size
CROSSOVER_RATE = 0.6  # Crossover rate
MUTATION_RATE = 0.15  # Mutation rate
GENERATIONS = 2000  # Number of generations
ELITE_SIZE = 10  # Elite size

# Fitness function: Sharpe Ratio for a single asset class
def sharpe_ratio(threshold, asset_data):
    selected_data = asset_data[asset_data["ip_value"] >= threshold]
    
    if selected_data.empty:
        return (-1.0,)  # Penalty for no assets selected
    
    # Portfolio Returns
    portfolio_return = selected_data["Return"].mean()
    portfolio_std = selected_data["Return"].std()

    if portfolio_std <= 1e-5:  # Avoid division by zero
        return (-1.0,)  # Penalty for zero standard deviation
    
    sharpe = portfolio_return / portfolio_std
    return (sharpe,)  # Return a tuple

# Genetic Algorithm Setup
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_float", np.random.uniform, data["ip_value"].min(), data["ip_value"].max())
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=1)  # Single threshold
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=MUTATION_RATE)
toolbox.register("select", tools.selTournament, tournsize=3)

# Run Genetic Algorithm for a single asset class
def run_ga_for_asset(asset_data):
    def evaluate(individual):
        return sharpe_ratio(individual[0], asset_data)
    
    toolbox.register("evaluate", evaluate)
    
    population = toolbox.population(n=POP_SIZE)
    hof = tools.HallOfFame(ELITE_SIZE)  # Keep best individuals

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("max", np.max)

    algorithms.eaSimple(population, toolbox, cxpb=CROSSOVER_RATE, mutpb=MUTATION_RATE,
                        ngen=GENERATIONS, stats=stats, halloffame=hof, verbose=True)

    return hof[0][0]  # Return the optimized threshold

# Optimize thresholds for each asset class
optimized_thresholds = {}
for asset_class in data["Asset_Class"].unique():
    asset_data = data[data["Asset_Class"] == asset_class]
    optimized_threshold = run_ga_for_asset(asset_data)
    optimized_thresholds[asset_class] = optimized_threshold
    print(f"Optimized threshold for {asset_class}: {optimized_threshold}")

# Create a DataFrame to track the asset class, source, and optimized thresholds
thresholds_df = pd.DataFrame({
    "Asset_Class": data["Asset_Class"].unique(),
    "Source": [file_path[1] for file_path in file_paths],
    "Optimized_Threshold": [optimized_thresholds[asset_class] for asset_class in data["Asset_Class"].unique()]
})

# Print the DataFrame to see the optimized thresholds for each asset
print(thresholds_df)

# Select assets based on optimized thresholds
selected_assets = []
for asset_class in data["Asset_Class"].unique():
    asset_data = data[data["Asset_Class"] == asset_class]
    selected_data = asset_data[asset_data["ip_value"] >= optimized_thresholds[asset_class]]
    if not selected_data.empty:
        selected_assets.append(selected_data)

final_portfolio = pd.concat(selected_assets)

# Implement Risk Parity Model for Weights
if not final_portfolio.empty:
    volatilities = final_portfolio.groupby("Asset_Class")["Return"].std()
    volatilities = volatilities.replace(0, 1e-5)  # Avoid division by zero
    risk_parity_weights = 1 / volatilities  # Inverse volatility
    risk_parity_weights /= risk_parity_weights.sum()  # Normalize to sum to 1

    # Assign weights
    final_portfolio["Weight"] = final_portfolio["Asset_Class"].map(risk_parity_weights)
else:
    final_portfolio["Weight"] = 0  # If no assets are selected, set weight to 0

# Save final portfolio
final_portfolio.to_csv("optimized_portfolio_2.csv", index=False)
print("Optimized portfolio saved!")

gen	nevals	max
0  	200   	nan
1  	129   	nan
2  	140   	nan
3  	143   	nan
4  	127   	nan
5  	140   	nan
6  	112   	6.16621
7  	123   	6.16621
8  	123   	nan    
9  	136   	nan    
10 	134   	nan    
11 	131   	nan    
12 	135   	nan    
13 	108   	6.16621
14 	127   	6.16621
15 	138   	nan    
16 	129   	6.16621
17 	128   	6.16621
18 	128   	nan    
19 	137   	nan    
20 	135   	nan    
21 	128   	6.16621
22 	147   	6.16621
23 	128   	6.16621
24 	128   	nan    
25 	127   	6.16621
26 	137   	6.16621
27 	133   	6.16621
28 	132   	nan    
29 	129   	nan    
30 	135   	6.16621
31 	133   	6.16621
32 	134   	nan    
33 	145   	6.16621
34 	131   	nan    
35 	139   	6.16621
36 	122   	nan    
37 	133   	6.16621
38 	124   	6.16621
39 	108   	nan    
40 	140   	nan    
41 	124   	6.16621
42 	131   	6.16621
43 	125   	6.16621
44 	136   	6.16621
45 	131   	6.16621
46 	132   	6.16621
47 	136   	6.16621
48 	134   	6.16621
49 	152   	6.16621
50 	127   	6.16621
51 	140   	6.16621
52 	150   	6.16621
53

In [39]:
print(thresholds_df)

  Asset_Class Source  Optimized_Threshold
0     Asset_1   cmf2             0.888463
1     Asset_2    nb2             0.668044
2     Asset_3    nb3             0.565410
3     Asset_4  gibf1             0.683365


In [31]:
print(best_thresholds)

[0.8906469413364329, 1.0705860018214318e+18, 1.058428980706035e+19, 3.1695592453533588e+16]
