In [None]:
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms

# Load all CSV files and add source column
file_paths = [
    ("cmf2_final_results_with_ip_value.csv", "cmf2"),
    ("nb2_final_results_with_ip_value.csv", "nb2"),
    ("nb3_final_results_with_ip_value.csv", "nb3"),
    ("gibf1_final_results_with_ip_value.csv", "gibf1")
]

# Read data and combine into one dataframe
dfs = []
for file_path, source in file_paths:
    df = pd.read_csv(file_path)
    df["Source"] = source  # Add source information
    df["Asset_Class"] = f"Asset_{file_paths.index((file_path, source)) + 1}"  # Asset_1, Asset_2, ...
    dfs.append(df)

data = pd.concat(dfs, ignore_index=True)

# Extract relevant columns
data = data[["Source", "Asset_Class", "Index", "Actual_ClosePrice", "Predicted_ClosePrice", "ip_value"]]

# Compute returns
data["Return"] = (data["Predicted_ClosePrice"] - data["Actual_ClosePrice"]) / data["Actual_ClosePrice"]

# Genetic Algorithm Parameters
POP_SIZE = 500  # Increased population size
CROSSOVER_RATE = 0.6  # Slightly decreased crossover rate
MUTATION_RATE = 0.15  # Slightly increased mutation rate
GENERATIONS = 2000  # Increased number of generations
ELITE_SIZE = 10  # Increased elite size

# Fitness function: Sharpe Ratio
def sharpe_ratio(thresholds):
    selected_assets = []
    
    # thresholds is a list with one threshold per asset class
    for i, asset_class in enumerate(data["Asset_Class"].unique()):
        asset_data = data[data["Asset_Class"] == asset_class]
        selected_data = asset_data[asset_data["ip_value"] >= thresholds[i]]
        
        if not selected_data.empty:
            selected_assets.append(selected_data)
    
    if len(selected_assets) == 0:
        return (-1.0,)  # Penalty for no assets selected
    
    selected_assets = pd.concat(selected_assets)
    
    # Portfolio Returns
    portfolio_return = selected_assets["Return"].mean()
    portfolio_std = selected_assets["Return"].std()

    if portfolio_std <= 1e-5:  # Avoid division by zero
        return (-1.0,)  # Penalty for zero standard deviation
    
    sharpe = portfolio_return / portfolio_std
    return (sharpe,)  # Return a tuple

# Genetic Algorithm Setup
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_float", np.random.uniform, data["ip_value"].min(), data["ip_value"].max())
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=len(data["Asset_Class"].unique()))  # One threshold per asset class
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=MUTATION_RATE)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", sharpe_ratio)  # Directly use sharpe_ratio

# Run Genetic Algorithm
def run_ga():
    population = toolbox.population(n=POP_SIZE)
    hof = tools.HallOfFame(ELITE_SIZE)  # Keep best individuals

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("max", np.max)

    algorithms.eaSimple(population, toolbox, cxpb=CROSSOVER_RATE, mutpb=MUTATION_RATE,
                        ngen=GENERATIONS, stats=stats, halloffame=hof, verbose=True)

    return hof[0]

# Get the optimized ip-value thresholds
best_thresholds = run_ga()

# Create a DataFrame to track the asset class, source, and optimized thresholds
thresholds_df = pd.DataFrame({
    "Asset_Class": data["Asset_Class"].unique(),
    "Source": [file_path[1] for file_path in file_paths],
    "Optimized_Threshold": best_thresholds
})

# Print the DataFrame to see the optimized thresholds for each asset
print(thresholds_df)

# Select assets based on optimized thresholds
selected_assets = []
for i, asset_class in enumerate(data["Asset_Class"].unique()):
    asset_data = data[data["Asset_Class"] == asset_class]
    selected_data = asset_data[asset_data["ip_value"] >= best_thresholds[i]]
    if not selected_data.empty:
        selected_assets.append(selected_data)

final_portfolio = pd.concat(selected_assets)

# Implement Risk Parity Model for Weights
if not final_portfolio.empty:
    volatilities = final_portfolio.groupby("Asset_Class")["Return"].std()
    volatilities = volatilities.replace(0, 1e-5)  # Avoid division by zero
    risk_parity_weights = 1 / volatilities  # Inverse volatility
    risk_parity_weights /= risk_parity_weights.sum()  # Normalize to sum to 1

    # Assign weights
    final_portfolio["Weight"] = final_portfolio["Asset_Class"].map(risk_parity_weights)
else:
    final_portfolio["Weight"] = 0  # If no assets are selected, set weight to 0

# Save final portfolio
final_portfolio.to_csv("optimized_portfolio_1.csv", index=False)
print("Optimized portfolio saved!")


gen	nevals	max    
0  	500   	0.57979
1  	348   	0.695658
2  	320   	0.709883
3  	320   	0.716485
4  	328   	0.716485
5  	331   	0.795752
6  	306   	0.818137
7  	332   	6.16621 
8  	331   	6.16621 
9  	326   	6.16621 
10 	317   	6.16621 
11 	356   	6.16621 
12 	339   	nan     
13 	307   	nan     
14 	323   	nan     
15 	315   	nan     
16 	342   	nan     
17 	339   	nan     
18 	327   	nan     
19 	344   	nan     
20 	309   	nan     
21 	308   	nan     
22 	357   	6.16621 
23 	338   	6.16621 
24 	330   	nan     
25 	342   	nan     
26 	298   	nan     
27 	328   	nan     
28 	324   	6.16621 
29 	333   	nan     
30 	313   	nan     
31 	336   	nan     
32 	316   	nan     
33 	320   	nan     
34 	343   	6.16621 
35 	319   	6.16621 


In [28]:
print(thresholds_df)

  Asset_Class Source  Optimized_Threshold
0     Asset_1   cmf2         8.906469e-01
1     Asset_2    nb2         1.070586e+18
2     Asset_3    nb3         1.058429e+19
3     Asset_4  gibf1         3.169559e+16


In [30]:
thresholds_df.to_csv("thresholds.csv", index=False)

In [31]:
print(best_thresholds)

[0.8906469413364329, 1.0705860018214318e+18, 1.058428980706035e+19, 3.1695592453533588e+16]
