使用基因演算法實現最佳投資組合選擇。
每個染色體代表一個投資組合（由多支股票組成，基因為0或1，表示是否選擇該股票）。

可根據自定義的適應函數（如回報率、風險等）評估投資組合的優劣，並通過初始化族群、選擇、交配及突變，優化出最佳投資組合，最後輸出其回報率、風險和選股結果。


fitness_scores

基於該組合的夏普比率，夏普比率是衡量每單位風險所獲得的回報。

夏普比率越高，表明投資組合在風險控制下回報更好，因此適應度分數會越高。

In [12]:
import numpy as np
import pandas as pd
import random
import yfinance as yf
from typing import List, Dict

def download_taiwan_stock_data(stock_id: str, start_date: str = '2022-01-01', end_date: str = '2024-01-01') -> pd.DataFrame:
    """
    Download stock data for a given Taiwan stock ID using yfinance.

    Args:
        stock_id (str): Stock ticker symbol
        start_date (str): Start date for data retrieval
        end_date (str): End date for data retrieval

    Returns:
        pd.DataFrame: Stock price data
    """
    try:
        stock = yf.Ticker(stock_id)
        df = stock.history(start=start_date, end=end_date)
        return df
    except Exception as e:
        print(f"Error downloading data for {stock_id}: {e}")
        return pd.DataFrame()

def calculate_technical_indicators(price_series: pd.Series, risk_free_rate: float = 0.02) -> tuple:
    """
    Calculate technical indicators for a price series.

    Args:
        price_series (pd.Series): Price series
        risk_free_rate (float): Risk-free rate for Sharpe ratio calculation

    Returns:
        tuple: Annual return, annual volatility, and Sharpe ratio
    """
    # Calculate returns
    returns = price_series.pct_change().dropna()

    # Prevent division by zero or invalid calculations
    if len(returns) == 0 or returns.std() == 0:
        return 0, 0, 0

    # Annual return (assuming 252 trading days)
    annual_return = returns.mean() * 252

    # Annual volatility (assuming 252 trading days)
    annual_volatility = returns.std() * np.sqrt(252)

    # Sharpe ratio
    sharpe_ratio = (annual_return - risk_free_rate) / annual_volatility if annual_volatility != 0 else 0

    return annual_return, annual_volatility, sharpe_ratio

class GeneticPortfolioOptimizer:
    def __init__(self, stock_returns, population_size=100, max_generations=50, mutation_rate=0.05, max_stocks=5, min_stocks=3):
        self.stock_returns = stock_returns
        self.population_size = population_size
        self.max_generations = max_generations
        self.mutation_rate = mutation_rate
        self.max_stocks = max_stocks
        self.min_stocks = min_stocks
        self.num_stocks = stock_returns.shape[1]

    def initialize_population(self):
        population = []
        for _ in range(self.population_size):
            chromosome = np.zeros(self.num_stocks)
            # 限制最多選擇 5 支股票
            num_selected_stocks = random.randint(self.min_stocks, min(self.max_stocks, self.num_stocks))
            selected_indices = random.sample(range(self.num_stocks), num_selected_stocks)
            chromosome[selected_indices] = 1
            population.append(chromosome)
        return population

    def calculate_fitness(self, population):
      fitness_scores = []

      for chromosome in population:
          # 確保至少選擇一個股票
          if np.sum(chromosome) == 0:
              fitness_scores.append(float('-inf'))
              continue

          # 獲取選中的股票的回报數據
          selected_stocks_returns = self.stock_returns.iloc[:, chromosome == 1]

          # 確保選中的股票數據有效且非空
          if selected_stocks_returns.empty or selected_stocks_returns.shape[1] < 1:
              fitness_scores.append(float('-inf'))
              continue

          try:
              # 計算投資組合回報率
              portfolio_returns = selected_stocks_returns.mean(axis=1)

              # 計算年化回報、年化波動率、夏普比率
              annual_return, annual_volatility, sharpe_ratio = calculate_technical_indicators(portfolio_returns)

              # 處理 NaN 或 Inf 值
              if np.isnan(sharpe_ratio) or np.isinf(sharpe_ratio):
                  fitness_scores.append(float('-inf'))
              else:
                  fitness_scores.append(sharpe_ratio)

          except Exception:
              fitness_scores.append(float('-inf'))

      return fitness_scores

    def selection(self, population, fitness_scores):
      # 過濾掉無效的適應度值（例如 NaN 或 inf）
      fitness_scores = [score if not (np.isnan(score) or np.isinf(score)) else 0 for score in fitness_scores]

      # 正規化適應度分數，以生成有效的選擇機率
      total_fitness = sum(fitness_scores)
      selection_probs = [score / total_fitness for score in fitness_scores] if total_fitness > 0 else [1/len(fitness_scores)] * len(fitness_scores)

      new_population = []
      for _ in range(self.population_size):
          selected_index = np.random.choice(len(population), p=selection_probs)
          new_population.append(population[selected_index].copy())

      return new_population

    def crossover(self, parent1, parent2):
      crossover_point = random.randint(1, len(parent1) - 1)
      child1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
      child2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])

      # 確保每個子代不超過最大股票數量 (5 支股票)
      if np.sum(child1) > self.max_stocks:
          indices_to_remove = random.sample(range(len(child1)), int(np.sum(child1) - self.max_stocks))
          for i in indices_to_remove:
              child1[i] = 0

      if np.sum(child2) > self.max_stocks:
          indices_to_remove = random.sample(range(len(child2)), int(np.sum(child2) - self.max_stocks))
          for i in indices_to_remove:
              child2[i] = 0

      # 確保每個子代至少有 3 支股票
      if np.sum(child1) < self.min_stocks:
          indices_to_add = random.sample(range(len(child1)), int(self.min_stocks - np.sum(child1)))
          for i in indices_to_add:
              child1[i] = 1

      if np.sum(child2) < self.min_stocks:
          indices_to_add = random.sample(range(len(child2)), int(self.min_stocks - np.sum(child2)))
          for i in indices_to_add:
              child2[i] = 1

      return child1, child2

    def mutation(self, chromosome):
      # Ensure at most 5 stocks are selected in each chromosome
      if np.sum(chromosome) <= 1:
          return chromosome

      for i in range(len(chromosome)):
          if random.random() < self.mutation_rate:
              chromosome[i] = 1 - chromosome[i]

      # Ensure at most 5 stocks are selected
      if np.sum(chromosome) > self.max_stocks:
          indices_to_remove = random.sample(range(len(chromosome)), int(np.sum(chromosome) - self.max_stocks))
          for i in indices_to_remove:
              chromosome[i] = 0

      # Ensure at least 3 stocks are selected
      if np.sum(chromosome) < self.min_stocks:
          indices_to_add = random.sample(range(len(chromosome)), int(self.min_stocks - np.sum(chromosome)))
          for i in indices_to_add:
              chromosome[i] = 1

      return chromosome

    def optimize(self):
        population = self.initialize_population()
        for generation in range(self.max_generations):
            fitness_scores = self.calculate_fitness(population)
            population = self.selection(population, fitness_scores)
            next_generation = []
            while len(next_generation) < self.population_size:
                parent1, parent2 = random.sample(population, 2)
                child1, child2 = self.crossover(parent1, parent2)
                next_generation.extend([child1, child2])
            population = [self.mutation(chromosome) for chromosome in next_generation[:self.population_size]]

        final_fitness_scores = self.calculate_fitness(population)
        best_portfolio_index = np.argmax(final_fitness_scores)
        best_portfolio = population[best_portfolio_index]

        return best_portfolio

def backtest_portfolio(stock_returns, best_portfolio, initial_capital=1000000):
    selected_stocks_returns = stock_returns.iloc[:, best_portfolio == 1]
    portfolio_returns = selected_stocks_returns.mean(axis=1)

    # 初始資金
    portfolio_value = initial_capital
    portfolio_values = [portfolio_value]

    # 計算每一天的投資組合價值
    for daily_return in portfolio_returns:
        portfolio_value *= (1 + daily_return)
        portfolio_values.append(portfolio_value)

    return pd.Series(portfolio_values)

def main():
    # 股票代碼列表
    stock_ids = [
        "2330.TW", "2317.TW", "2454.TW", "2382.TW", "2308.TW",
        "2412.TW", "3711.TW", "2303.TW", "2357.TW", "3045.TW",
        "2345.TW", "6669.TW", "3231.TW", "3008.TW", "4904.TW",
        "3034.TW", "2395.TW", "2327.TW", "4938.TW", "3017.TW",
        "2301.TW", "2379.TW", "3037.TW", "3533.TW", "3653.TW",
        "2376.TW", "2356.TW", "2360.TW", "3661.TW", "6415.TW"
    ]

    stock_data = {}
    for stock_id in stock_ids:
        df_stat = download_taiwan_stock_data(stock_id)
        if not df_stat.empty:
            stock_data[stock_id] = df_stat['Close']

    stock_returns = pd.DataFrame(stock_data).pct_change().dropna()

    ga_optimizer = GeneticPortfolioOptimizer(stock_returns)
    best_portfolio = ga_optimizer.optimize()

    selected_stocks = [stock_ids[i] for i in range(len(stock_ids)) if best_portfolio[i] == 1]

    portfolio_values = backtest_portfolio(stock_returns, best_portfolio)

    # 計算最佳投資組合的績效
    selected_stocks_returns = stock_returns.iloc[:, best_portfolio == 1]
    portfolio_returns = selected_stocks_returns.mean(axis=1)
    annual_return, annual_volatility, sharpe_ratio = calculate_technical_indicators(portfolio_returns)

    print("最佳投資組合:")
    print("選股:", selected_stocks)
    print(f"年化報酗率: {annual_return:.2%}")
    print(f"年化波動率: {annual_volatility:.2%}")
    print(f"夏普比率: {sharpe_ratio:.2f}")

    # 回測結果
    print(f"回測結果（100萬起始資金）：")
    print(f"最終資產: {portfolio_values.iloc[-1]:.2f}")

if __name__ == "__main__":
    main()

  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


最佳投資組合:
選股: ['2308.TW', '2357.TW', '2345.TW', '3017.TW', '3661.TW']
年化報酗率: 66154.06%
年化波動率: 59406.26%
夏普比率: 1.11
回測結果（100萬起始資金）：
最終資產: 2456848.24
