In [1]:
from Utils.Solver import *
from Utils.Signals import *

In [2]:
# Let's assume we are interested in the following stocks: AAPL, MSFT, TSLA, AMZN, GOOG
#tickers = ['AAPL', 'MSFT', 'TSLA', 'AMZN', 'GOOG', 'IBM', 'NFLX', 'NVDA', 'AMD', 'INTC']
tickers = ['AAPL', 'MSFT', 'TSLA', 'AMZN', 'GOOG', 'IBM', 'NFLX', 'NVDA', 'AMD', 'INTC', 'TXN', 'QCOM', 'MU', 'ADI', 'HPQ', 'GLW', 'AVGO', 'ADP', 'INTU', 'PFE']

# Download historical stock data
data = yf.download(tickers, start='2000-01-01', end='2025-01-01')
# Initialize the portfolio solver with appropriate penalty and max weight threshold
portfolio_solver = Portfolio_Solver(0.8, max_weight_threshold=0.2 )

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  20 of 20 completed


In [3]:
start_date_signal = '2000-01-01'
end_date_signal = '2021-01-01'
date_range_signal = pd.date_range(start=start_date_signal, end=end_date_signal)

start_date_eval = '2019-01-01'
end_date_eval = '2020-01-01'
date_range_eval = pd.date_range(start=start_date_eval, end=end_date_eval)

In [4]:
# Initialize an empty list to store the rows for the first DataFrame
dataset_scores = []

# Step 1: Create the DataFrame with rsi_scores, macd_scores, and sma_scores
for date in date_range_signal:
    #print(f"processing {date}")

    # Step 1a: Calculate the RSI signal scores
    rsi_signal_scores = calculate_rsi_signal(data, tickers, date=date, period=14)
    rsi_scores = np.array([score[1] for score in rsi_signal_scores])

    # Step 1b: Calculate the MACD signal scores
    macd_signal_scores = calculate_macd_signal(data, tickers, date=date)
    macd_scores = np.array([score[1] for score in macd_signal_scores])

    # Step 1c: Calculate the SMA signal scores
    sma_signal_scores = calculate_sma_signal(data, tickers, date=date)
    sma_scores = np.array([score[1] for score in sma_signal_scores])

    rsi_avg = np.mean(rsi_scores)
    macd_avg = np.mean(macd_scores)
    sma_avg = np.mean(sma_scores)

    if np.any(np.isnan(rsi_scores)) or np.any(np.isnan(macd_scores)) or np.any(np.isnan(sma_scores)):
        print(f"Skipping {date} due to NaN values in the signals.")
        continue  # Skip this date and move to the next one

    # Step 1d: Add the scores to the first dataset
    dataset_scores.append({
        'date': date,
        'rsi_scores': rsi_scores,
        'macd_scores': macd_scores,
        'sma_scores': sma_scores,
        'rsi_avg': rsi_avg,
        'macd_avg': macd_avg,
        'sma_avg': sma_avg
    })


# Convert the first dataset into a DataFrame
df_scores = pd.DataFrame(dataset_scores)
display(df_scores)

Skipping 2000-01-01 00:00:00 due to NaN values in the signals.
Skipping 2000-01-02 00:00:00 due to NaN values in the signals.
Skipping 2000-01-03 00:00:00 due to NaN values in the signals.
Skipping 2000-01-04 00:00:00 due to NaN values in the signals.
Skipping 2000-01-05 00:00:00 due to NaN values in the signals.
Skipping 2000-01-06 00:00:00 due to NaN values in the signals.
Skipping 2000-01-07 00:00:00 due to NaN values in the signals.
Skipping 2000-01-08 00:00:00 due to NaN values in the signals.
Skipping 2000-01-09 00:00:00 due to NaN values in the signals.
Skipping 2000-01-10 00:00:00 due to NaN values in the signals.
Skipping 2000-01-11 00:00:00 due to NaN values in the signals.
Skipping 2000-01-12 00:00:00 due to NaN values in the signals.
Skipping 2000-01-13 00:00:00 due to NaN values in the signals.
Skipping 2000-01-14 00:00:00 due to NaN values in the signals.
Skipping 2000-01-15 00:00:00 due to NaN values in the signals.
Skipping 2000-01-16 00:00:00 due to NaN values in the s

KeyboardInterrupt: 

In [None]:
start_date_train = '2018-01-01'
end_date_train = '2019-01-01'
date_range_train = pd.date_range(start=start_date_train, end=end_date_train)

start_date_eval = '2018-01-01'
end_date_eval = '2019-01-01'
date_range_eval = pd.date_range(start=start_date_eval, end=end_date_eval)

# Filter the dataframe within the date range
df_train = df_scores[(df_scores['date'] >= start_date_train) & (df_scores['date'] <= end_date_train)]
display(df_train)
df_eval = df_scores[(df_scores['date'] >= start_date_eval) & (df_scores['date'] <= end_date_eval)]
display(df_eval)

In [None]:
average_annualized_return, total_return_sum = portfolio_solver.CalculateEvalReturns(tickers, data, df_eval, [1,1,1])

In [None]:
average_annualized_return, total_return_sum = portfolio_solver.CalculateEvalReturns(tickers, data, df_eval, optimal_signal_weights)

In [None]:
# Initialize an empty list to store the results
results = []

# Define the range of years you want to process (from 2011 to 2024)
for eval_year in range(2012, 2021):
    # Define the start and end dates for the training and evaluation periods
    start_date_train = f'{eval_year-1}-01-01'
    end_date_train = f'{eval_year-1}-12-31'
    start_date_eval = f'{eval_year}-01-01'
    end_date_eval = f'{eval_year}-12-31'
    
    # Filter the dataframe for the training period
    df_train = df_scores[(df_scores['date'] >= start_date_train) & (df_scores['date'] <= end_date_train)]
    # Calculate the returns for the evaluation period using the learned weights
    df_eval = df_scores[(df_scores['date'] >= start_date_eval) & (df_scores['date'] <= end_date_eval)]



    # Initial guess (equal weights)
    initial_weights = np.array([1/3, 1/3, 1/3])

    # Constraint: weights sum to 1
    constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1})

    # Bounds: weights between 0 and 1
    bounds = [(0, 1) for _ in range(3)]

    # Optimize signal weights
    result = minimize(
        fun=objective,
        x0=initial_weights,
        args=(df_train, tickers, data, portfolio_solver),
        bounds=bounds,
        constraints=constraints,
        method='SLSQP',
        options={'disp': True, 'maxiter': 50}
    )

    # Final optimized weights
    optimal_signal_weights = result.x
    print("Optimized Signal Weights:", optimal_signal_weights)
    
    # Calculate returns for the evaluation period using the learned weights
    average_annualized_return, total_return_sum = portfolio_solver.CalculateEvalReturns(tickers, data, df_eval, optimal_signal_weights)

    # Calculate returns for the evaluation period using the 1/N strategy
    average_annualized_return_1n, total_return_sum_1n = portfolio_solver.CalculateEvalReturns(tickers, data, df_eval, [1, 1, 1])

    # Store the results for this year
    results.append({
        'eval_year': eval_year,
        'average_annualized_return': average_annualized_return,
        'total_return_sum': total_return_sum,
        'average_annualized_return_1n': average_annualized_return_1n,
        'total_return_sum_1n': total_return_sum_1n
    })

# Convert the results list into a DataFrame for easy display and analysis
df_results = pd.DataFrame(results)
display(df_results)

In [None]:
# Create new columns for the differences
df_results['annualized_return_diff'] = df_results['average_annualized_return'] - df_results['average_annualized_return_1n']
df_results['total_return_diff'] = df_results['total_return_sum'] - df_results['total_return_sum_1n']

# Display the updated DataFrame
print(df_results[['eval_year', 'annualized_return_diff', 'total_return_diff']])

# Sum of differences
total_annualized_diff = df_results['annualized_return_diff'].sum()
total_return_diff = df_results['total_return_diff'].sum()

print(f"Total Difference in Annualized Return: {total_annualized_diff:.4f}")
print(f"Total Difference in Total Return: {total_return_diff:.4f}")
print(f"Avg Difference in Annualized Return: {total_annualized_diff/9:.4f}")
print(f"Avg Difference in Total Return: {total_return_diff/9:.4f}")