In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from Utils.Solver import *
from Utils.Signals import *

# Prepare data

In [2]:
# Download historical stock data
tickers = ['AAPL', 'MSFT', 'TSLA', 'AMZN', 'GOOG', 'IBM', 'NFLX', 'NVDA', 'AMD', 'INTC']

# Download historical stock data
data = yf.download(tickers, start='2000-01-01', end='2024-01-01')
# Initialize the portfolio solver with appropriate penalty and max weight threshold
portfolio_solver = Portfolio_Solver(0.8, max_weight_threshold=0.3)


start_date_signal = '2000-01-01'
end_date_signal = '2021-01-01'
date_range_signal = pd.date_range(start=start_date_signal, end=end_date_signal)

start_date_eval = '2019-01-01'
end_date_eval = '2020-01-01'
date_range_eval = pd.date_range(start=start_date_eval, end=end_date_eval)


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  10 of 10 completed


In [3]:
# Initialize an empty list to store the rows for the first DataFrame
dataset_scores = []

# Step 1: Create the DataFrame with rsi_scores, macd_scores, and sma_scores
for date in date_range_signal:
    #print(f"processing {date}")

    # Step 1a: Calculate the RSI signal scores
    rsi_signal_scores = calculate_rsi_signal(data, tickers, date=date, period=14)
    rsi_scores = np.array([score[1] for score in rsi_signal_scores])

    # Step 1b: Calculate the MACD signal scores
    macd_signal_scores = calculate_macd_signal(data, tickers, date=date)
    macd_scores = np.array([score[1] for score in macd_signal_scores])

    # Step 1c: Calculate the SMA signal scores
    sma_signal_scores = calculate_sma_signal(data, tickers, date=date)
    sma_scores = np.array([score[1] for score in sma_signal_scores])

    if np.any(np.isnan(rsi_scores)) or np.any(np.isnan(macd_scores)) or np.any(np.isnan(sma_scores)):
        print(f"Skipping {date} due to NaN values in the signals.")
        continue  # Skip this date and move to the next one

    # Step 1d: Add the scores to the first dataset
    dataset_scores.append({
        'date': date,
        'rsi_scores': rsi_scores,
        'macd_scores': macd_scores,
        'sma_scores': sma_scores
    })


# Convert the first dataset into a DataFrame
df_scores = pd.DataFrame(dataset_scores)
display(df_scores)

Skipping 2000-01-01 00:00:00 due to NaN values in the signals.
Skipping 2000-01-02 00:00:00 due to NaN values in the signals.
Skipping 2000-01-03 00:00:00 due to NaN values in the signals.
Skipping 2000-01-04 00:00:00 due to NaN values in the signals.
Skipping 2000-01-05 00:00:00 due to NaN values in the signals.
Skipping 2000-01-06 00:00:00 due to NaN values in the signals.
Skipping 2000-01-07 00:00:00 due to NaN values in the signals.
Skipping 2000-01-08 00:00:00 due to NaN values in the signals.
Skipping 2000-01-09 00:00:00 due to NaN values in the signals.
Skipping 2000-01-10 00:00:00 due to NaN values in the signals.
Skipping 2000-01-11 00:00:00 due to NaN values in the signals.
Skipping 2000-01-12 00:00:00 due to NaN values in the signals.
Skipping 2000-01-13 00:00:00 due to NaN values in the signals.
Skipping 2000-01-14 00:00:00 due to NaN values in the signals.
Skipping 2000-01-15 00:00:00 due to NaN values in the signals.
Skipping 2000-01-16 00:00:00 due to NaN values in the s

Unnamed: 0,date,rsi_scores,macd_scores,sma_scores
0,2011-04-12,"[38.30986573043745, 45.97699524910194, 49.2629...","[-0.04128610842679266, 0.07719798775124406, 0....","[1.2408265471458435, 0.36537283897399675, 0.01..."
1,2011-04-13,"[43.374396724653025, 45.79289012109328, 50.769...","[-0.032083465458962, 0.05630159823556917, 2.25...","[1.2234384632110586, 0.3209991550445537, 0.016..."
2,2011-04-14,"[39.868438905856806, 41.994001106463216, 51.92...","[-0.031152784552330004, 0.03080832054617856, -...","[1.2041048002243055, 0.2738248443603517, 0.017..."
3,2011-04-15,"[35.71251675197465, 41.1191095856149, 54.33519...","[-0.037784901050468056, 0.01157664891800049, -...","[1.1826029300689687, 0.23108913421630817, 0.01..."
4,2011-04-18,"[41.52287180348995, 36.385285380491595, 50.896...","[-0.03076663401048574, -0.014564692106692326, ...","[1.16100930929184, 0.18357373237609664, 0.0192..."
...,...,...,...,...
2443,2020-12-24,"[67.92107835745412, 61.41404300176869, 62.8719...","[0.6376332706635273, 0.8196518403346595, -1.10...","[20.27695791244507, 16.26273788452147, 64.3665..."
2444,2020-12-28,"[73.6475511079569, 64.36468600812431, 63.14165...","[0.877414719704313, 0.8857981921597815, -1.163...","[20.2629054069519, 16.051128234863285, 64.8746..."
2445,2020-12-29,"[68.56490335395678, 62.478919444792034, 63.483...","[0.8423501602594863, 0.8160780141638602, -1.22...","[20.211753692626957, 15.715760116577144, 65.42..."
2446,2020-12-30,"[65.4894834607882, 56.99500166168995, 67.54592...","[0.676703086459225, 0.560794954330444, -0.7188...","[20.215960712432874, 15.502695999145516, 66.16..."


In [4]:
# Now, iterate over df_step1 to calculate combined scores, portfolio weights, and returns
dataset_returns = []

# Filter the DataFrame to only include rows where the 'date' is in the specified date range
filtered_df = df_scores[df_scores['date'].isin(date_range_eval)]

# Step 2: Create the second DataFrame with combined_scores, portfolio_weights, total_return, and annualized_return
for index, row in filtered_df.iterrows():
    date = row['date']
    rsi_scores = row['rsi_scores']
    macd_scores = row['macd_scores']
    sma_scores = row['sma_scores']

    print(f"Processing {date} for Step 2")

    # Step 2a: Combine the signals (you can later train a model to adjust these weights)
    signal_weights = [1, 1, 1]  # You can adjust these weights later based on your model
    combined_scores = combine_signals(signal_weights, [rsi_scores, macd_scores, sma_scores])
    combined_scores_with_tickers = list(zip(tickers, combined_scores))

    print(f"Combined Scores: {combined_scores_with_tickers}")

    # Step 2b: Solve the portfolio based on the combined signal scores
    portfolio_weights = portfolio_solver.SolveSignalPortfolioMVO(tickers, data, combined_scores)

    # Step 2c: Calculate the returns for the portfolio based on the optimized weights
    cumulative_returns, total_return, annualized_return = portfolio_solver.CalculatePortfolioReturns(tickers, data, portfolio_weights, start_date=date, time_period=252)

    # Step 2d: Add the calculated values to the second dataset
    dataset_returns.append({
        'date': date,
        'combined_scores': combined_scores,
        'portfolio_weights': portfolio_weights,
        'total_return': total_return,
        'annualized_return': annualized_return
    })

# Convert the second dataset into a DataFrame
df_returns = pd.DataFrame(dataset_returns)

# You can now display or use the two DataFrames for further analysis or training your model
display(df_returns)  # The second DataFrame with combined scores, portfolio weights, and returns

Processing 2019-01-02 00:00:00 for Step 2
Combined Scores: [('AAPL', np.float64(36.56631789465289)), ('MSFT', np.float64(47.12125198121907)), ('TSLA', np.float64(42.99442018025284)), ('AMZN', np.float64(44.22458897282027)), ('GOOG', np.float64(48.27451049963411)), ('IBM', np.float64(32.95339554591487)), ('NFLX', np.float64(1.8540117612227434)), ('NVDA', np.float64(36.84819755295941)), ('AMD', np.float64(49.03939929273357)), ('INTC', np.float64(49.33618450170021))]
     pcost       dcost       gap    pres   dres
 0: -1.0724e-01 -3.3443e+00  3e+00  3e-17  3e-16
 1: -1.0824e-01 -1.8473e-01  8e-02  2e-16  6e-16
 2: -1.2061e-01 -1.3096e-01  1e-02  1e-16  1e-16
 3: -1.2518e-01 -1.2820e-01  3e-03  1e-16  9e-17
 4: -1.2596e-01 -1.2684e-01  9e-04  2e-16  8e-17
 5: -1.2649e-01 -1.2656e-01  7e-05  2e-16  4e-17
 6: -1.2652e-01 -1.2652e-01  7e-07  2e-16  8e-17
 7: -1.2652e-01 -1.2652e-01  7e-09  2e-16  7e-17
Optimal solution found.
Optimized Portfolio Weights:
{'AAPL': np.float64(1.561236416306662e

Unnamed: 0,date,combined_scores,portfolio_weights,total_return,annualized_return
0,2019-01-02,"[36.56631789465289, 47.12125198121907, 42.9944...","[1.561236416306662e-08, 0.10000008346708802, 2...",0.558830,0.558830
1,2019-01-03,"[25.84613211908394, 41.100328544961805, 40.411...","[1.3916513398473744e-09, 0.2999999650994345, 1...",0.888180,0.888180
2,2019-01-04,"[32.75334228541379, 49.894593074283755, 47.042...","[9.275004305438516e-09, 0.10000004992737668, 4...",0.576003,0.576003
3,2019-01-07,"[32.23303646516659, 50.148796804759634, 52.808...","[8.69787106603861e-08, 2.173625194221623e-06, ...",0.341094,0.341094
4,2019-01-08,"[35.33224788538981, 51.63730397187763, 53.0730...","[7.676857671101088e-09, 2.2607272275521393e-07...",0.376770,0.376770
...,...,...,...,...,...
247,2019-12-24,"[85.39354217137338, 88.09684583637355, 89.5700...","[0.2999999322834213, 0.2999998068738026, 0.299...",0.979389,0.979389
248,2019-12-26,"[89.82008494933734, 90.95866172287342, 90.6822...","[0.29999923045323035, 0.29999940102352957, 0.2...",0.918458,0.918458
249,2019-12-27,"[89.70344106914686, 91.6771471789509, 90.30326...","[0.29999998390958854, 0.29999998390807087, 0.2...",0.929386,0.929386
250,2019-12-30,"[91.04786956849668, 82.45904843667185, 76.9975...","[0.29999999466123645, 0.2999999747252475, 0.23...",1.012929,1.012929


Calculate all the return for each of those dates



In [6]:
average_annualized_return = df_returns['annualized_return'].mean()
print("Average return for 1/N : ", average_annualized_return)

total_return_sum = df_returns['total_return'].sum()
print(f"Total Return Sum: {total_return_sum}")

Average return for 1/N :  0.9146849132211731
Total Return Sum: 230.50059813173561


### Matrix of size signals 

In [8]:

# Define the target date range
start_date = '2011-01-01'
end_date = '2018-01-01'
date_range = pd.date_range(start=start_date, end=end_date)

# Initialize an empty list to store the rows of the dataset
dataset_weighted_returns = []
# Filter the DataFrame to only include rows where the 'date' is in the specified date range
filtered_df = df_scores[df_scores['date'].isin(date_range_eval)]

# Step 2: Create the second DataFrame with combined_scores, portfolio_weights, total_return, and annualized_return
for index, row in filtered_df.iterrows():
    date = row['date']
    rsi_scores = row['rsi_scores']
    macd_scores = row['macd_scores']
    sma_scores = row['sma_scores']

    if np.any(np.isnan(rsi_scores)) or np.any(np.isnan(macd_scores)) or np.any(np.isnan(sma_scores)):
        #print(f"Skipping {date} due to NaN values in the signals.")
        continue  # Skip this date and move to the next one

    # Calculate the average scores for each signal type (RSI, MACD, SMA)
    avg_rsi = np.mean(rsi_scores)
    avg_macd = np.mean(macd_scores)
    avg_sma = np.mean(sma_scores)

    # Add the aggregated values to the dataset
    dataset_weighted_returns.append({
        'date': date,
        'rsi_avg': avg_rsi,
        'macd_avg': avg_macd,
        'sma_avg': avg_sma,
        'rsi_scores': rsi_scores,
        'macd_scores': macd_scores,
        'sma_scores': sma_scores,
        'total_return': total_return 
    })

# Convert the dataset into a DataFrame
df = pd.DataFrame(dataset_weighted_returns)

# Prepare the feature matrix X (aggregated values for RSI, MACD, SMA)
X = df[['rsi_avg', 'macd_avg', 'sma_avg']].values

# Prepare the target y (total return)
y = df['total_return'].values.reshape(-1, 1)

# Initialize and train a linear model to learn the weight matrix W (size should be 3)
W, _, _, _ = np.linalg.lstsq(X, y, rcond=None)

# Output the learned weight matrix W
print("Learned Weight Matrix (W):")
print(W)


Learned Weight Matrix (W):
[[ 0.01807488]
 [-0.11219953]
 [ 0.00577019]]


In [11]:

# Now, iterate over df_step1 to calculate combined scores, portfolio weights, and returns
dataset_weighted_returns = []

# Filter the DataFrame to only include rows where the 'date' is in the specified date range
filtered_df = df_scores[df_scores['date'].isin(date_range_eval)]

# Step 2: Create the second DataFrame with combined_scores, portfolio_weights, total_return, and annualized_return
for index, row in filtered_df.iterrows():
    date = row['date']
    rsi_scores = row['rsi_scores']
    macd_scores = row['macd_scores']
    sma_scores = row['sma_scores']

    print(f"Processing {date} for Step 2")

    # Step 2a: Combine the signals (you can later train a model to adjust these weights)
    signal_weights = W  # You can adjust these weights later based on your model
    combined_scores = combine_signals(signal_weights, [rsi_scores, macd_scores, sma_scores])
    combined_scores_with_tickers = list(zip(tickers, combined_scores))

    print(f"Combined Scores: {combined_scores_with_tickers}")

    # Step 2b: Solve the portfolio based on the combined signal scores
    portfolio_weights = portfolio_solver.SolveSignalPortfolioMVO(tickers, data, combined_scores)

    # Step 2c: Calculate the returns for the portfolio based on the optimized weights
    cumulative_returns, total_return, annualized_return = portfolio_solver.CalculatePortfolioReturns(tickers, data, portfolio_weights, start_date=date, time_period=252)

    # Step 2d: Add the calculated values to the second dataset
    dataset_weighted_returns.append({
        'date': date,
        'combined_scores': combined_scores,
        'portfolio_weights': portfolio_weights,
        'total_return': total_return,
        'annualized_return': annualized_return
    })


# Convert the dataset into a DataFrame
df2 = pd.DataFrame(dataset_weighted_returns)

# Display the dataset (this will include the RSI, MACD, SMA scores, portfolio weights, and returns)

# You can now use this DataFrame for training your model.
display(df2)

Processing 2019-01-02 00:00:00 for Step 2
Combined Scores: [('AAPL', np.float64(0.6565293835064173)), ('MSFT', np.float64(0.8499196782980052)), ('TSLA', np.float64(0.7889826541128233)), ('AMZN', np.float64(0.8443501430128046)), ('GOOG', np.float64(0.8890464534195123)), ('IBM', np.float64(0.7496489647571939)), ('NFLX', np.float64(0.38748426235741573)), ('NVDA', np.float64(0.6794632361237752)), ('AMD', np.float64(0.863056421608356)), ('INTC', np.float64(0.9125880296517727))]
     pcost       dcost       gap    pres   dres
 0: -1.0310e-01 -3.2751e+00  3e+00  2e-16  3e-16
 1: -1.0351e-01 -1.6980e-01  7e-02  1e-16  6e-16
 2: -1.1277e-01 -1.2130e-01  9e-03  1e-16  5e-17
 3: -1.1625e-01 -1.1825e-01  2e-03  2e-16  2e-17
 4: -1.1716e-01 -1.1763e-01  5e-04  2e-16  5e-17
 5: -1.1742e-01 -1.1743e-01  2e-05  2e-16  5e-17
 6: -1.1742e-01 -1.1742e-01  2e-07  2e-16  3e-17
 7: -1.1742e-01 -1.1742e-01  2e-09  2e-16  6e-17
Optimal solution found.
Optimized Portfolio Weights:
{'AAPL': np.float64(3.7742394

Unnamed: 0,date,combined_scores,portfolio_weights,total_return,annualized_return
0,2019-01-02,"[0.6565293835064173, 0.8499196782980052, 0.788...","[3.774239435633503e-09, 0.1000000285225567, 3....",0.558830,0.558830
1,2019-01-03,"[0.4906071665015242, 0.7577316613145237, 0.747...","[1.5519033836886772e-08, 0.2999996751009861, 0...",0.724950,0.724950
2,2019-01-04,"[0.6197170841413654, 0.8877756376859293, 0.858...","[2.9617487319784596e-09, 0.10000003595668022, ...",0.576003,0.576003
3,2019-01-07,"[0.6127795795699847, 0.8709426264902623, 0.945...","[8.53559155666152e-09, 3.53887810533279e-08, 0...",0.389856,0.389856
4,2019-01-08,"[0.6633240883858779, 0.8764884203616479, 0.937...","[7.754802614069454e-09, 2.597722476099919e-08,...",0.433699,0.433699
...,...,...,...,...,...
247,2019-12-24,"[1.3922539069380944, 1.4119826442580137, 1.505...","[0.09866667694372423, 0.2999989525203422, 0.29...",1.050354,1.050354
248,2019-12-26,"[1.4618809725810713, 1.457522151077978, 1.5218...","[0.29999949102415924, 0.2999995171528868, 0.29...",0.918426,0.918426
249,2019-12-27,"[1.45605007553598, 1.4680243648503433, 1.51685...","[0.29999999334106003, 0.2999999906272624, 0.29...",0.929386,0.929386
250,2019-12-30,"[1.4772834917872781, 1.3147345046646193, 1.289...","[0.29999999227584084, 0.2999999595661179, 0.29...",0.983747,0.983747


In [12]:
average_annualized_return = df2['annualized_return'].mean()
print("Average return for trained matrix : ", average_annualized_return)

total_return_sum = df2['total_return'].sum()
print(f"Total Return Sum: {total_return_sum}")

Average return for trained matrix :  0.8828853999014219
Total Return Sum: 222.48712077515833


In [None]:
average_annualized_return = df2['annualized_return'].mean()
print("Average return for trained matrix : ", average_annualized_return)

total_return_sum = df2['total_return'].sum()
print(f"Total Return Sum: {total_return_sum}")

Average return for trained matrix :  0.8311076613052832
Total Return Sum: 14.206047371171156


### Matrix of size tickers x signals 

In [13]:
# Create the feature matrix dynamically
X = np.array([[
    row['rsi_scores'][i],  # Get RSI score for the i-th ticker
    row['macd_scores'][i],  # Get MACD score for the i-th ticker
    row['sma_scores'][i]    # Get SMA score for the i-th ticker
] for _, row in df.iterrows() for i in range(len(tickers))]).reshape(len(df), len(tickers) * 3)

# Ensure the shape of X is correct (it should be 2D with each row representing a data point)
#print("Shape of X:", X.shape)
#print(X)

# Target (returns), you can choose 'total_return' or 'annualized_return' based on your preference
y = df['total_return'].values.reshape(-1, 1)

# Initialize and train a linear model to learn the weight matrix W
# Using np.linalg.lstsq to solve the least squares problem, i.e., Y = XW
W2, _, _, _ = np.linalg.lstsq(X, y, rcond=None)

# Output the learned weight matrix W
print("Learned Weight Matrix (W):", W2)


Learned Weight Matrix (W): [[ 1.13021847e-03]
 [-9.92585449e-02]
 [ 7.29135065e-03]
 [ 1.46390422e-04]
 [-1.13610617e-02]
 [ 6.93767857e-02]
 [ 1.31213923e-03]
 [-3.21514521e-02]
 [-3.85331532e-03]
 [-2.00584979e-03]
 [ 5.08790920e-02]
 [-3.24205365e-02]
 [ 7.08759475e-04]
 [-1.25702522e-02]
 [ 1.25455783e-02]
 [ 3.52396742e-04]
 [-2.59149473e-02]
 [ 7.08581885e-03]
 [ 9.66663646e-04]
 [-7.27360899e-03]
 [-2.20902949e-03]
 [-1.69013855e-03]
 [ 4.70302960e-01]
 [-4.60857021e-01]
 [ 1.52986632e-03]
 [-2.10192925e-02]
 [-1.42347532e-02]
 [-3.48738511e-04]
 [ 7.01920446e-02]
 [ 4.32205631e-02]]


In [15]:
# Iterate over the rows in df_scores to calculate the combined scores
dataset_weighted_returns = []

for index, row in filtered_df.iterrows():
    date = row['date']
    rsi_scores = row['rsi_scores']
    macd_scores = row['macd_scores']
    sma_scores = row['sma_scores']

    print(f"Processing {date} for Step 2")

    # Step 2a: Combine the signals using the learned weights
    # Reshape W into a 3xN matrix (3 signals and N stocks)
    # W is the learned weight matrix with size (3*N, 1), so we need to reshape it into a 3xN matrix
    W_reshaped = W2.reshape(3, len(tickers))

    # Multiply the scores by the weights
    weighted_rsi_scores = rsi_scores * W_reshaped[0, :]
    weighted_macd_scores = macd_scores * W_reshaped[1, :]
    weighted_sma_scores = sma_scores * W_reshaped[2, :]

    # Combine the weighted scores for each stock
    combined_scores = weighted_rsi_scores + weighted_macd_scores + weighted_sma_scores

    # Pair the combined scores with tickers
    combined_scores_with_tickers = list(zip(tickers, combined_scores))

    print(f"Combined Scores: {combined_scores_with_tickers}")

    # Step 2b: Solve the portfolio based on the combined signal scores
    portfolio_weights = portfolio_solver.SolveSignalPortfolioMVO(tickers, data, combined_scores)

    # Step 2c: Calculate the returns for the portfolio based on the optimized weights
    cumulative_returns, total_return, annualized_return = portfolio_solver.CalculatePortfolioReturns(tickers, data, portfolio_weights, start_date=date, time_period=252)

    # Step 2d: Add the calculated values to the dataset
    dataset_weighted_returns.append({
        'date': date,
        'combined_scores': combined_scores,
        'portfolio_weights': portfolio_weights,
        'total_return': total_return,
        'annualized_return': annualized_return
    })

# Convert the dataset into a DataFrame
df2 = pd.DataFrame(dataset_weighted_returns)

# Display the dataset (this will include the RSI, MACD, SMA scores, portfolio weights, and returns)
display(df2)

average_annualized_return = df2['annualized_return'].mean()
print("Average return for trained matrix : ", average_annualized_return)

total_return_sum = df2['total_return'].sum()
print(f"Total Return Sum: {total_return_sum}")


Processing 2019-01-02 00:00:00 for Step 2
Combined Scores: [('AAPL', np.float64(0.057045658800513356)), ('MSFT', np.float64(-4.427170508835154)), ('TSLA', np.float64(1.0633631612289738)), ('AMZN', np.float64(2.140564943693846)), ('GOOG', np.float64(-0.5838119687112676)), ('IBM', np.float64(3.436350459618693)), ('NFLX', np.float64(0.7128205237480932)), ('NVDA', np.float64(-1.2305522410508511)), ('AMD', np.float64(-0.08453424585279305)), ('INTC', np.float64(-0.18575286909790037))]
     pcost       dcost       gap    pres   dres
 0: -2.4533e+01 -1.0744e+01  2e+02  2e+01  3e-16
 1: -1.9979e+00 -9.8979e+00  1e+01  3e-01  8e-16
 2: -1.8036e+00 -2.6753e+00  9e-01  4e-03  1e-15
 3: -2.1843e+00 -2.3639e+00  2e-01  7e-04  2e-16
 4: -2.2884e+00 -2.3033e+00  2e-02  6e-05  1e-16
 5: -2.2977e+00 -2.2978e+00  2e-04  6e-07  4e-16
 6: -2.2978e+00 -2.2978e+00  2e-06  6e-09  4e-16
Optimal solution found.
Optimized Portfolio Weights:
{'AAPL': np.float64(2.2745672598464147e-07), 'MSFT': np.float64(3.576262

Unnamed: 0,date,combined_scores,portfolio_weights,total_return,annualized_return
0,2019-01-02,"[0.057045658800513356, -4.427170508835154, 1.0...","[2.2745672598464147e-07, 3.576262185615388e-09...",0.302959,0.302959
1,2019-01-03,"[0.03692519105649331, -3.8590382890553245, 1.0...","[1.5744239375054436e-07, 2.152841081653088e-09...",0.381471,0.381471
2,2019-01-04,"[0.045609799928320316, -4.73379349493333, 1.15...","[4.110541760083327e-09, 6.534538771550285e-11,...",0.297312,0.297312
3,2019-01-07,"[0.046712135826038254, -4.7622068993733215, 1....","[3.4886972094968425e-09, 1.0313859067969994e-1...",0.297430,0.297430
4,2019-01-08,"[0.05458746534555111, -4.903477796377104, 1.24...","[5.929303873834883e-09, 1.434706731410943e-10,...",0.277774,0.277774
...,...,...,...,...,...
247,2019-12-24,"[0.06784959232594519, -7.528995267602042, 2.87...","[0.09999515117808651, 0.29999999827024204, 4.6...",0.557826,0.557826
248,2019-12-26,"[0.07573666845724077, -7.799007382664905, 2.96...","[2.921323351924522e-06, 0.2999999984267615, 4....",0.810230,0.810230
249,2019-12-27,"[0.07589269255042111, -7.855743884273441, 3.04...","[1.3140990852967519e-06, 0.2999999986200546, 3...",0.829035,0.829035
250,2019-12-30,"[0.077411786959616, -6.934271944653797, 3.0216...","[0.0999997908380993, 0.2999999989049219, 2.584...",0.581081,0.581081


Average return for trained matrix :  0.5335396080365482
Total Return Sum: 134.45198122521015


# TODO : Train a neural net perhaps

In [11]:

# Define the target date range
start_date = '2011-01-01'
end_date = '2020-01-01'
date_range = pd.date_range(start=start_date, end=end_date)

dataset_returns = []
# Filter the DataFrame to only include rows where the 'date' is in the specified date range
filtered_df = df_scores[df_scores['date'].isin(date_range_eval)]

# Step 2: Create the second DataFrame with combined_scores, portfolio_weights, total_return, and annualized_return
for index, row in filtered_df.iterrows():
    date = row['date']
    rsi_scores = row['rsi_scores']
    macd_scores = row['macd_scores']
    sma_scores = row['sma_scores']

    print(f"Processing {date} for Step 2")

    # Step 2a: Combine the signals (you can later train a model to adjust these weights)
    signal_weights = [1, 1, 1]  # You can adjust these weights later based on your model
    combined_scores = combine_signals(signal_weights, [rsi_scores, macd_scores, sma_scores])
    combined_scores_with_tickers = list(zip(tickers, combined_scores))

    print(f"Combined Scores: {combined_scores_with_tickers}")

    # Step 2b: Solve the portfolio based on the combined signal scores
    portfolio_weights = portfolio_solver.SolveSignalPortfolio(tickers, data, combined_scores)

    # Step 2c: Calculate the returns for the portfolio based on the optimized weights
    cumulative_returns, total_return, annualized_return = portfolio_solver.CalculatePortfolioReturns(tickers, data, portfolio_weights, start_date=date, time_period=30)

    # Step 2d: Add the calculated values to the second dataset
    dataset_returns.append({
        'date': date,
        'combined_scores': combined_scores,
        'portfolio_weights': portfolio_weights,
        'total_return': total_return,
        'annualized_return': annualized_return
    })

# Convert the second dataset into a DataFrame
df_returns = pd.DataFrame(dataset_returns)

Processing 2019-01-02 00:00:00 for Step 2
Combined Scores: [('AAPL', np.float64(36.56635712709539)), ('MSFT', np.float64(47.12130224731517)), ('TSLA', np.float64(42.99442018025284)), ('AMZN', np.float64(44.22458897282027)), ('GOOG', np.float64(48.27450874346967)), ('IBM', np.float64(32.95343778192971)), ('NFLX', np.float64(1.8540117612227434)), ('NVDA', np.float64(36.84823739536755)), ('AMD', np.float64(49.03939929273357)), ('INTC', np.float64(49.33618617752629))]
     pcost       dcost       gap    pres   dres
 0: -6.5219e-02 -1.0764e+00  1e+00  0e+00  4e+00
 1: -6.5580e-02 -8.4915e-02  2e-02  2e-16  7e-02
 2: -6.7034e-02 -6.7860e-02  8e-04  7e-17  1e-03
 3: -6.7341e-02 -6.7396e-02  5e-05  2e-16  2e-05
 4: -6.7387e-02 -6.7388e-02  1e-06  7e-17  1e-07
 5: -6.7388e-02 -6.7388e-02  1e-08  6e-17  1e-09
Optimal solution found.
Optimized Portfolio Weights:
{'AAPL': np.float64(0.0903207468961032), 'MSFT': np.float64(0.12421915159754975), 'TSLA': np.float64(0.11096520110428375), 'AMZN': np.fl

In [12]:
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd

def train_regularized_regression(X, y, alpha=1.0):
    """
    Trains a Ridge regression model to find the optimal weights for combining signals.

    Parameters:
    - X: The feature matrix (with columns as signals).
    - y: The target values (total return).
    - alpha: Regularization strength (higher values mean more regularization).

    Returns:
    - The weight matrix of size 3.
    """
    # Initialize the Ridge regression model with regularization
    model = Ridge(alpha=alpha)

    # Fit the model to the data
    model.fit(X, y)

    # Return the weight matrix (coefficients)
    return model.coef_

# Example of how to use this function


# Prepare the feature matrix X (aggregated values for RSI, MACD, SMA)
X = df_scores[['rsi_avg', 'macd_avg', 'sma_avg']].values

# Prepare the target y (total return)
y = df['total_return'].values.reshape(-1, 1)

# Train the regularized regression model (Ridge regression)
W_ridge = train_regularized_regression(X, y, alpha=1.0)

# Output the learned weight matrix W (should be of size 3)
print("Learned Weight Matrix (W):")
print(W_ridge)

KeyError: "None of [Index(['rsi_avg', 'macd_avg', 'sma_avg'], dtype='object')] are in the [columns]"