In [46]:
#Library Imports
import pandas as pd
import numpy as np
import os

In [47]:
#PRICING STRATEGIES

#BENCHMARKS

#Strategy 1
def buy_and_hold(b, price_relative_vectors):
    
    # Initialize an array to hold portfolio weights over time
    b_n = np.zeros_like(price_relative_vectors)
    
    # Set initial allocation
    b_n[0] = b
    
    # Calculate portfolio evolution without rebalancing
    for i in range(1, len(price_relative_vectors)):
        # The portfolio grows according to the price relatives
        b_n[i] = b_n[i-1] * price_relative_vectors[i-1]
        
        # Normalize to get portfolio weights (optional, if you want to track relative proportions)
        b_n[i] /= np.sum(b_n[i])
    
    return b_n

#Strategy 2
def best_stock(b, price_relative_vectors):
    b_n = np.zeros_like(price_relative_vectors)  # Initialize portfolio weights to zeros
    b_n[0,:] = np.ones(len(b)) / len(b)

    for i in range(1, len(price_relative_vectors)):
        # Find the index of the stock with the highest price relative in the current period
        best_stock_index = np.argmax(price_relative_vectors[i-1])
        
        # Allocate all the weight to the best stock for that period
        b_n[i, best_stock_index] = 1
    
    return b_n

#Strategy 3
def constant_rebalancing(b, price_relative_vectors):
    return np.array([b for _ in range(len(price_relative_vectors))])


#FOLLOW-THE-WINNER

#Strategy 4

def universal_portfolios(b, price_relative_vectors):
    T, N = price_relative_vectors.shape  # T is the number of time periods, N is the number of stocks
    portfolios = np.random.dirichlet(np.ones(N), N)  # Initialize random portfolios on simplex
    portfolio_weights = np.zeros((T, N))  # To store the universal portfolio's weights over time
    
    # Initialize wealth for each portfolio
    wealth = np.ones(N)
    
    for t in range(T):
        # Update wealth for each portfolio based on the price relatives for time t
        wealth *= portfolios.dot(price_relative_vectors[t-1])
        
        # Compute the average portfolio weights (weighted by their wealth)
        average_weights = np.average(portfolios, axis=0, weights=wealth)
        
        # Store the universal portfolio's weights for time t
        portfolio_weights[t] = average_weights
    
    return portfolio_weights

#Strategy 5

def exponential_gradient(b, price_relative_vectors, learning_rate = .5):

    T, N = price_relative_vectors.shape  # T is the number of time periods, N is the number of assets
    b_n = np.zeros_like(price_relative_vectors)  # To store portfolio weights over time
    b_n[0] = b  # Set initial portfolio allocation

    for t in range(T - 1):
        # Calculate the portfolio return at time t
        portfolio_return = np.dot(b_n[t], price_relative_vectors[t])

        for i in range(N):
            # Apply Helmbold's update rule for each asset
            b_n[t + 1, i] = b_n[t, i] * (learning_rate * (price_relative_vectors[t, i] / portfolio_return - 1) + 1)
        
        # Normalize the portfolio weights to sum to 1 after the update
        b_n[t + 1] /= np.sum(b_n[t + 1])

    return b_n
    
#Strategy 6
def follow_the_leader(b, price_relative_vectors, gamma = .5):
    T, N = price_relative_vectors.shape  # T is the number of time periods, N is the number of assets
    b_n = np.zeros_like(price_relative_vectors)  # Initialize portfolio weights over time
    b_n[0] = b  # Set initial allocation
    
    # Iterate over time periods
    for t in range(1, T):
        # Compute the BCRP up to time t (by maximizing the log cumulative return)
        cumulative_log_returns = np.sum(np.log(price_relative_vectors[:t]), axis=0)  # Sum log returns up to t
        b_star = np.exp(cumulative_log_returns)  # Exponentiate to get the proportional weights
        b_star /= np.sum(b_star)  # Normalize to ensure portfolio sums to 1
        
        # Update portfolio using the convex combination of b_star and b_t with parameter gamma
        b_n[t] = (1 - gamma) * b_star + gamma * b_n[t - 1]
        
        # Normalize again to ensure the portfolio sums to 1 (optional but helps maintain valid portfolio)
        b_n[t] /= np.sum(b_n[t])

    return b_n

#Strategy 7

# Helper function: Project a vector onto the simplex
def project_to_simplex(v):
    """ Project the vector v onto the probability simplex (sum to 1 and all entries >= 0) """
    n = len(v)
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u) - 1
    rho = np.nonzero(u > cssv / np.arange(1, n+1))[0][-1]
    theta = cssv[rho] / (rho + 1.0)
    return np.maximum(v - theta, 0)

# Online Newton Step (ONS) with L2-regularization
def follow_the_regularized_leader(b, price_relative_vectors, beta=.1, delta=0.5):
    T, N = price_relative_vectors.shape  # T is the number of time periods, N is the number of assets
    b_n = np.zeros_like(price_relative_vectors)  # Initialize portfolio weights over time
    b_n[0] = b  # Equal allocation initially
    
    # Initialize A_t as the identity matrix
    A_t = np.eye(N)
    
    for t in range(1, T):
        # Calculate portfolio return at time t
        portfolio_return = np.dot(b_n[t - 1], price_relative_vectors[t - 1])
        
        # Compute the price relatives outer product for A_t update
        x_t = price_relative_vectors[t - 1]
        A_t += np.outer(x_t, x_t) / (portfolio_return ** 2) + np.eye(N)
        
        # Compute p_t (the gradient-like vector)
        p_t = (1 + (1 / beta)) * np.sum([x_t / (np.dot(b_n[τ], price_relative_vectors[τ])) for τ in range(t)], axis=0)
        
        # Compute the next portfolio using the ONS update rule
        b_n[t] = np.dot(np.linalg.inv(A_t), p_t) * delta
        
        # Project the updated portfolio onto the simplex to ensure it's valid
        b_n[t] = project_to_simplex(b_n[t])
    
    return b_n


#Strategy 8
# Aggregating Algorithm (AA) by Vovk and Watkins
def aggregation_based(b, price_relative_vectors, learning_rate=0.5):
    T, N = price_relative_vectors.shape  # T is the number of time periods, N is the number of assets
    b_n = np.zeros_like(price_relative_vectors)  # Initialize portfolio weights over time
    b_n[0] = b  # Set initial portfolio allocation
    
    # Generate a set of base portfolios (experts) using Dirichlet distribution for diversity
    base_portfolios = np.random.dirichlet(np.ones(N), N)
    
    # Initialize the prior weights P_0(db) (uniform over the base portfolios)
    prior_weights = np.ones(N) / N
    
    for t in range(T):
        # Compute the performance of each base portfolio up to time t
        portfolio_performance = np.prod([np.dot(base_portfolios, price_relative_vectors[i]) for i in range(t)], axis=0)
        
        # Update the weights of the base portfolios based on their performance
        adjusted_weights = prior_weights * portfolio_performance**learning_rate
        # Normalize the weights
        adjusted_weights /= np.sum(adjusted_weights)
        
        # Compute the new portfolio as a weighted average of the base portfolios
        b_n[t] = np.dot(adjusted_weights, base_portfolios)
    
    return b_n






In [48]:
def initialize_portfolio(m):
    return np.ones(m) / m

def calculate_price_relative_vectors(folder_path, tickers):
    """
    Function to calculate the price relative vector for multiple stocks and return them side by side.

    Parameters:
    folder_path (str): The path to the folder containing the stock data.
    tickers (list): List of stock tickers to process.

    Returns:
    pd.DataFrame: A dataframe containing price relative vectors for each stock.
    """

    # Get list of all folder names that start with 'allstocks'
    folder_names = [name for name in os.listdir(folder_path) 
                    if os.path.isdir(os.path.join(folder_path, name)) and name.startswith('allstocks')]

    # Initialize an empty dataframe to store price relative vectors for all tickers
    price_relative_df = pd.DataFrame()

    # Column names expected in the CSV files
    column_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Split Factor', 'Earnings', 'Dividends']

    # Loop through each ticker
    for ticker in tickers:
        # Initialize an empty dataframe for the current ticker
        close_price_df = pd.DataFrame(columns=['Ticker', 'Date', 'Close'])

        # Loop through each folder (date)
        for date in folder_names: 
            stock_file = 'table_' + ticker + '.csv'
            file_path = os.path.join(folder_path, date, stock_file)

            # Load Data
            iter_df = pd.read_csv(file_path)
            iter_df.columns = column_names

            # Get Closing Price at End of Day
            last_row = iter_df[['Date', 'Close']].iloc[-1]
            last_row['Ticker'] = ticker

            # Append to the dataframe
            close_price_df = pd.concat([close_price_df, last_row.to_frame().T], ignore_index=True)

        # Convert Date to datetime and set as index
        close_price_df['Date'] = close_price_df['Date'].astype(int)
        close_price_df['Date'] = pd.to_datetime(close_price_df['Date'], format='%Y%m%d')
        close_price_df.set_index('Date', inplace=True)

        # Calculate Price Relative Vector
        close_price_df[ticker] = (close_price_df['Close'] / close_price_df['Close'].shift(1)).fillna(1)

        # Add the ticker's price relative vector to the final dataframe
        if price_relative_df.empty:
            price_relative_df = close_price_df[[ticker]]
        else:
            price_relative_df = price_relative_df.join(close_price_df[[ticker]], how='outer')

    return price_relative_df

def calculate_period_return(b_t, x_t):
    """
    Calculate the return of the portfolio in a single period.
    
    Parameters:
    b_t : array-like
        Portfolio vector at time t.
    x_t : array-like
        Price relative vector at time t.
    
    Returns:
    float : Portfolio period return
    """
    return np.dot(b_t, x_t)

def calculate_cumulative_wealth(b_n_1, price_relative_vectors, S0=1):
    """
    Calculate the cumulative wealth over n periods given a portfolio strategy.
    
    Parameters:
    b_n_1 : list of portfolio vectors
        A list of portfolio vectors for each period.
    price_relative_vectors : array-like
        A matrix of price relative vectors for each period.
    S0 : float, optional
        Initial wealth (default is 1).
    
    Returns:
    float : Cumulative wealth after n periods.
    """
    cumulative_wealth = S0
    for t, x_t in enumerate(price_relative_vectors):
        # Calculate portfolio return for period t
        period_return = calculate_period_return(b_n_1[t], x_t)
        # Update cumulative wealth
        cumulative_wealth *= period_return
    return cumulative_wealth

def calculate_exponential_growth_rate(Sn, n, S0=1):
    """
    Calculate the exponential growth rate (Wn) after n periods.
    
    Parameters:
    Sn : float
        Final cumulative wealth.
    n : int
        Number of periods.
    
    Returns:
    float : Exponential growth rate.
    """
    return (1 / n) * np.log(Sn / S0)


In [49]:
# INITIALIZATION
folder_path = 'C:\\Users\\nlahanis\\OneDrive - Huron Consulting Group\\Documents\\NYU\\Research\\Online-Quantitative-Trading-Strategies\\Data'
tickers = ['aapl', 'msft', 'goog']  # List of tickers you want to process
b = initialize_portfolio(len(tickers))

# Calculate the relative price vectors for each stock
price_relative_vectors = calculate_price_relative_vectors(folder_path, tickers)
price_relative_vectors_array = np.array(price_relative_vectors)

# PORTFOLIO STRATEGY HERE
b_n = aggregation_based(b, price_relative_vectors_array)

# Calculate final cumulative wealth after n periods
Sn = calculate_cumulative_wealth(b_n, np.array(price_relative_vectors))

# Calculate exponential growth rate
Wn = calculate_exponential_growth_rate(Sn, len(price_relative_vectors))

  close_price_df[ticker] = (close_price_df['Close'] / close_price_df['Close'].shift(1)).fillna(1)
  close_price_df[ticker] = (close_price_df['Close'] / close_price_df['Close'].shift(1)).fillna(1)


[[0.07445525 0.45551192 0.47003283]
 [0.07445525 0.45551192 0.47003283]
 [0.07442203 0.45574414 0.46983383]
 [0.07430434 0.45656862 0.46912705]
 [0.07425088 0.45694303 0.46880609]
 [0.07423445 0.4570579  0.46870764]
 [0.07422146 0.45714878 0.46862975]
 [0.07392144 0.45924788 0.46683068]
 [0.07415546 0.45760888 0.46823567]
 [0.07405697 0.45829799 0.46764504]
 [0.07403153 0.45847615 0.46749232]
 [0.07398928 0.458772   0.46723873]
 [0.07401312 0.45860542 0.46738146]
 [0.07406751 0.45822423 0.46770826]
 [0.07412722 0.45780649 0.46806629]
 [0.07412924 0.45779265 0.46807812]
 [0.07415884 0.45758586 0.46825531]
 [0.07412597 0.45781604 0.46805799]
 [0.07414426 0.45768805 0.4681677 ]
 [0.0740202  0.45855651 0.46742329]
 [0.07403421 0.45845741 0.46750839]
 [0.07404522 0.45838004 0.46757474]
 [0.07411423 0.45789692 0.46798885]
 [0.07408834 0.45807806 0.4678336 ]
 [0.07410368 0.45797092 0.4679254 ]
 [0.07413211 0.45777207 0.46809582]
 [0.07412352 0.45783243 0.46804405]
 [0.07435695 0.456197   0.46

  close_price_df[ticker] = (close_price_df['Close'] / close_price_df['Close'].shift(1)).fillna(1)
