In [None]:
import numpy as np
from scipy.optimize import minimize

def log_returns(prices):
    """Return the log of each price divided by the price right before it.

    Parameters
    ----------
    prices : sliceable sequence of numbers
        Prices in the order they were observed.

    Returns
    -------
    numpy.ndarray
        One entry fewer than ``prices``; entry ``i`` is
        ``log(prices[i + 1] / prices[i])``.
    """
    later = np.array(prices[1:])
    earlier = np.array(prices[:-1])
    return np.log(later / earlier)

def negative_log_posterior(params, log_returns):
    """Negative log posterior of ``(mu, sigma)`` given observed log returns.

    Every log return is modelled as an independent draw from
    ``Normal(mu - 0.5 * sigma**2, sigma**2)``. ``sigma`` carries the prior
    ``p(sigma)`` proportional to ``1 / sigma``; ``mu`` has a flat prior.

    Parameters
    ----------
    params : sequence of two floats
        ``(mu, sigma)``.
    log_returns : array-like
        Observed log returns.

    Returns
    -------
    float
        ``-(log likelihood + log prior)``, or ``np.inf`` when ``sigma <= 0``
        so that a minimizer rejects non-positive volatilities.
    """
    mu, sigma = params
    if sigma <= 0:
        return np.inf

    # Accept plain lists as well as arrays.
    observed = np.asarray(log_returns, dtype=float)
    n = len(observed)
    expected = mu - 0.5 * sigma**2
    variance = sigma**2

    log_likelihood = (
        -n / 2 * np.log(2 * np.pi * variance)
        - np.sum((observed - expected) ** 2) / (2 * variance)
    )
    # log(1 / sigma) == -log(sigma). Adding +log(sigma) here would instead
    # correspond to a prior proportional to sigma.
    log_prior = -np.log(sigma)

    # Negated because the caller minimizes.
    return -(log_likelihood + log_prior)

def estimate_params(prices):
    """Estimate ``(mu, sigma)`` from a price series by minimizing
    ``negative_log_posterior`` over the series' log returns.

    Parameters
    ----------
    prices : sliceable sequence of numbers
        Prices in the order they were observed.

    Returns
    -------
    tuple
        ``(mu_est, sigma_est)`` taken from the optimizer's solution.

    Raises
    ------
    ValueError
        If the log returns have no positive, finite spread (fewer than two
        returns, a constant series, or NaNs), so sigma cannot be estimated.
    RuntimeError
        If the optimizer reports failure.
    """
    log_ret = log_returns(prices)
    sample_std = np.std(log_ret)
    # `not (x > 0)` also catches NaN, which a plain `x <= 0` would let through.
    if len(log_ret) < 2 or not sample_std > 0:
        raise ValueError(
            "Need at least two log returns with non-zero, finite spread to estimate sigma"
        )

    # The model's mean log return is mu - 0.5 * sigma**2, so add the drift
    # correction back when seeding mu from the sample mean.
    initial_guess = [np.mean(log_ret) + 0.5 * sample_std**2, sample_std]

    # Keep sigma strictly positive: at sigma == 0 the objective is +inf, which
    # gives the optimizer nothing usable at the boundary.
    sigma_floor = 1e-8 * sample_std
    result = minimize(
        negative_log_posterior,
        initial_guess,
        args=(log_ret,),
        bounds=[(None, None), (sigma_floor, None)],
    )
    if not result.success:
        raise RuntimeError(f"Optimization did not converge: {result.message}")

    mu_est, sigma_est = result.x
    return mu_est, sigma_est

# Demo run on a short hard-coded price series; replace `prices` with real
# stock price data. The fitted values are kept in `mu_est` / `sigma_est`
# and printed below.
prices = [100, 102, 105, 103, 106, 108]
mu_est, sigma_est = estimate_params(prices)
print("Estimated mu:", mu_est)
print("Estimated sigma:", sigma_est)


In [None]:
import numpy as np

In [None]:
# 3x3 matrix whose rows each sum to 1
# (presumably a transition matrix -- confirm).
a = np.array(
    [
        [0.70, 0.22, 0.08],
        [0.53, 0.29, 0.18],
        [0.50, 0.30, 0.20],
    ]
)

In [None]:
# Matrix product of `a` with itself three times (a to the third power).
a @ a @ a

In [None]:
import os

import numpy as np
import pandas as pd

# Horizon that every per-horizon statistic is rescaled to.
BASE_LINE_HORIZON = 256
# Rate subtracted from the rescaled mu values.
RISK_FREE_RATE = 0.05


def get_all_errors_and_mu(periods, data_root='..'):
  """Load, rescale and combine the per-horizon error tables and mu vectors.

  For every horizon ``h`` in ``periods`` this reads
  ``<data_root>/processed_data_<h>/all_errors.csv`` (first column used as a
  date-parsed index) and ``<data_root>/processed_data_<h>/mu.npy``, then:

  * prints the mean of the unscaled errors for that horizon,
  * multiplies the errors by ``sqrt(BASE_LINE_HORIZON / h)`` and appends
    ``_<h>`` to every column name,
  * multiplies mu by ``BASE_LINE_HORIZON / h`` and subtracts
    ``RISK_FREE_RATE``.

  Parameters
  ----------
  periods : iterable of int
      Horizons to load, in the order their columns should appear.
  data_root : str, optional
      Directory containing the ``processed_data_<h>`` folders. Defaults to
      ``'..'``, the location that used to be hard-coded.

  Returns
  -------
  tuple
      ``(all_errors, all_mu)``: the error tables joined column-wise on the
      union of their indexes, and the mu arrays concatenated in the same
      horizon order. ``(None, None)`` when ``periods`` is empty.

  NOTE(review): presumably mu.npy holds one value per column of
  all_errors.csv, in the same order -- verify against the code that wrote
  these files.
  """
  error_frames = []
  mu_arrays = []
  for time_horizon in periods:
    data_dir = os.path.join(data_root, f'processed_data_{time_horizon}')
    df = pd.read_csv(os.path.join(data_dir, 'all_errors.csv'), index_col=0, parse_dates=True)

    # The reported average uses the raw errors, before any rescaling.
    avg_err = df.mean(axis=1).mean()
    print(f'Average error for time horizon {time_horizon}: {avg_err}')

    df = df * np.sqrt(BASE_LINE_HORIZON / time_horizon)
    error_frames.append(df.add_suffix(f'_{time_horizon}'))

    mu = np.load(os.path.join(data_dir, 'mu.npy'))
    mu_arrays.append(mu * (BASE_LINE_HORIZON / time_horizon) - RISK_FREE_RATE)

  if not error_frames:
    return None, None
  if len(error_frames) == 1:
    # Nothing to combine; hand back the single horizon as loaded.
    return error_frames[0], mu_arrays[0]

  # Combine once at the end instead of growing the frame inside the loop.
  all_errors = pd.concat(error_frames, axis=1, join='outer')
  all_mu = np.concatenate(mu_arrays)
  return all_errors, all_mu

In [None]:
# Horizons to load. Pass the named list to the loader instead of repeating the
# literal, so the two can never drift apart.
periods = [8, 16, 32, 64, 128, 256]
all_errors, all_mu = get_all_errors_and_mu(periods)

In [None]:
# Display the combined mu values returned by get_all_errors_and_mu.
all_mu

In [None]:
# Dimensions of the combined error table.
all_errors.shape

In [None]:
# Dimensions of the combined mu array, for comparison with all_errors.shape.
all_mu.shape

In [None]:
# Column labels of the combined error table (each carries a `_<horizon>` suffix).
all_errors.columns

In [None]:
# First-axis positions at which the mu value is below zero.
idx = np.nonzero(all_mu < 0)[0]
idx

In [None]:
# Report how many entries of all_mu are negative (stray ')' removed from the message).
print(f'Number of assets with mu < 0: {len(idx)}')

In [None]:
# Column labels of all_errors at the positions stored in idx.
# NOTE(review): this assumes all_mu is ordered like all_errors.columns -- confirm.
all_errors.columns[idx]

In [None]:
# Collect the columns to drop: for every column selected through idx, mark the
# same asset at each listed horizon that is at least as long as the flagged one.
# NOTE(review): 8 and 16 are not in the horizon list below, so columns at those
# horizons are never scheduled for removal -- confirm this is intended.
remove_columns = set()
for asset in all_errors.columns[idx]:
  # Column names are '<asset>_<horizon>'. Split on the LAST underscore so asset
  # names that themselves contain '_' stay intact.
  asset_name, asset_period = asset.rsplit('_', 1)
  asset_period = int(asset_period)
  for time_horizon in [32, 64, 128, 256]:
    if time_horizon >= asset_period:
      remove_columns.add(f'{asset_name}_{time_horizon}')
  

In [None]:
# Turn the collected names into an alphabetically ordered list.
remove_columns = sorted(remove_columns)

In [None]:
# Number of columns scheduled for removal.
len(remove_columns)

In [None]:
# Look up where each column scheduled for removal sits in all_errors,
# visiting the names in sorted order.
indices = list(map(all_errors.columns.get_loc, sorted(remove_columns)))

In [None]:
# Display the positions of the columns scheduled for removal.
indices

In [None]:
# Display the names of the columns scheduled for removal.
remove_columns

In [None]:
# Number of columns scheduled for removal (same check as in an earlier cell).
len(remove_columns)