# In [None]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

def arma11_negloglike(params, y):
    """
    Gaussian negative log-likelihood of an ARMA(1,1) model, conditional on
    the first prediction being the constant alone.

    params: sequence (c, phi, theta, sigma2) -- constant, AR(1) coefficient,
            MA(1) coefficient and innovation variance.
    y:      indexable series of observations (must contain at least one value).

    Returns the negative log-likelihood, or the penalty value 1e15 when
    sigma2 is not strictly positive.
    """
    c, phi, theta, sigma2 = params
    n_obs = len(y)

    # A non-positive variance has no likelihood; return a large penalty so a
    # minimizer is pushed back towards the feasible region.
    if sigma2 <= 0:
        return 1e15

    # Residuals are built recursively: each prediction needs the residual of
    # the previous step, so this cannot be a plain vectorized expression.
    resid = np.zeros(n_obs)
    resid[0] = y[0] - c  # first prediction is just the constant
    for t in range(1, n_obs):
        predicted = c + phi * y[t - 1] + theta * resid[t - 1]
        resid[t] = y[t] - predicted

    normalisation = 0.5 * n_obs * np.log(2 * np.pi * sigma2)
    misfit = 0.5 * np.sum(resid ** 2) / sigma2
    return normalisation + misfit

def fit_arma11(y):
    """
    Estimate ARMA(1,1) parameters for the series y by minimizing
    arma11_negloglike with bounded L-BFGS-B.

    Returns the optimizer's solution [c, phi, theta, sigma2] when it reports
    success; otherwise prints a warning and returns None.
    """
    def objective(candidate):
        return arma11_negloglike(candidate, y)

    # Start from a zero constant, small AR/MA coefficients and the sample
    # variance of the series as the innovation variance.
    start = np.array([0.0, 0.1, 0.1, np.var(y)])

    # Box constraints, in parameter order.
    box = [
        (-10, 10),       # c
        (-0.99, 0.99),   # phi
        (-0.99, 0.99),   # theta
        (1e-8, None),    # sigma2 must be positive
    ]

    outcome = scipy.optimize.minimize(objective, start, method='L-BFGS-B', bounds=box)
    if not outcome.success:
        print("Warning: Arma(1,1) fit did not converge")
        return None
    return outcome.x
        

class ARIMAForecaster:
    """
    One-step-ahead ARIMA(1,0,1) forecaster driven by pre-fitted coefficients.

    For every asset the prediction is
        const + ar.L1 * (last observed return) + ma.L1 * (last residual),
    where the residual is the gap between the latest observed return and the
    prediction this object would have produced for it.
    """
    def __init__(self, arima_params):
        # Expected layout of arima_params:
        # { 'Asset_A': {'const': c, 'ar.L1': phi, 'ma.L1': theta}, ... }
        self.params = arima_params
        self.last_return = {}   # asset -> most recent observed return
        self.last_resid = {}    # asset -> most recent forecast error

    def _predict(self, asset):
        # Single-asset one-step prediction from the currently stored state.
        coeffs = self.params[asset]
        c = coeffs['const']
        phi = coeffs['ar.L1']
        theta = coeffs['ma.L1']
        return c + phi * self.last_return[asset] + theta * self.last_resid[asset]

    def initialize(self, last_returns):
        # Seed the state from a dict of latest returns; with no earlier
        # forecast to compare against, every residual starts at zero.
        for asset, latest in last_returns.items():
            self.last_return[asset] = latest
            self.last_resid[asset] = 0.0

    def forecast(self):
        # One prediction per asset that has fitted parameters.
        return {asset: self._predict(asset) for asset in self.params}

    def update(self, observed_returns):
        # Fold newly observed returns into the state. The prediction must be
        # evaluated before the stored return is overwritten, because the
        # residual is measured against the forecast made from the old state.
        for asset, realised in observed_returns.items():
            predicted = self._predict(asset)
            self.last_return[asset] = realised
            self.last_resid[asset] = realised - predicted

# --- Step 2 & 3: Portfolio Optimization using Forecasted Returns ---
def optimize_portfolio(mu_forecast, cov, rf=0.0):
    """
    Find portfolio weights that maximize the Sharpe ratio implied by the
    forecast returns (mu_forecast) and the covariance matrix (cov).

    The search runs SLSQP on the negative Sharpe ratio, with every weight
    limited to [-1, 1] and the weights required to sum to 1. rf is subtracted
    from the portfolio's expected return. If the solver does not report
    success, the equal-weight starting point is returned instead.
    """
    n_assets = len(mu_forecast)
    equal_weights = np.full(n_assets, 1/n_assets)

    def objective(weights):
        volatility = np.sqrt(np.dot(weights, np.dot(cov, weights)))
        # A (near-)zero-risk portfolio would blow up the ratio; return a large
        # penalty instead of dividing by it.
        if volatility < 1e-8:
            return 1e6
        excess_return = np.dot(weights, mu_forecast) - rf
        return -excess_return / volatility

    fully_invested = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
    weight_limits = [(-1, 1)] * n_assets

    solution = scipy.optimize.minimize(
        objective,
        equal_weights,
        bounds=weight_limits,
        constraints=[fully_invested],
        method="SLSQP",
    )
    if solution.success:
        return solution.x
    return equal_weights

# --- Bringing It All Together ---
class ARIMA_Portfolio_Allocator:
    """
    Combines an ARIMAForecaster with optimize_portfolio.

    The object keeps a growing history of prices (self.price_data) and of the
    simple returns derived from them (self.returns). Forecasts come from the
    forecaster's state; the covariance matrix is estimated from the last
    `window` rows of self.returns.
    """

    def __init__(self, price_data, arima_params, window=100):
        """
        price_data: DataFrame of historical prices (index sorted by time)
        arima_params: Offline-fitted ARIMA parameters for each asset (dictionary)
        window: number of most recent return rows used for covariance estimation
        """
        self.price_data = price_data
        self.window = window

        # Simple returns of the history; the first row (no predecessor) and
        # any row containing NaN are dropped.
        self.returns = price_data.pct_change().dropna()
        self.assets = self.returns.columns.tolist()

        # Seed the forecaster with the latest known return of every asset.
        self.forecaster = ARIMAForecaster(arima_params)
        self.forecaster.initialize(self.returns.iloc[-1].to_dict())

    def allocate(self, current_prices):
        """
        Compute the portfolio weights for the next period.

        current_prices is accepted for interface compatibility but is not
        read here: the forecast and the covariance come from the state built
        by __init__ and update_with_new_prices.
        """
        # 1. Forecast returns, ordered like self.assets.
        forecasts = self.forecaster.forecast()
        mu_forecast = np.array([forecasts[asset] for asset in self.assets])

        # 2. Covariance of the most recent `window` return rows. Tail slicing
        #    already yields the whole history when it is shorter than window.
        recent_returns = self.returns[self.assets].iloc[-self.window:]
        cov = np.cov(recent_returns.T)

        # 3. Solve the optimization problem.
        return optimize_portfolio(mu_forecast, cov)

    def update_with_new_prices(self, new_prices):
        """
        Ingest one new row of prices.

        new_prices: a pandas Series or dict keyed by asset name, a single-row
                    DataFrame with asset columns, or a plain array-like given
                    in the column order of self.price_data.

        The observed returns are passed to the forecaster and appended to
        self.returns (so the covariance window actually rolls), and the prices
        are appended to self.price_data.

        Raises ValueError if labelled input lacks one of the assets, or if a
        DataFrame with more than one row is supplied.
        """
        columns = self.price_data.columns

        if isinstance(new_prices, pd.DataFrame):
            if len(new_prices) != 1:
                raise ValueError("new_prices must contain exactly one row of prices")
            new_prices = new_prices.iloc[0]

        if isinstance(new_prices, (pd.Series, dict)):
            # Align by asset name rather than by position so that differently
            # ordered input cannot be attributed to the wrong asset.
            labelled = pd.Series(new_prices)
            missing = columns.difference(labelled.index)
            if len(missing) > 0:
                raise ValueError(f"new_prices is missing assets: {list(missing)}")
            prices = labelled.reindex(columns)
        else:
            # Unlabelled input is interpreted positionally.
            prices = pd.Series(np.asarray(new_prices), index=columns)

        # Simple return relative to the last stored price row.
        last_prices = self.price_data.iloc[-1]
        new_returns = (prices - last_prices) / last_prices

        # Update the ARIMA forecaster with the observed returns.
        self.forecaster.update(new_returns.to_dict())

        # Extend both histories. Without appending to self.returns the
        # covariance used by allocate() would stay frozen at its initial value.
        returns_row = pd.DataFrame([new_returns.to_numpy()], columns=columns)
        prices_row = pd.DataFrame([prices.to_numpy()], columns=columns)
        self.returns = pd.concat([self.returns, returns_row], ignore_index=True)
        self.price_data = pd.concat([self.price_data, prices_row], ignore_index=True)
        
# Load the price table. The first CSV column is read as the index and then
# restored as an ordinary column; if it comes back under the default name
# "index" it is renamed to "Asset_1".
data = (
    pd.read_csv('case2.csv', index_col=0)
    .reset_index(drop=False)
    .rename(columns={"index": "Asset_1"})
)
# Shift the row labels up by one.
data.index = data.index + 1

'''
We recommend that you change your train and test split
'''

# Split without shuffling: the last 20% of rows become TEST.
TRAIN, TEST = train_test_split(data, test_size=0.2, shuffle=False)


def build_arima_params(price_data):
    """
    Compute ARMA(1,1) parameters for each asset in price_data.

    Prices are converted to simple returns (rows containing NaN are dropped)
    and fit_arma11 is run on each column.

    Returns a dict {asset: {"const": c, "ar.L1": phi, "ma.L1": theta}} with
    an entry for EVERY asset column. When fit_arma11 reports failure (returns
    None) the asset falls back to a mean-only model -- const equal to the
    sample mean return, AR and MA coefficients zero -- so that consumers which
    look parameters up per asset do not hit a missing key.
    """
    returns_df = price_data.pct_change().dropna()
    params_est = {}
    for asset in returns_df.columns:
        y = returns_df[asset].to_numpy()  # asset return series
        fitted = fit_arma11(y)
        if fitted is None:
            # Fit failed: forecast the historical mean instead of dropping the asset.
            mean_return = float(np.mean(y)) if len(y) else 0.0
            params_est[asset] = {"const": mean_return, "ar.L1": 0.0, "ma.L1": 0.0}
            continue
        # The fitted innovation variance is not needed for forecasting.
        c, phi, theta, _ = fitted
        params_est[asset] = {"const": c, "ar.L1": phi, "ma.L1": theta}
    return params_est

# Module-level fit on the training split. Allocator.__init__ builds its own
# parameters and allocator from the data it is given, so these two objects are
# independent of the ones created inside grading().
arima_params = build_arima_params(price_data=TRAIN)
arima_allocator = ARIMA_Portfolio_Allocator(
    price_data=TRAIN,
    arima_params=arima_params,
    window=100,
)

class Allocator():
    """
    Entry point used by the grading script: receives one row of prices per
    day and returns the portfolio weights for the following day.
    """
    def __init__(self, train_data, window = 100):
        '''
        Any data you want to store between days must be stored in a class field

        train_data: DataFrame of historical prices, one column per asset
        window: number of return rows used for covariance estimation
        '''

        # Price path seen so far; extended by one row per allocate_portfolio call.
        self.running_price_paths = train_data.copy()

        self.train_data = train_data.copy()

        self.window = window

        # Fit the per-asset ARMA(1,1) parameters once, on the training data.
        self.arima_params = build_arima_params(train_data)

        self.allocator = ARIMA_Portfolio_Allocator(train_data, self.arima_params, window=self.window)

        # Do any preprocessing here -- do not touch running_price_paths, it will store the price path up to that data

    def allocate_portfolio(self, asset_prices):
        '''
        asset_prices: np array of length 6, prices of the 6 assets on a particular day
        weights: np array of length 6, portfolio allocation for the next day
        '''

        asset_prices_series = pd.Series(asset_prices, index=self.train_data.columns)

        # Ingest today's prices BEFORE forecasting. The forecaster predicts one
        # step past its last observed return, so forecasting first would yield
        # a prediction of today's (already realised) return instead of the
        # next day's return that the weights are applied to.
        self.allocator.update_with_new_prices(asset_prices_series)

        weights = self.allocator.allocate(asset_prices_series)

        self.running_price_paths = pd.concat(
            [self.running_price_paths, pd.DataFrame([asset_prices_series])],
            ignore_index=True
        )

        return weights


def grading(train_data, test_data):
    '''
    Grading Script

    Builds an Allocator on train_data, asks it for weights on every row of
    test_data, then simulates capital starting from 1 by holding each day's
    weights until the next day's prices.

    Returns (sharpe, capital, weights): the mean/std ratio of the daily
    capital returns (0 when the std is exactly 0), the capital path as an
    array, and the (days x 6) weight matrix.

    Raises Exception("Weights Outside of Bounds") as soon as any collected
    weight lies outside [-1, 1].
    '''
    n_days = len(test_data.index)
    weights = np.zeros((n_days, 6))
    alloc = Allocator(train_data)

    for day in range(n_days):
        weights[day, :] = alloc.allocate_portfolio(test_data.iloc[day, :])
        # Rows not yet filled are zero, so checking the full matrix only ever
        # trips on weights collected so far.
        if (weights < -1).any() or (weights > 1).any():
            raise Exception("Weights Outside of Bounds")

    capital = [1]
    for day in range(n_days - 1):
        today = np.array(test_data.iloc[day, :])
        tomorrow = np.array(test_data.iloc[day + 1, :])
        equity = capital[-1]

        shares = equity * weights[day] / today
        # Whatever is not invested stays as cash.
        cash = equity - np.dot(shares, today)
        capital.append(cash + np.dot(shares, tomorrow))

    capital = np.array(capital)
    returns = np.diff(capital) / capital[:-1]

    volatility = np.std(returns)
    sharpe = np.mean(returns) / volatility if volatility != 0 else 0

    return sharpe, capital, weights

# Back-test on the hold-out split and report the result.
sharpe, capital, weights = grading(TRAIN, TEST)
#Sharpe gets printed to command line
print(sharpe)

# Shared x-axis: one tick per test day.
days = np.arange(len(TEST))

# Capital path over the test period.
plt.figure(figsize=(10, 6), dpi=80)
plt.title("Capital")
plt.plot(days, capital)
plt.show()

# Per-asset weights over the test period.
plt.figure(figsize=(10, 6), dpi=80)
plt.title("Weights")
plt.plot(days, weights)
plt.legend(TEST.columns)
plt.show()

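# Captured notebook output: NameError: name 'arma11_negloglike' is not defined
# NOTE(review): arma11_negloglike is defined near the top of this cell, so this
# output is presumably left over from an earlier run -- confirm by re-running.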