In [1]:
import pandas as pd

from files.functions import *
import warnings
import os
warnings.filterwarnings('ignore')

# Set up the Data

In [2]:
myCoins = ['BTC']

In [None]:
for myCoin in tqdm(myCoins):
    coinbase_market_analysis_gradient(myCoin, dataPath='data', plotPath='plots')
    cn = newspaper_sentiment_pipeline(myCoin, newspaper_path=f'../newspapers/{COIN}_newspapers.csv', queries_path='files/queries.txt')

## Train Models

In [None]:
notebooks = [i for i in os.listdir('notebooks') if i.endswith('_training.ipynb')]
for notebook in tqdm(notebooks):
    os.system(f'jupyter nbconvert --execute --to notebook --inplace notebooks/{notebook}')

## Run Predictions

In [3]:
coins = os.listdir('models')
for coin in tqdm(coins):
    predictions = predict_matrix(coin)
    predictions.to_csv(f'predictions/{coin}_predictions.csv')

100%|██████████| 1/1 [00:00<00:00,  1.33it/s]


## Portfolio Optimizer

Compute Ranking based on Standardized RMSE

In [4]:
rmses = {}
for myCoin in tqdm(myCoins):
    errors = os.listdir(os.path.join('metrics', myCoin))
    for model_rmse in errors:
        model_name = model_rmse.split('_')[0].upper()
        file_path = os.path.join('metrics', myCoin, model_rmse)
        with open(file_path) as f:
            rmses[model_name] = float(f.read())
rmses = pd.Series(rmses)
rmses.rank().sort_values(ascending=False)

100%|██████████| 1/1 [00:00<00:00, 336.46it/s]


KNN            8.0
TRANSFORMER    7.0
TFT            6.0
ARIMA          5.0
LSTM           4.0
SVM            3.0
GBM            2.0
PROPHET        1.0
dtype: float64

### Portfolio Optimization

**Method:**
1. **Ensemble forecast** — for each coin, average the `m` model predictions weighted by `1 / RMSE` (better-calibrated models carry more weight).
2. **Daily expected returns** — compare each day's ensemble price to the previous day (or to today's closing price for day 0).
3. **Allocation** — solve the long-only max-return problem each day:

$$\max_{\mathbf{w}} \sum_i w_i r_i \quad \text{s.t.} \quad \sum_i w_i \le 1,\; w_i \ge 0$$

   The closed-form solution is *proportional to positive expected returns*: coins with a negative forecast get 0 weight and the corresponding capital stays in cash.
4. **Simulation** — compound the portfolio value day-by-day using the weighted return.

> To add more coins, extend `myCoins` at the top of the notebook.

In [6]:
import numpy as np

# ─────────────────────────────────────────────────────────────────────────────
# USER PARAMETERS
# ─────────────────────────────────────────────────────────────────────────────
PRINCIPAL       = 10_000    # Starting investment in USD
COINS_PORTFOLIO = myCoins   # Coins to include — add more to myCoins at the top
# ─────────────────────────────────────────────────────────────────────────────

# ── Step 1: RMSE-weighted ensemble price forecast per coin ────────────────────
# Lower RMSE → more trustworthy model → higher weight in the ensemble average.
ensemble_prices = {}   # coin → pd.Series of predicted prices (len = TEST_DAYS)
current_prices  = {}   # coin → last known daily close price

for coin in COINS_PORTFOLIO:
    # Get prediction matrix directly from predict_matrix (models × dates)
    matrix = predict_matrix(coin)
    matrix.index = matrix.index.str.upper()   # normalise case for RMSE matching

    # Load each model's standardised RMSE for this coin
    coin_rmses = {}
    for fname in os.listdir(f'metrics/{coin}'):
        model_tag = fname.split('_')[0].upper()
        with open(f'metrics/{coin}/{fname}') as fh:
            coin_rmses[model_tag] = float(fh.read())

    rmse_s  = pd.Series(coin_rmses).reindex(matrix.index)
    weights = (1 / rmse_s).fillna(0)          # weight ∝ 1/RMSE
    weights /= weights.sum()                   # normalise to sum = 1

    ensemble_prices[coin] = (
        matrix.astype(float).multiply(weights.values, axis=0).sum(axis=0)
    )

    # Last daily close from historical data
    raw = pd.read_csv(fullDataPath(coin))
    daily = dataSetup(raw, trainingColPath=TRAINING_COLUMNS,
                      response=RESPONSE_VARIABLE, number=LIMIT)
    current_prices[coin] = float(daily[RESPONSE_VARIABLE].iloc[-1])

# Shape: (TEST_DAYS, n_coins)
ensemble_df = pd.DataFrame(ensemble_prices)

print("Ensemble predicted prices (USD):")
print(ensemble_df.round(2).to_string())
print("\nCurrent prices:", {c: f'${v:,.2f}' for c, v in current_prices.items()})

# ── Step 2: Daily expected returns ────────────────────────────────────────────
# Day 0 return: current → first predicted price
# Day d return: predicted[d-1] → predicted[d]
returns_df = pd.DataFrame(index=ensemble_df.index,
                          columns=COINS_PORTFOLIO, dtype=float)
for i, date in enumerate(ensemble_df.index):
    for coin in COINS_PORTFOLIO:
        prev = (current_prices[coin] if i == 0
                else float(ensemble_df[coin].iloc[i - 1]))
        curr = float(ensemble_df[coin].iloc[i])
        returns_df.loc[date, coin] = (curr - prev) / prev

returns_df = returns_df.astype(float)
print("\nDaily expected returns (%):")
print((returns_df * 100).round(3).to_string())

# ── Step 3: Optimal allocation ────────────────────────────────────────────────
# Strategy: allocate proportionally to positive expected returns.
# Coins with negative predicted return get 0 weight — capital stays in cash.
# When every coin is negative, hold 100 % cash for that day.
#
# This is equivalent to solving:
#   max  Σ wᵢ rᵢ   s.t.  Σ wᵢ ≤ 1,  wᵢ ≥ 0
# whose closed-form solution is proportional-to-positive-returns allocation.

def _allocate(returns_row: np.ndarray) -> np.ndarray:
    pos   = np.maximum(returns_row, 0.0)
    total = pos.sum()
    return pos / total if total > 0 else np.zeros(len(returns_row))

# ── Step 4: Simulate portfolio value ─────────────────────────────────────────
portfolio_log   = []
portfolio_value = float(PRINCIPAL)

for date in ensemble_df.index:
    r = returns_df.loc[date].values.astype(float)
    w = _allocate(r)

    # Portfolio return: cash portion (1 - Σwᵢ) earns 0 %; invested portion earns Σwᵢrᵢ
    daily_return    = float(np.dot(w, r))
    portfolio_value = portfolio_value * (1 + daily_return)

    row = {
        'date':              date,
        'portfolio_value':   round(portfolio_value, 2),
        'daily_return_%':    round(daily_return * 100, 4),
    }
    for coin, wi in zip(COINS_PORTFOLIO, w):
        row[f'{coin}_%'] = round(wi * 100, 2)
    row['cash_%'] = round((1 - w.sum()) * 100, 2)
    portfolio_log.append(row)

result_df = pd.DataFrame(portfolio_log).set_index('date')

print(f"\n{'Principal':20s}  ${PRINCIPAL:>12,.2f}")
print(f"{'Final portfolio':20s}  ${portfolio_value:>12,.2f}")
print(f"{'Total return':20s}  {((portfolio_value / PRINCIPAL) - 1) * 100:>+11.2f}%")
print()
print(result_df.to_string())

Ensemble predicted prices (USD):
             BTC
Model           
GBM          0.0
SVM          0.0
KNN          0.0
ARIMA        0.0
LSTM         0.0
TFT          0.0
Transformer  0.0
Prophet      0.0

Current prices: {'BTC': '$64,625.78'}


ZeroDivisionError: float division by zero

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

fig, (ax_val, ax_alloc) = plt.subplots(
    2, 1, figsize=(13, 9), gridspec_kw={'height_ratios': [3, 2]}
)

# ── Portfolio value over time ─────────────────────────────────────────────────
pv_series   = [PRINCIPAL] + result_df['portfolio_value'].tolist()
date_labels = ['Now'] + result_df.index.tolist()
x           = range(len(pv_series))

ax_val.plot(x, pv_series, marker='o', linewidth=2.5, color='#2196F3', zorder=3)
ax_val.fill_between(x, PRINCIPAL, pv_series,
                    where=[v >= PRINCIPAL for v in pv_series],
                    color='#4CAF50', alpha=0.25, label='Gain')
ax_val.fill_between(x, PRINCIPAL, pv_series,
                    where=[v < PRINCIPAL for v in pv_series],
                    color='#F44336', alpha=0.25, label='Loss')
ax_val.axhline(PRINCIPAL, color='gray', linestyle='--', alpha=0.7,
               label=f'Principal ${PRINCIPAL:,.0f}')
for i, v in enumerate(pv_series):
    ax_val.annotate(f'${v:,.0f}', (i, v),
                    textcoords='offset points', xytext=(0, 10),
                    ha='center', fontsize=8)
ax_val.set_xticks(list(x))
ax_val.set_xticklabels(date_labels, rotation=20, ha='right')
ax_val.yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))
ax_val.set_ylabel('Portfolio Value (USD)')
ax_val.set_title(
    f'Portfolio Optimisation  —  {", ".join(COINS_PORTFOLIO)}  |  Principal: ${PRINCIPAL:,.0f}',
    fontsize=13, pad=10
)
ax_val.legend()
ax_val.grid(True, alpha=0.3)

# ── Daily allocation stacked bar ──────────────────────────────────────────────
alloc_cols = [f'{c}_%' for c in COINS_PORTFOLIO] + ['cash_%']
bar_labels = COINS_PORTFOLIO + ['Cash']
bar_colors = ['#FF9800', '#4CAF50', '#2196F3', '#9C27B0', '#00BCD4', '#F44336', '#795548']
x_pos      = range(len(result_df))
bottom     = np.zeros(len(result_df))

for idx, (col, label) in enumerate(zip(alloc_cols, bar_labels)):
    vals = result_df[col].values
    ax_alloc.bar(x_pos, vals, bottom=bottom, label=label,
                 color=bar_colors[idx % len(bar_colors)], alpha=0.85)
    for j, (v, b) in enumerate(zip(vals, bottom)):
        if v > 5:
            ax_alloc.text(j, b + v / 2, f'{v:.0f}%',
                          ha='center', va='center',
                          fontsize=9, fontweight='bold', color='white')
    bottom += vals

ax_alloc.set_xticks(list(x_pos))
ax_alloc.set_xticklabels(result_df.index.tolist(), rotation=20, ha='right')
ax_alloc.set_ylabel('Allocation (%)')
ax_alloc.set_ylim(0, 115)
ax_alloc.legend(loc='upper right')
ax_alloc.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()