In [None]:
# ── Google Colab Setup ────────────────────────────────────────────────────────
# This cell is a no-op when running locally.
import sys, os

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    from pathlib import Path
    import subprocess

    repo_dir = Path('/content/cryptoTrading2')

    # 1. Clone repo (skipped if already present)
    if not repo_dir.exists():
        subprocess.run(
            ['git', 'clone', 'https://github.com/kingsuching/cryptoTrading2.git', str(repo_dir)],
            check=True,
        )

    # 2. Set working directory and Python path
    os.chdir(str(repo_dir))
    if str(repo_dir) not in sys.path:
        sys.path.insert(0, str(repo_dir))

    # 3. Install dependencies
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q',
         'pandas', 'numpy', 'scikit-learn', 'statsmodels', 'prophet',
         'torch', 'matplotlib', 'tqdm', 'transformers',
         'beautifulsoup4', 'requests', 'serpapi'],
        check=True,
    )

    # 4. Write API keys from Colab Secrets (add CMC_KEY & SERPAPI_KEY in the key icon sidebar)
    try:
        from google.colab import userdata
        cmc     = userdata.get('CMC_KEY')     or ''
        serpapi = userdata.get('SERPAPI_KEY') or ''
    except Exception:
        cmc, serpapi = '', ''
    with open('files/API_KEYS.py', 'w') as _f:
        _f.write(f'CMC_KEY = "{cmc}"\nSERPAPI_KEY = "{serpapi}"\n')
    if not cmc or not serpapi:
        print("NOTE: API keys not set — add CMC_KEY & SERPAPI_KEY in Colab Secrets if data fetching is needed.")

    print(f"Colab setup complete. CWD: {os.getcwd()}")

In [None]:
import os, sys
from pathlib import Path

_root = next((p for p in [Path(os.getcwd()), *Path(os.getcwd()).parents]
              if p.name == 'cryptoTrading2'), None)
if _root:
    os.chdir(str(_root))
if os.getcwd() not in sys.path:
    sys.path.insert(0, os.getcwd())

import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from tqdm import tqdm
from files.functions import *

## Set up the Data

In [None]:
# Ticker symbols processed by every later cell (data, training, portfolio).
myCoins = [
    "ETH",
    "SOL",
]

In [None]:
# Build the market data set and the news-sentiment data set for every coin.
# NOTE(review): both helpers come from `files.functions`; what they write to
# disk is not visible from this notebook — confirm there.
for myCoin in tqdm(myCoins):
    coinbase_market_analysis_gradient(myCoin, dataPath='data', plotPath='plots')
    # The newspaper file is per coin, so the path must be built from the loop
    # variable (the previous `{COIN}` did not refer to the coin being processed).
    cn = newspaper_sentiment_pipeline(
        myCoin,
        newspaper_path=f'../newspapers/{myCoin}_newspapers.csv',
        queries_path='files/queries.txt',
    )

## Train Models

In [None]:
# NOTE(review): the earlier `from files.functions import *` probably already
# provides this name; the explicit import makes the dependency visible.
from files.functions import train_all_models

# Train once per coin listed in `myCoins`; tqdm only adds a progress bar.
# NOTE(review): presumably train_all_models fits and persists every model for
# the coin — confirm in files/functions.py.
for myCoin in tqdm(myCoins):
    train_all_models(myCoin)

## Run Predictions

In [None]:
# One entry per coin is expected under `models/`.  Hidden entries such as
# `.DS_Store` or `.gitkeep` are not coins and would break predict_matrix,
# so they are skipped; sorting makes the processing order deterministic.
coins = sorted(name for name in os.listdir('models') if not name.startswith('.'))

# DataFrame.to_csv does not create missing parent directories.
os.makedirs('predictions', exist_ok=True)

for coin in tqdm(coins):
    predictions = predict_matrix(coin)
    predictions.to_csv(f'predictions/{coin}_predictions.csv')

## Portfolio Optimizer

Compute Ranking based on Standardized RMSE

In [None]:
# Collect the RMSE of every model for every coin.
# Each file under metrics/<coin>/ holds a single float; the model name is the
# part of the file name before the first underscore.
# Keys are (coin, model) pairs: keying by model name alone let each coin
# overwrite the values of the previous one, so only the last coin was ranked.
rmses = {}
for myCoin in tqdm(myCoins):
    errors = os.listdir(os.path.join('metrics', myCoin))
    for model_rmse in errors:
        model_name = model_rmse.split('_')[0].upper()
        file_path = os.path.join('metrics', myCoin, model_rmse)
        with open(file_path) as f:
            rmses[(myCoin, model_name)] = float(f.read())

rmses = pd.Series(
    list(rmses.values()),
    index=pd.MultiIndex.from_tuples(list(rmses), names=['coin', 'model']),
    dtype=float,
    name='rmse',
)

# Rank inside each coin (1 = lowest RMSE).  Raw RMSEs are in price units, so
# they are not comparable across coins; within-coin ranks are scale-free.
rmse_ranks = rmses.groupby(level='coin').rank().rename('rank')
rmse_ranks.reset_index().sort_values(['coin', 'rank']).set_index(['coin', 'model'])['rank']

### Portfolio Optimization

**Method:**
1. **Ensemble forecast** — for each coin, average the `m` model predictions weighted by `1 / RMSE` (better-calibrated models carry more weight).
2. **Cumulative expected returns** — compare each forecast day's ensemble price to the current price (the most recent observed value), and pick, for each coin, the exit day with the highest cumulative return.
3. **Allocation** — allocate once, using each coin's best-case cumulative return $r_i$, under the long-only max-return formulation:

$$\max_{\mathbf{w}} \sum_i w_i r_i \quad \text{s.t.} \quad \sum_i w_i \le 1,\; w_i \ge 0$$

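   The notebook allocates *proportionally to positive expected returns* (a diversified heuristic — the strict optimum of this linear problem would put all capital into the single best coin): coins with a negative forecast get 0 weight, and if no coin has a positive forecast all capital stays in cash.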
4. **Simulation** — hold each coin until its optimal exit day, then move the proceeds to cash; the portfolio is marked to market every day using the ensemble forecast.

> To add more coins, extend `myCoins` at the top of the notebook.

In [None]:
import numpy as np

# ─────────────────────────────────────────────────────────────────────────────
# USER PARAMETERS
# These two values are the only knobs of the portfolio cell; everything below
# (and the plotting cell) reads them.
# ─────────────────────────────────────────────────────────────────────────────
PRINCIPAL       = 100    # Starting investment in USD
# Same list object as `myCoins` (an alias, not a copy).
COINS_PORTFOLIO = myCoins   # Coins to include — add more to myCoins at the top
# ─────────────────────────────────────────────────────────────────────────────

# ── Step 1: RMSE-weighted ensemble price forecast per coin ────────────────────
# Lower RMSE → more trustworthy model → higher weight in the ensemble average.
#
# For every coin:
#   * `predict_matrix(coin)` supplies one row per model (rows are matched to the
#     RMSE files by upper-cased model name) and one column per forecast step.
#   * each model gets weight 1 / RMSE, normalised to sum to 1; models without an
#     RMSE file get weight 0; if no model has an RMSE, all models weigh equally.
#   * the current price is the last value of RESPONSE_VARIABLE in the prepared
#     data (NOTE(review): dataSetup / fullDataPath live in files.functions —
#     confirm the rows are in chronological order).
ensemble_prices = {}
current_prices  = {}

# Smallest RMSE used when inverting.  An RMSE of exactly 0 would otherwise give
# an infinite weight, which turns into NaN on normalisation and silently
# collapses the ensemble price to 0.
_RMSE_FLOOR = 1e-12

for coin in COINS_PORTFOLIO:
    matrix = predict_matrix(coin)
    matrix.index = matrix.index.str.upper()

    # Read one RMSE per model from metrics/<coin>/<model>_*.
    coin_rmses = {}
    for fname in os.listdir(f'metrics/{coin}'):
        model_tag = fname.split('_')[0].upper()
        with open(f'metrics/{coin}/{fname}') as fh:
            coin_rmses[model_tag] = float(fh.read())

    # Align RMSEs with the prediction rows; missing models become NaN → weight 0.
    rmse_s  = pd.Series(coin_rmses, dtype=float).reindex(matrix.index)
    weights = (1.0 / rmse_s.clip(lower=_RMSE_FLOOR)).fillna(0.0)
    w_sum   = weights.sum()
    if w_sum == 0:
        # No usable RMSE at all: fall back to a plain average of the models.
        weights = pd.Series(1.0 / len(matrix), index=matrix.index)
    else:
        weights /= w_sum

    # Weighted sum over the model rows → one ensemble price per forecast step.
    ensemble_prices[coin] = (
        matrix.astype(float).multiply(weights.values, axis=0).sum(axis=0)
    )

    raw   = pd.read_csv(fullDataPath(coin))
    daily = dataSetup(raw, trainingColPath=TRAINING_COLUMNS,
                      response=RESPONSE_VARIABLE, number=LIMIT)
    current_prices[coin] = float(daily[RESPONSE_VARIABLE].iloc[-1])

# Rows: forecast steps, columns: coins.
ensemble_df = pd.DataFrame(ensemble_prices)

print("Ensemble predicted prices (USD):")
print(ensemble_df.round(2).to_string())
print("\nCurrent prices:", {c: f'${v:,.2f}' for c, v in current_prices.items()})

# ── Step 2: Cumulative returns and optimal exit day per coin ──────────────────
# cum_return[coin][d] = (predicted_price[d] - current_price) / current_price
# We exit each coin on whichever day maximises this cumulative return.
#
# The returns are computed one whole column at a time.  Writing them cell by
# cell through `.loc[date, coin]` was slow and, with repeated index labels,
# overwrote every row sharing a label with the last value written.
_cum_returns = {}
for coin in COINS_PORTFOLIO:
    cp    = current_prices[coin]
    preds = ensemble_df[coin].to_numpy(dtype=float)
    # A zero current price makes the ratio undefined; treat it as "no return".
    _cum_returns[coin] = (preds - cp) / cp if cp != 0 else np.zeros(len(preds))

cum_returns_df = pd.DataFrame(_cum_returns, index=ensemble_df.index,
                              columns=COINS_PORTFOLIO, dtype=float)

opt_days    = {}   # coin → index of best exit day within the forecast window
opt_returns = {}   # coin → max cumulative return (may be negative → stay in cash)

for coin in COINS_PORTFOLIO:
    # Positional argmax: the first day reaching the maximum is chosen.
    best_idx          = int(cum_returns_df[coin].values.argmax())
    opt_days[coin]    = best_idx
    opt_returns[coin] = float(cum_returns_df[coin].iloc[best_idx])

print("\nCumulative returns from current price (%):")
print((cum_returns_df.astype(float) * 100).round(3).to_string())

print("\nOptimal exit strategy:")
for coin in COINS_PORTFOLIO:
    d = opt_days[coin]
    print(f"  {coin}: exit day {d + 1} ({ensemble_df.index[d]}), "
          f"max expected return = {opt_returns[coin] * 100:+.2f}%")

# ── Step 3: Allocate based on maximum achievable return per coin ──────────────
# Coins with a negative best-case return get 0 weight — capital stays in cash.
def _allocate(returns_row: np.ndarray) -> np.ndarray:
    pos   = np.maximum(returns_row, 0.0)
    total = pos.sum()
    return pos / total if total > 0 else np.zeros(len(returns_row))

# Best-case cumulative return of each coin, in COINS_PORTFOLIO order, turned
# into weights by _allocate.
opt_ret_arr = np.array([opt_returns[ticker] for ticker in COINS_PORTFOLIO])
alloc_w = _allocate(opt_ret_arr)

# Report the weights as percentages; whatever is not invested is cash.
print("\nPortfolio allocation:")
for coin, wi in zip(COINS_PORTFOLIO, alloc_w):
    print("  {}: {:.1f}%".format(coin, wi * 100))
print("  Cash: {:.1f}%".format((1 - alloc_w.sum()) * 100))

# ── Step 4: Simulate — hold each coin until its optimal exit day ──────────────
# On exit day the position is liquidated; freed capital becomes cash.
# Portfolio value is marked-to-market every day using the ensemble forecast.
#
# State carried through the loop:
#   cash      – USD not invested (grows whenever a position is sold)
#   holdings  – coin → USD invested at the buy price (set to 0 once sold)
#   buy_price – coin → price the position was opened at (the current price)
cash      = PRINCIPAL * float(1 - alloc_w.sum())
holdings  = {coin: PRINCIPAL * float(wi) for coin, wi in zip(COINS_PORTFOLIO, alloc_w)}
buy_price = {coin: current_prices[coin] for coin in COINS_PORTFOLIO}

portfolio_log = []          # one dict per forecast day, turned into result_df below
prev_value    = PRINCIPAL   # the first daily return is measured against the principal

for i, date in enumerate(ensemble_df.index):
    # Cash out any coin that reaches its optimal exit today
    # (this must run before the mark-to-market so a sold position is counted
    # as cash, not as a holding, on its exit day).
    for coin in COINS_PORTFOLIO:
        if i == opt_days[coin] and holdings[coin] > 0:
            pred        = float(ensemble_df[coin].iloc[i])
            bp          = buy_price[coin]
            sell_return = (pred - bp) / bp if bp != 0 else 0.0
            cash       += holdings[coin] * (1 + sell_return)
            holdings[coin] = 0.0

    # Mark-to-market: current value of each still-held position
    coin_values = {}
    for coin in COINS_PORTFOLIO:
        if holdings[coin] > 0:
            pred    = float(ensemble_df[coin].iloc[i])
            bp      = buy_price[coin]
            cum_ret = (pred - bp) / bp if bp != 0 else 0.0
            coin_values[coin] = holdings[coin] * (1 + cum_ret)
        else:
            coin_values[coin] = 0.0

    # Total value and its change relative to the previous logged day.
    portfolio_value = cash + sum(coin_values.values())
    daily_return    = (portfolio_value - prev_value) / prev_value if prev_value != 0 else 0.0
    prev_value      = portfolio_value

    row = {
        'date':            date,
        'portfolio_value': round(portfolio_value, 2),
        'daily_return_%':  round(daily_return * 100, 4),
    }
    # Share of each coin (and of cash) in today's portfolio, in percent.
    for coin in COINS_PORTFOLIO:
        # Guard against division by zero when the portfolio is worth nothing.
        pv = portfolio_value if portfolio_value > 0 else 1
        row[f'{coin}_%'] = round(coin_values[coin] / pv * 100, 2)
    row['cash_%'] = round(cash / portfolio_value * 100, 2) if portfolio_value > 0 else 100.0
    portfolio_log.append(row)

# One row per forecast day, indexed by the forecast label.
result_df = pd.DataFrame(portfolio_log).set_index('date')

# Summary uses the value from the last loop iteration.
# NOTE(review): `portfolio_value` is undefined if ensemble_df has no rows.
print(f"\n{'Principal':20s}  ${PRINCIPAL:>12,.2f}")
print(f"{'Final portfolio':20s}  ${portfolio_value:>12,.2f}")
print(f"{'Total return':20s}  {((portfolio_value / PRINCIPAL) - 1) * 100:>+11.2f}%")
print()
print(result_df.to_string())

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Two stacked panels: portfolio value on top (taller), allocation below.
fig, _axes = plt.subplots(
    nrows=2, ncols=1, figsize=(13, 9), gridspec_kw=dict(height_ratios=[3, 2])
)
ax_val, ax_alloc = _axes

# ── Portfolio value over time ─────────────────────────────────────────────────
# Prepend the starting point so the curve begins at the principal ("Now").
pv_series   = [PRINCIPAL, *result_df['portfolio_value'].tolist()]
date_labels = ['Now', *result_df.index.tolist()]
x           = range(len(pv_series))

ax_val.plot(x, pv_series, marker='o', linewidth=2.5, color='#2196F3', zorder=3)

# Shade the area between the curve and the principal: green above, red below.
for _mask, _shade, _legend in (
    ([v >= PRINCIPAL for v in pv_series], '#4CAF50', 'Gain'),
    ([v < PRINCIPAL for v in pv_series], '#F44336', 'Loss'),
):
    ax_val.fill_between(x, PRINCIPAL, pv_series, where=_mask,
                        color=_shade, alpha=0.25, label=_legend)

ax_val.axhline(PRINCIPAL, color='gray', linestyle='--', alpha=0.7,
               label=f'Principal ${PRINCIPAL:,.0f}')

# Label every point with its dollar value, slightly above the marker.
for i, v in enumerate(pv_series):
    ax_val.annotate(f'${v:,.0f}', (i, v),
                    textcoords='offset points', xytext=(0, 10),
                    ha='center', fontsize=8)

ax_val.set_xticks(list(x))
ax_val.set_xticklabels(date_labels, rotation=20, ha='right')
ax_val.yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))
ax_val.set_ylabel('Portfolio Value (USD)')
ax_val.set_title(
    f'Portfolio Optimisation  —  {", ".join(COINS_PORTFOLIO)}  |  Principal: ${PRINCIPAL:,.0f}',
    fontsize=13, pad=10
)
ax_val.legend()
ax_val.grid(True, alpha=0.3)

# ── Daily allocation stacked bar ──────────────────────────────────────────────
# One stacked bar per forecast day: a segment per coin plus one for cash.
alloc_cols = [*(f'{c}_%' for c in COINS_PORTFOLIO), 'cash_%']
bar_labels = COINS_PORTFOLIO + ['Cash']
bar_colors = ['#FF9800', '#4CAF50', '#2196F3', '#9C27B0', '#00BCD4', '#F44336', '#795548']
x_pos      = range(len(result_df))
bottom     = np.zeros(len(result_df))   # running top of the stack for each day

for idx, (col, label) in enumerate(zip(alloc_cols, bar_labels)):
    vals = result_df[col].values
    # Colours cycle if there are more segments than palette entries.
    ax_alloc.bar(x_pos, vals, bottom=bottom, label=label,
                 color=bar_colors[idx % len(bar_colors)], alpha=0.85)
    # Only segments taller than 5 % get a text label, centred in the segment.
    for j in np.flatnonzero(vals > 5):
        ax_alloc.text(j, bottom[j] + vals[j] / 2, f'{vals[j]:.0f}%',
                      ha='center', va='center',
                      fontsize=9, fontweight='bold', color='white')
    bottom += vals

ax_alloc.set_xticks(list(x_pos))
ax_alloc.set_xticklabels(result_df.index.tolist(), rotation=20, ha='right')
ax_alloc.set_ylim(0, 115)   # headroom above 100 % for the legend
ax_alloc.set_ylabel('Allocation (%)')
ax_alloc.grid(True, alpha=0.3, axis='y')
ax_alloc.legend(loc='upper right')

plt.tight_layout()
plt.show()