In [1]:
import pandas as pd
import requests
import numpy as np
import yfinance as yf
%matplotlib inline
import random
import plotly.io as pio

import datetime as dt
import plotly.graph_objects as go

import os
from dotenv import load_dotenv

import datetime as dt
from datetime import timedelta

from stable_baselines3 import PPO
import re

import streamlit as st

from diskcache import Cache

# Register a template named "custom", based on the built-in "plotly" template,
# and set its layout font family to Cardo.
# NOTE(review): confirm that assigning into pio.templates stores a copy; if it
# aliases, the font change below also modifies the built-in "plotly" template.
pio.templates["custom"] = pio.templates["plotly"]
pio.templates["custom"].layout.font.family = "Cardo"

font_family = "Cardo"

# Set the default template
pio.templates.default = "custom"

In [2]:
# Load environment variables from a .env file so os.getenv can see them.
load_dotenv()

True

In [3]:
# API key sent as the "x-cg-demo-api-key" header on the CoinGecko requests below.
# os.getenv returns None when the variable is not set.
# NOTE(review): the name is spelled "COINGEKCO" (not "COINGECKO"); it has to
# match the variable name in the environment/.env file, so it is left as is.
COINGEKCO_KEY = os.getenv('COINGEKCO_KEY')

In [4]:
def normalize_log_returns(log_returns_df, start_date, end_date, normalize_value=1e4):
    """Compound a series of log returns into a normalized value path.

    Rows whose index lies in the inclusive window [start_date, end_date] are
    kept, and the value at each kept row is

        normalize_value * exp(sum of the log returns up to and including that row)

    Parameters
    ----------
    log_returns_df : pandas.DataFrame
        Frame with a 'Return' column (log returns) and a 'version' column,
        indexed by values comparable with pandas timestamps.
    start_date, end_date : anything accepted by ``pd.to_datetime``
        Inclusive bounds of the window.
    normalize_value : float, default 1e4
        Base value the returns are compounded from.

    Returns
    -------
    pandas.DataFrame
        Columns 'Normalized_Return' and 'version', one row per kept input row,
        indexed by the kept timestamps. When no row falls in the window a
        message is printed and an empty DataFrame is returned.

    Raises
    ------
    KeyError
        If the 'Return' or 'version' column is missing.

    Notes
    -----
    A missing return (NaN) produces NaN for that row only. Later rows keep
    compounding from the last valid value instead of turning NaN for the rest
    of the series, which is what a running product through NaN would do.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Filter the data based on the start date and end date
    in_window = (log_returns_df.index >= start_date) & (log_returns_df.index <= end_date)
    filtered_data = log_returns_df[in_window]

    if filtered_data.empty:
        print("Filtered data is empty after applying the date filter.")
        return pd.DataFrame()

    # Series.cumsum skips NaN by default: the NaN row stays NaN but the running
    # total carries on, so a gap in the data does not poison later rows.
    cumulative_log_return = filtered_data['Return'].cumsum()
    normalized_values = normalize_value * np.exp(cumulative_log_return.to_numpy(dtype=float))

    # Plain arrays and a plain list index keep pandas from re-aligning the
    # columns and give the result an unnamed index.
    return pd.DataFrame(
        {
            'Normalized_Return': normalized_values,
            'version': filtered_data['version'].to_numpy(),
        },
        index=list(filtered_data.index),
    )

In [5]:
# Show the contents of the current working directory.
os.listdir()

['.ipynb_checkpoints',
 'eda.ipynb',
 'flipside_tests.ipynb',
 'live_arbitrum_classifier_cache',
 'live_model.ipynb',
 'model_cache_analysis.ipynb',
 'rebal_func.ipynb',
 'seed_test.ipynb',
 'train_model.ipynb']

In [6]:
# Directory the per-model result CSVs are read from. The raw string keeps the
# Windows backslashes literal.
live_results_path = r'E:\Projects\portfolio_optimizers\classifier_optimizer\live_results'

In [7]:
# Reuse live_results_path instead of re-typing the directory as a non-raw
# string: "\P", "\p", "\c" and "\l" are invalid escape sequences, which only
# work by accident and raise a SyntaxWarning on Python 3.12+.
file_list = os.listdir(live_results_path)

# Collect the distinct "vNN" tags that appear in the file names, in sorted order.
model_names = sorted(set(re.findall(r'v\d{2}', ' '.join(file_list))))

print(model_names)

['v01']


In [8]:
# Load each model's CSV from the live-results directory and tag its rows with
# the model version it came from.
frames = []

for model_name in model_names:
    # os.path.join replaces a non-raw f-string whose backslashes (including
    # "\{") were invalid escape sequences.
    values = pd.read_csv(os.path.join(live_results_path, f'{model_name}.csv'))
    values['version'] = model_name
    frames.append(values)

# Concatenate once rather than re-copying the accumulated frame on every
# iteration. pd.concat rejects an empty list, so keep the empty-frame result
# for the case where no model files were found.
model_data = pd.concat(frames) if frames else pd.DataFrame()

model_data


Unnamed: 0,index,Return,version
0,2025-02-26 06:00:00,0.0,v01
1,2025-02-26 07:00:00,-0.005743,v01
2,2025-02-26 08:00:00,0.006349,v01
3,2025-02-26 09:00:00,0.006858,v01
4,2025-02-26 10:00:00,-0.008006,v01
5,2025-02-26 11:00:00,0.003449,v01
6,2025-02-26 12:00:00,-0.000574,v01
7,2025-02-26 13:00:00,-0.01385,v01
8,2025-02-26 14:00:00,-0.014035,v01
9,2025-02-26 15:00:00,-0.00118,v01


In [9]:
# Parse the 'index' column as datetimes and make it the frame's index.
# The guard makes the cell safe to re-run: once 'index' has been moved into the
# index it is no longer a column, and repeating the conversion raised KeyError.
if 'index' in model_data.columns:
    model_data = (
        model_data
        .assign(index=pd.to_datetime(model_data['index']))
        .set_index('index')
    )


In [10]:
# Bucket the rows into hourly bins ('h'), keeping the last Return and the last
# version seen in each bin.
# NOTE(review): hours with no rows presumably come back as missing values here
# rather than being filled — confirm, since the name suggests filled data.
filled_data = model_data.resample('h').agg({
    "Return":'last',
    "version":'last'
})

In [11]:
# Compound the hourly log returns over the whole available date range,
# using 100 as the normalize_value base.
norm_model_returns = normalize_log_returns(filled_data, filled_data.index.min(), filled_data.index.max(),100)

In [12]:
def plot_continuous_return_with_versions(df, title="Portfolio Return Over Time"):
    """Plot the normalized return as one continuous line, coloured by model version.

    The frame is sorted by index, gaps in 'Normalized_Return' are forward-filled
    and missing versions are labelled "Unknown". The line is then drawn as one
    trace per contiguous run of a version. Each run starts at the last point of
    the run before it, so the line has no visual break where the version
    changes. A version that reappears later gets a separate trace with the same
    colour and a shared legend entry, instead of one trace that jumps across the
    rows belonging to the version in between.

    When more than one version is present, a dashed vertical line and a
    "Version ..." annotation are added wherever the version differs from the
    previous row (the first row counts as such a point).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Normalized_Return' and 'version' columns. The caller's
        frame is not modified.
    title : str, default "Portfolio Return Over Time"
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        Built with the "plotly_white" template, which is set explicitly on this
        figure's layout.

    Raises
    ------
    ValueError
        If ``df`` is empty or lacks one of the required columns.
    """
    if df.empty:
        raise ValueError("DataFrame is empty. Please provide valid data.")

    if 'Normalized_Return' not in df.columns or 'version' not in df.columns:
        raise ValueError("DataFrame must contain 'Normalized_Return' and 'version' columns.")

    # Ensure data is sorted by time; sort_index/assign both return new frames,
    # so the caller's data is left untouched.
    df = df.sort_index()
    df = df.assign(
        Normalized_Return=df['Normalized_Return'].ffill(),  # continuity across gaps
        version=df['version'].fillna("Unknown"),            # no NaN labels
    )

    # Create figure
    fig = go.Figure()

    # Get unique versions in order of appearance
    unique_versions = df['version'].unique()
    print(F'unique_versions: {unique_versions}')

    # Identify version change points (the first row always counts as one)
    version_change_points = df['version'] != df['version'].shift(1)
    version_change_times = df.index[version_change_points]

    # Avoid adding dashed lines if there's only one version
    add_dashed_lines = len(unique_versions) > 1

    # One colour per version, assigned in order of first appearance.
    colour_slot = {version: slot for slot, version in enumerate(unique_versions)}

    # Number the contiguous runs: the counter goes up at every change point.
    run_ids = version_change_points.cumsum().to_numpy()

    previous_tail = None      # last row of the preceding run, used as a bridge
    versions_in_legend = set()

    for _, run_df in df.groupby(run_ids, sort=True):
        version = run_df['version'].iloc[0]

        # Ensure continuity: start this segment at the previous run's last point
        if previous_tail is None:
            segment = run_df
        else:
            segment = pd.concat([previous_tail, run_df])
        previous_tail = run_df.iloc[-1:]

        # Define color for this version
        color = f"hsl({(colour_slot[version] * 50) % 360}, 70%, 50%)"

        # Plot the run as part of a single continuous line
        fig.add_trace(go.Scatter(
            x=segment.index,
            y=segment['Normalized_Return'],
            mode='lines',  # Only lines to keep smooth transitions
            line=dict(color=color, width=4),
            name=f"Version {version}",
            legendgroup=f"Version {version}",
            showlegend=version not in versions_in_legend,  # one legend entry per version
        ))
        versions_in_legend.add(version)

    # Add vertical dashed lines at version change points (only if there are multiple versions)
    if add_dashed_lines:
        # The y-range is the same for every marker, so compute it once.
        y_low = df['Normalized_Return'].min()
        y_high = df['Normalized_Return'].max()

        for version_time, version in zip(version_change_times, df['version'][version_change_points]):
            fig.add_shape(
                go.layout.Shape(
                    type="line",
                    x0=version_time, x1=version_time,
                    y0=y_low, y1=y_high,
                    line=dict(color="black", width=2, dash="dash")  # Dashed vertical line
                )
            )

            # Add version annotation near the top of the plot
            fig.add_annotation(
                x=version_time,
                y=y_high,
                text=f"Version {version}",
                showarrow=False,
                font=dict(size=14, color="black"),
                yshift=10
            )

    # Customize layout
    fig.update_layout(
        title=title,
        xaxis_title="Time",
        yaxis_title="Normalized Return",
        xaxis=dict(showgrid=True),
        yaxis=dict(showgrid=True),
        legend_title="Model Versions",
        template="plotly_white"
    )

    return fig


In [13]:
# Display the normalized return table.
norm_model_returns

Unnamed: 0,Normalized_Return,version
2025-02-26 06:00:00,100.0,v01
2025-02-26 07:00:00,99.427309,v01
2025-02-26 08:00:00,100.060603,v01
2025-02-26 09:00:00,100.749159,v01
2025-02-26 10:00:00,99.945768,v01
2025-02-26 11:00:00,100.291068,v01
2025-02-26 12:00:00,100.233554,v01
2025-02-26 13:00:00,98.854893,v01
2025-02-26 14:00:00,97.477119,v01
2025-02-26 15:00:00,97.362206,v01


In [14]:
# Build and render the return chart; the plotting function also prints the
# list of versions it found.
fig = plot_continuous_return_with_versions(norm_model_returns)
fig.show()

unique_versions: ['v01']


In [15]:
# Move the working directory up one level.
# NOTE(review): the path is relative, so every re-run of this cell climbs one
# more level. Presumably it exists so that `python_scripts` can be imported
# below — confirm, and consider changing to an absolute project path instead.
os.chdir('..')

In [16]:
# Open the diskcache store located at <base_cache_dir>/global_classifier_cache;
# it is read below for the 'current_model_name' entry.
base_cache_dir = r'E:\Projects\portfolio_optimizers\classifier_optimizer'
global_classifier_cache = Cache(os.path.join(base_cache_dir, 'global_classifier_cache'))

In [17]:
import requests
import json

# CoinGecko market chart for the DeFi Pulse Index, priced in USD, last 365 days.
url = "https://api.coingecko.com/api/v3/coins/defipulse-index/market_chart?vs_currency=usd&days=365"

headers = {
    "accept": "application/json",
    "x-cg-demo-api-key": COINGEKCO_KEY
}

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status surfaces HTTP errors (bad key, rate limit) here instead of
# as a confusing KeyError on data["prices"] further down.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
response_text = response.text
data = json.loads(response_text)
df_prices = pd.DataFrame(data["prices"], columns=["timestamp", "price"])
df_prices["timestamp"] = pd.to_datetime(df_prices["timestamp"], unit='ms')  # Convert to datetime
df_prices.set_index('timestamp',inplace=True)

# Normalized returns of the model currently recorded in the cache.
model_name = global_classifier_cache.get('current_model_name')
df = pd.read_csv(f'E:/Projects/portfolio_optimizers/classifier_optimizer/results/{model_name}/norm_returns.csv')
df['Unnamed: 0'] = pd.to_datetime(df['Unnamed: 0'])
df.set_index('Unnamed: 0',inplace=True)

# Keep the last value of each day and rebuild the index from the date strings
# so it can be matched against the price timestamps.
daily_df = df.resample('D').last()
daily_df.index = pd.to_datetime(daily_df.index.strftime('%Y-%m-%d'))
df_prices.rename(columns={'price':'DPI Price'},inplace=True)
daily_df.rename(columns={"Return":"Portfolio Return"},inplace=True)

# Inner join: only timestamps present in both frames survive.
analysis_df = pd.merge(
    df_prices,
    daily_df,
    left_index=True,
    right_index=True,
    how='inner'
)

In [22]:
# Display the merged DPI price / portfolio return table.
analysis_df

Unnamed: 0,DPI Price,Portfolio Return
2025-01-12,132.884904,100.266505
2025-01-13,131.207207,99.559836
2025-01-14,127.816116,104.109374
2025-01-15,133.163905,113.872129
2025-01-16,144.168956,126.995832
2025-01-17,139.58426,154.843689
2025-01-18,149.700699,153.978711
2025-01-19,138.318733,165.542216
2025-01-20,133.915371,171.276958
2025-01-21,139.439875,170.633029


In [18]:
from python_scripts.utils import calculate_cagr, calculate_beta


Valid config keys have changed in V2:
* 'fields' has been removed



In [19]:
# Expected portfolio return in the CAPM form:
#   risk-free rate + beta * (benchmark return - risk-free rate)
# with the DPI price series as the benchmark.
dpi_cagr = calculate_cagr(analysis_df['DPI Price'])
# NOTE(review): presumably an annual risk-free rate of 4.7% — confirm the source.
current_risk_free = 0.047
dpi_cumulative_risk_premium = dpi_cagr - current_risk_free
# portfolio_cagr is computed here but does not enter the expression below.
portfolio_cagr = calculate_cagr(analysis_df['Portfolio Return'])
portfolio_beta = calculate_beta(analysis_df, 'DPI Price'
                                ,'Portfolio Return')
portfolio_expected_return = current_risk_free + (portfolio_beta*dpi_cumulative_risk_premium)
# Last expression of the cell: shown as a percentage with two decimals.
f'{portfolio_expected_return * 100:.2f}%'

cagr history: 2025-01-12    132.884904
2025-01-13    131.207207
2025-01-14    127.816116
2025-01-15    133.163905
2025-01-16    144.168956
2025-01-17    139.584260
2025-01-18    149.700699
2025-01-19    138.318733
2025-01-20    133.915371
2025-01-21    139.439875
2025-01-22    145.637174
2025-01-23    136.703619
2025-01-24    136.155522
2025-01-25    135.128705
2025-01-26    135.088818
2025-01-27    129.257752
2025-01-28    124.374211
2025-01-29    115.933171
2025-01-30    120.802051
2025-01-31    128.499554
2025-02-01    132.994534
2025-02-02    119.925272
2025-02-03    104.734885
2025-02-04    111.847078
2025-02-05    104.134375
2025-02-06    101.755116
2025-02-07     96.114562
2025-02-08     95.783755
2025-02-09     97.185088
2025-02-10     95.589051
2025-02-11     99.724028
2025-02-12     98.781501
2025-02-13    103.050341
2025-02-14    101.630499
2025-02-15    105.813402
2025-02-16    103.204766
2025-02-17    101.801678
2025-02-18    104.998820
2025-02-19    100.402879
2025-02-20 

'72.50%'

# Expected return over one year (annualized)

Cost of Equity (14.98%) indicates the annualized rate of return required by investors for holding equity in this portfolio.
It represents the compensation investors expect for the risk they take by investing in this portfolio instead of a risk-free asset.

In [20]:
# Display the expected return as a raw fraction (not a percentage).
portfolio_expected_return

0.7249868530777032

In [21]:
def calculate_expected_return(risk_free_rate=0.047):
    """Compute the portfolio's expected return against the DeFi Pulse Index.

    Steps:
      1. Download the DPI market chart (USD, last 365 days) from CoinGecko.
      2. Load the normalized returns of the model named by the
         'current_model_name' cache entry and keep the last value of each day.
      3. Inner-join both series on their timestamps.
      4. Return ``risk_free_rate + beta * (dpi_cagr - risk_free_rate)``, where
         ``dpi_cagr`` comes from ``calculate_cagr`` on the DPI prices and
         ``beta`` from ``calculate_beta`` on the joined frame.

    Parameters
    ----------
    risk_free_rate : float, default 0.047
        Risk-free rate used in the formula. It used to be read from a
        notebook-level variable that had to be defined by another cell first;
        the default is the value that cell assigned.

    Returns
    -------
    The expected return as a fraction (multiply by 100 for a percentage).

    Raises
    ------
    requests.HTTPError
        If CoinGecko answers with an error status.
    requests.Timeout
        If CoinGecko does not answer within 30 seconds.
    """
    url = "https://api.coingecko.com/api/v3/coins/defipulse-index/market_chart?vs_currency=usd&days=365"

    headers = {
        "accept": "application/json",
        "x-cg-demo-api-key": COINGEKCO_KEY
    }

    # Fail fast on a hung connection or an HTTP error instead of hitting a
    # KeyError on data["prices"] below.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    # response.json() avoids depending on a `json` import made in another cell.
    data = response.json()

    df_prices = pd.DataFrame(data["prices"], columns=["timestamp", "price"])
    df_prices["timestamp"] = pd.to_datetime(df_prices["timestamp"], unit='ms')  # Convert to datetime
    df_prices = df_prices.set_index('timestamp').rename(columns={'price': 'DPI Price'})

    model_name = global_classifier_cache.get('current_model_name')
    df = pd.read_csv(f'E:/Projects/portfolio_optimizers/classifier_optimizer/results/{model_name}/norm_returns.csv')
    df['Unnamed: 0'] = pd.to_datetime(df['Unnamed: 0'])

    # Last value of each day; the index is rebuilt from the date strings so it
    # can be matched against the price timestamps.
    daily_df = df.set_index('Unnamed: 0').resample('D').last()
    daily_df.index = pd.to_datetime(daily_df.index.strftime('%Y-%m-%d'))
    daily_df = daily_df.rename(columns={"Return": "Portfolio Return"})

    # Inner join: only timestamps present in both frames survive.
    analysis_df = pd.merge(
        df_prices,
        daily_df,
        left_index=True,
        right_index=True,
        how='inner'
    )

    dpi_cagr = calculate_cagr(analysis_df['DPI Price'])
    dpi_cumulative_risk_premium = dpi_cagr - risk_free_rate
    portfolio_beta = calculate_beta(analysis_df, 'DPI Price', 'Portfolio Return')

    return risk_free_rate + (portfolio_beta * dpi_cumulative_risk_premium)
    