# Setup Google Drive mounting (optional)

In [None]:
from google.colab import drive
import os

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

In [None]:
# prompt: create a symbolic link to a google drive workdir 'xyz' to the root of colab

# Where the working folder lives on Drive, and where it is exposed in Colab
gdrive = '/content/drive/MyDrive/AI/2025'
workdir = '/datasets'
slink = f'/content{workdir}'
fullpath = f'{gdrive}{workdir}'

# Nothing to link when the Drive folder is missing (e.g. Drive not mounted)
if not os.path.exists(fullpath):
  print(f"Error: Folder '{fullpath}' not found.")
else:
  try:
    os.symlink(fullpath, slink)
  except FileExistsError:
    # A previous run of this cell already created the link
    print(f"Symbolic link '{slink}' already exists.")
  except OSError as e:
    print(f"Error creating symbolic link: {e}")
  else:
    print(f"Symbolic link created from '{fullpath}' to '{slink}'")

# !ls -lh /content

In [None]:
# !unzip datasets/datasets.zip -d datasets/
# !rm datasets/datasets.zip

# Loading real test dataset

In [None]:
import pandas as pd
import warnings

# Install a filter that ignores all warnings raised from here on.
warnings.filterwarnings('ignore')

# Dataset root folder (relative path); the saved backtests are read from its
# 'backtests' subfolder.
folder = './datasets/247/hourly/'

IB = 100.0 # initial balance for each asset
#SPLIT_DATE = '2024-08-26'

# Loading saved backtests

In [None]:
# prompt: load all saved backtests from folder into backtests list
import os

# Saved backtests are CSV files in the 'backtests' subfolder of the dataset folder
backtest_dir = os.path.join(folder, 'backtests')
backtests = []
titles = []
# Reset the baseline so that a missing 'buy-and-hold.csv' is detected below
# instead of silently reusing a value left over from an earlier run of this cell
baseline = None

# os.listdir() returns entries in arbitrary order; sorting keeps 'backtests' and
# 'titles' in the same, reproducible order on every run
for filename in sorted(os.listdir(backtest_dir)):
  if not filename.endswith(".csv"):
    continue
  filepath = os.path.join(backtest_dir, filename)

  # Only the read itself can raise the errors handled here
  try:
    df = pd.read_csv(filepath)
  except pd.errors.EmptyDataError:
    print(f"Warning: Skipping empty file: {filename}")
    continue
  except pd.errors.ParserError:
    print(f"Warning: Skipping file with parsing error: {filename}")
    continue

  if filename == 'buy-and-hold.csv':
    # The buy-and-hold run is kept apart as the reference for the other backtests
    baseline = df
    print(f"Baseline loaded: {filename}")
  else:
    backtests.append(df)
    titles.append(filename.removesuffix('.csv').upper())
    print()
    print(f"Backtest loaded: {filename}")
  # Preview columns 1..20 (the first column is skipped)
  display(df.iloc[:, 1:21])

if baseline is None:
  print("Warning: baseline file 'buy-and-hold.csv' not found; 'baseline' is None.")

In [None]:
# Show the titles collected for the loaded backtests (notebook cell output).
titles

In [None]:
# prompt: generate a dataset based on df where each cell (except the datetime column) shows the percentage by which it has grown or shrunk compared to the same cell in the previous row

import pandas as pd

def calculate_percentage_change(df):
  """Return the row-over-row percentage change of every column but the first.

  The first column (assumed to hold the datetime) is copied unchanged. Every
  other column is replaced by its change relative to the previous row,
  expressed in percent. The first row has no previous row, so its value
  cells are set to 0.

  Args:
    df: The input DataFrame. It is not modified.

  Returns:
    A new DataFrame with the same shape and columns as ``df``. An empty
    DataFrame, or one with only the first column, is returned as a plain copy.
  """
  # Work on a copy so the caller's DataFrame is left untouched
  df_pct_change = df.copy()

  # Everything after the first column is treated as numeric data
  value_cols = list(df.columns[1:])

  # Without rows or value columns there is nothing to compute, and writing
  # to row 0 below would raise an IndexError
  if df.empty or not value_cols:
    return df_pct_change

  # One vectorised call covers all value columns at once
  df_pct_change[value_cols] = df[value_cols].pct_change() * 100

  # pct_change() leaves NaN in the first row; report it as "no change" (0)
  df_pct_change.iloc[0, 1:] = 0

  return df_pct_change

In [None]:
# Percentage-change view of the baseline and of every loaded backtest
basedelta = calculate_percentage_change(baseline)
deltas = [calculate_percentage_change(bt) for bt in backtests]

In [None]:
# prompt: function that gets two datasets, real and prediction, and returns a third one where each cell is calculated by the rules: ignore the first column, take the absolute value of the real cell, and if the real and prediction cells have different signs, negate it. Don't use iterrows()

import pandas as pd
import numpy as np

def calculate_performance(real_df, prediction_df):
    """
    Score a prediction frame against the real frame, cell by cell.

    The first column is carried over from ``real_df`` unchanged. Every other
    cell holds the magnitude of the real value: kept positive when the real
    and predicted values have the same sign, negated when their signs differ.

    Args:
        real_df: DataFrame with the real values.
        prediction_df: DataFrame with the predicted values; it must have the
            same shape and the same column labels as ``real_df``.

    Returns:
        A new DataFrame with the signed magnitudes, or None (after printing an
        error message) when the inputs are not two matching DataFrames.
    """
    # Reject anything that is not a pair of DataFrames
    both_frames = isinstance(real_df, pd.DataFrame) and isinstance(prediction_df, pd.DataFrame)
    if not both_frames:
        print("Error: Inputs must be pandas DataFrames.")
        return None

    # The frames are compared position by position, so they must line up
    if real_df.shape != prediction_df.shape:
        print("Error: DataFrames must have the same shape.")
        return None

    labels_match = real_df.columns == prediction_df.columns
    if not all(labels_match):
        print("Error: DataFrames must have the same columns.")
        return None

    # Start from a copy so the caller's real data is left untouched
    scored = real_df.copy()

    for name in real_df.columns[1:]:
        actual = real_df[name]
        magnitude = np.abs(actual)

        # True where the prediction moved in the same direction as reality
        same_sign = np.sign(actual) == np.sign(prediction_df[name])

        # Keep the magnitude where the signs agree, negate it elsewhere
        scored[name] = magnitude.where(same_sign, -magnitude)

    return scored

baseline_idx = 0 # set to buy-and-hold delta
performances = []

# Score every backtest's deltas against the buy-and-hold deltas
for i, delta in enumerate(deltas):
  print(f"Calculating performance for {titles[i]}")
  performance_df = calculate_performance(basedelta, delta)
  performances.append(performance_df)
  print(titles[i])
  # Preview columns 1..20 of the frame that was just stored
  display(performance_df.iloc[:,1:21])

In [None]:
# prompt: copy last row of wallet0 with title, sort it ascending, subtract each value from IB
def sort_last_row(df, initial_balance_to_subtract):
  """Return the last row of ``df`` with its columns ordered by ascending value.

  The first column is dropped, ``initial_balance_to_subtract`` is subtracted
  from every remaining value, and the columns are then reordered so that the
  values increase from left to right.

  Args:
    df: Input DataFrame; only its last row is used.
    initial_balance_to_subtract: Amount subtracted from each value.

  Returns:
    A single-row DataFrame holding the shifted values in ascending order.
  """
  # Last row without the first column, shifted by the initial balance
  shifted_tail = df.tail(1).iloc[:, 1:] - initial_balance_to_subtract

  # Transpose so the values form a single column that can be sorted
  as_column = shifted_tail.T
  sort_key = as_column.columns[0]

  # Sort, then transpose back to a single row
  return as_column.sort_values(by=sort_key, ascending=True).T

In [None]:
# prompt: create heatmap from performance_df, positive values in blue, negative values in red

import seaborn as sns
import matplotlib.pyplot as plt

# One heatmap per backtest, columns ordered by their value in the last row
for title, performance in zip(titles, performances):
  plt.figure(figsize=(20, 8))
  # Pass the full frame: sort_last_row() itself skips the first column, so
  # slicing it off here as well would drop the first data column from the plot
  column_order = sort_last_row(performance, IB).columns
  # RdBu centred on 0 draws negative values in red and positive values in blue
  sns.heatmap(performance[column_order], annot=False, cmap="RdBu", center=0)
  plt.title(f'{title.upper()} Performance Heatmap')
  plt.show()