In [None]:
# Treasury Forecasting algo version #1: determining the best investment window
    # NOTE 2/12/2025: This has been replaced by the windows.ipynb notebook entry point
# 2/18/2025
# Next steps:
#   1. Return more than one investment window
#   2. Implementation of the asset class policy
#   3. Implementation of the amounts that must mature

import mysql.connector
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
load_dotenv(verbose=True)

# Database connection parameters, read from the environment (populated by
# load_dotenv above). Any variable that is not set yields None here.
DB_CONFIG = {
    "host": os.getenv("DB_HOST"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
}

# Use an f-string so a missing DB_HOST prints "None" instead of raising
# TypeError (str + None); the output is unchanged when DB_HOST is set.
print(f"Database URL: {DB_CONFIG['host']}")

In [None]:
# Fetch data from database and return as a DataFrame
def fetch_data(table_name, column_names='*', condition='1'):
    """Run a SELECT against the configured database and return a DataFrame.

    Args:
        table_name: Table to select from.
        column_names: Comma-separated column list (default ``'*'``).
        condition: Text of the WHERE clause (default ``'1'``, i.e. no filter).

    Returns:
        A ``pandas.DataFrame`` whose columns are named after the result set,
        or ``None`` if a ``mysql.connector.Error`` occurred (the error is
        printed).
    """
    # Bind both handles up front so the cleanup below never touches an
    # unbound name when connect() or cursor() fails part-way.
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()
        # NOTE(security): the statement is assembled by string interpolation,
        # and identifiers cannot be bound as query parameters. Only pass
        # trusted, hard-coded values for all three arguments.
        query = f"SELECT {column_names} FROM {table_name} WHERE {condition}"
        cursor.execute(query)
        # The first field of each description entry is the column name.
        columns = [col[0] for col in cursor.description]
        return pd.DataFrame(cursor.fetchall(), columns=columns)
    except mysql.connector.Error as err:
        print(f"Error: {err}")
        return None
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()

In [None]:
# Query the running balance of the asset classes (all rows, no WHERE filter)
table_name = 'assetclassbalances'
column_names = 'TransactionDate, Portfolio, Cash_Sweep, US_Treasuries, CDs, CommercialPapers, MoneyMarket, MutualFund, US_Agencies'
assets = fetch_data(table_name, column_names)

# fetch_data returns None after printing a database error. Stop here with a
# clear message rather than failing later with an unrelated TypeError.
if assets is None:
    raise RuntimeError(f"Could not load '{table_name}'; see the database error printed above.")

assets


In [None]:
# Query the asset class max. percentages (top-level classes only)
table_name = 'AssetClass'
column_names = 'Title AS "AssetClass", PercentMax'
# AND replaces the deprecated, non-standard MySQL operator &&.
condition = 'AssetClassParentID = 0 AND Title != "Not Assigned"'
max_percent = fetch_data(table_name, column_names, condition)
if max_percent is not None:
    # Map each asset class title to the matching balance column name
    asset_class_mapping = {
        'Cash/Sweep': 'Cash_Sweep',
        'US Treasuries': 'US_Treasuries',
        'Certificate of Deposit': 'CDs',
        'Commercial Paper': 'CommercialPapers',
        'Money Market': 'MoneyMarket',
        'Mutual Fund': 'MutualFund',
        'US Agencies': 'US_Agencies'
    }
    max_percent['AssetClassKey'] = max_percent['AssetClass'].map(asset_class_mapping)

    # Convert PercentMax to float. This stays inside the None guard so a
    # failed query does not raise a TypeError on top of the printed error.
    max_percent['PercentMax'] = max_percent['PercentMax'].astype(float)

print(max_percent)
# max_percent.info()

In [None]:
# Smallest and largest Cash_Sweep balance in the queried history; these bound
# the range of cash thresholds generated below.
min_cash, max_cash = assets['Cash_Sweep'].min(), assets['Cash_Sweep'].max()
print(f"Min:{min_cash:,}  Max:{max_cash:,}")

def prorate_cash_values(num_values, min_cash, max_cash):
    """Build a one-column DataFrame of evenly spaced cash thresholds.

    Args:
        num_values: Number of points to generate.
        min_cash: First value of the range (inclusive).
        max_cash: Last value of the range (inclusive).

    Returns:
        DataFrame with a single integer column 'Cash'. Fractional values are
        truncated by the integer cast, not rounded.
    """
    evenly_spaced = np.linspace(min_cash, max_cash, num_values)
    return pd.DataFrame({'Cash': evenly_spaced}).astype({'Cash': int})

# Size the threshold grid: one point per whole min_investment step that fits
# between the smallest and largest cash balance.
min_investment = 10_000
num_values = int((max_cash - min_cash) / min_investment)
print(num_values)

prorated_df = prorate_cash_values(num_values=num_values, min_cash=min_cash, max_cash=max_cash)
prorated_df


In [None]:
# Sanity check: the spacing between the first two prorated cash values.
#     Q: Why does it differ from min_investment?
#     A: prorate_cash_values spreads num_values points over the whole range
#        with both ends included, which leaves num_values - 1 gaps, and it
#        truncates every value to an integer.
increment = prorated_df['Cash'].iat[1] - prorated_df['Cash'].iat[0]
f"For {num_values:,} prorated values the increment is: ${increment:,}"

In [None]:
# Build a day-by-threshold True/False matrix: a cell is True when that day's
# available cash is greater than or equal to the column's cash threshold.

# Base frame: one row per balance record, oldest first
algo_df = (
    assets[['TransactionDate', 'Cash_Sweep']]
    .rename(columns={'TransactionDate': 'Date', 'Cash_Sweep': 'Cash'})
    .sort_values(by='Date', ascending=True)
)

# Broadcast a column vector of daily cash against the row of thresholds so
# every comparison is evaluated in a single vectorized step
cash_values = prorated_df['Cash'].values
cash_matrix = algo_df['Cash'].astype(float).to_numpy().reshape(-1, 1) >= cash_values
column_names = [f'Less_{val}' for val in cash_values]

# Wrap the matrix in a frame that shares the base frame's index ...
result_df = pd.DataFrame(cash_matrix, index=algo_df.index, columns=column_names)
# ... and append its columns to the right of Date and Cash
algo_df = pd.concat([algo_df, result_df], axis=1)

algo_df

In [None]:
# Count the max. number of vertically consecutive True values in each column

# Count consecutive True values
def count_consecutive_true(series):
    """Return the length of the longest unbroken run of True values.

    Args:
        series: Boolean pandas Series, read in its current row order.

    Returns:
        The largest number of consecutive True entries; 0 when the series is
        empty or contains no True value.
    """
    if len(series) == 0:
        return 0
    # True -> 1, False -> 0, so summing a run yields its number of Trues
    flags = series.astype(int)
    # A new run id starts every time the value differs from the previous row
    run_ids = flags.ne(flags.shift()).cumsum()
    # Runs of False sum to 0; runs of True sum to their length
    return flags.groupby(run_ids).sum().max()

# The first two columns are Date and Cash; every column after them is a
# boolean threshold column. Reduce each of those to its longest True run,
# giving a Series indexed by threshold column name.
threshold_start = 2
max_consecutive_days = algo_df.iloc[:, threshold_start:].apply(count_consecutive_true)
print("Maximum consecutive True values for each threshold:")

max_consecutive_days


In [None]:
# Turn the per-threshold Series into a tidy frame with three columns:
#   Cash   - the threshold amount (column label without its "Less_" prefix)
#   Days   - the longest run of consecutive days at or above that amount
#   Return - Cash * Days, the score used to rank the windows
max_consecutive_days_df = (
    max_consecutive_days
    .rename_axis('Cash')
    .reset_index(name='Days')
    .assign(
        Cash=lambda frame: frame['Cash'].str.replace('Less_', '').astype(int),
        Return=lambda frame: frame['Cash'] * frame['Days'],
    )
)


In [None]:
# Rank the thresholds by Return, highest first, and show the ten best rows
(
    max_consecutive_days_df
    .sort_values(by='Return', ascending=False)
    .head(10)
)

#TODO: Get the top x day counts (unique values in the Days column)
#TODO: Get the amount and the consecutive TRUEs (days) instead of the max consecutive days count
#TODO: Look up the rows in the algo_df that match the top x day counts
#TODO: The start and end dates for each window instead of the following max. consecutive days


In [None]:
# Get the Cash, Days, and Return (row) with maximum return
max_return_row = max_consecutive_days_df.loc[max_consecutive_days_df['Return'].idxmax()]
print(max_return_row)

# Format the threshold through int(): if the extracted row is ever upcast to
# float, the bare value would render as "12345.0" and miss the "Less_12345"
# column label.
isolated_result_df = algo_df[[f"Less_{int(max_return_row['Cash'])}", 'Date', 'Cash']]
isolated_result_df

In [None]:
# Finally, using the same run-grouping approach as count_consecutive_true, find the longest run
# of consecutive days, return its start and end dates, and report the matching max. Cash value

def find_consecutive_dates(series, dates=None):
    """Locate the longest run of consecutive True values and its date span.

    Args:
        series: Boolean pandas Series, read in its current row order.
        dates: Optional Series of dates sharing ``series``'s index labels.
            Defaults to the ``Date`` column of the notebook-level ``algo_df``.

    Returns:
        Tuple ``(start_date, end_date, length)`` for the first longest run of
        True values. When ``series`` is empty or has no True value the result
        is ``(None, None, 0)``, because there is no window to report.
    """
    bool_series = series.astype(bool)

    # Without any True there is no run; the grouping below would otherwise
    # report the dates of a False run (or fail on empty input).
    if not bool_series.any():
        return None, None, 0

    if dates is None:
        dates = algo_df['Date']

    # A new group id starts wherever the value differs from the previous row
    changes = bool_series != bool_series.shift()
    groups = changes.cumsum()

    # Summing booleans per group gives the run length (0 for False runs)
    lengths = bool_series.groupby(groups).sum()
    longest = lengths.max()
    # On ties, keep the earliest run
    max_group = lengths[lengths == longest].index[0]

    # Index labels of the rows that make up the winning run
    run_labels = series.index[(groups == max_group).to_numpy()]
    start_date = dates.loc[run_labels[0]]
    end_date = dates.loc[run_labels[-1]]

    return start_date, end_date, longest

# Column of the threshold with the highest return. Format the amount through
# int(): if the extracted row is ever upcast to float, the bare value would
# render as "12345.0" and miss the "Less_12345" column label.
col = f"Less_{int(max_return_row['Cash'])}"

# Find start date, end date and max consecutive days
start_date, end_date, max_days = find_consecutive_dates(algo_df[col])

print("Max. ROI investment window:")
print(f"Start date: {start_date}")
print(f"End date: {end_date}")
print(f"Length: {max_days} days")
print(f"Available cash: ${int(max_return_row['Cash']):,}")

