In [75]:
# Configuration: values a reader may want to tune before running the notebook.

# Look-back windows, in days, for which the performance ratios are calculated
# (handed to utils.get_latest_dfs further down).
DAYS_RATIO = [
    3, 5, 10, 15,
    30, 60, 120, 250,
]

# Risk-free rate forwarded to utils.analyze_stock.
# NOTE(review): presumably an annualized rate (0.04 == 4%) - confirm against utils.
RISK_FREE_RATE = 0.04

In [76]:
import sys
from pathlib import Path

# Notebook cell
%load_ext autoreload
%autoreload 2

# Get root directory (assuming notebook is in root/notebooks/)
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Add src directory to Python path
sys.path.append(str(ROOT_DIR / 'src'))

# Verify path
print(f"Python will look in these locations:\n{sys.path}")


# --- Execute the processor ---
import utils

SOURCE_PATH, DEST_PATH = utils.main_processor(
    data_dir='..\data',  # search project ..\data
    downloads_dir='',  # None searchs Downloads dir, '' omits search
    downloads_limit=0,  # search the first 10 files
    clean_name_override='df_perf_ratios.pkl',  # override filename
    start_file_pattern='df_OHLCV_', # search for files starting with 'df_'
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\python310.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py310\\stocks\\src', 'c:\\Users\\ping\\Files_win10\\python\\py310\\stocks\\src', 'c:\\Users\\ping\\Files_win10\\python\\py310\\stocks\\src', 'c:\\Users\\ping\\Files_win10\\python\\py310\\stocks\\src', 

**Available 'df_OHLCV_' files:**

- (1) `[DATA]` `df_OHLCV_2025-03-14_clean.pkl` <span style='color:#00ffff'>(25.33 MB, 2025-03-23 18:29)</span>


Input a number to select file (1-1)



    **Selected paths:**
    - Source: `..\data\df_OHLCV_2025-03-14_clean.pkl`  
    - Destination: `..\data\df_perf_ratios.pkl`
    

In [77]:
import pandas as pd

# Get tickers from df_finviz.pkl file (the frame's index holds the symbols).
# Raw string: identical path value, but avoids the invalid '\d' escape
# sequences that a normal string literal would contain
# (DeprecationWarning, SyntaxWarning on Python 3.12+).
# NOTE: read_pickle can execute arbitrary code - only load trusted files.
df_finviz = pd.read_pickle(r'..\data\df_finviz.pkl')
tickers = df_finviz.index.to_list()

In [78]:
import pandas as pd

# Read the pickled frame selected above and keep only the 'Adj Close' column;
# .copy() makes the result independent of the full frame that was loaded.
df = pd.read_pickle(SOURCE_PATH)[['Adj Close']].copy()

# Show the frame for a visual check
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close
Symbol,Date,Unnamed: 2_level_1
UBS,2025-03-14,32.73
UBS,2025-03-13,31.71
UBS,2025-03-12,31.94
UBS,2025-03-11,31.38
UBS,2025-03-10,31.88
...,...,...
PCVX,2024-03-21,67.64
PCVX,2024-03-20,69.13
PCVX,2024-03-19,67.60
PCVX,2024-03-18,68.04


In [79]:
# Summarise the price frame: index type, entry count, column dtypes,
# non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 344750 entries, ('UBS', Timestamp('2025-03-14 00:00:00')) to ('PCVX', Timestamp('2024-03-15 00:00:00'))
Data columns (total 1 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   Adj Close  344750 non-null  float64
dtypes: float64(1)
memory usage: 4.0+ MB


In [80]:
import pandas as pd

def select_tickers_data(df, tickers):
  """
  Selects data for a list of tickers from a DataFrame with a MultiIndex
  where the first level is the ticker and the second level is the date.

  Tickers that are not present in the first index level are silently
  skipped, so the lookup never fails because of an unknown symbol.

  Args:
    df (pd.DataFrame): The input DataFrame with a MultiIndex.
    tickers (list): A list of ticker symbols to select.

  Returns:
    pd.DataFrame: A DataFrame containing only the data for the specified tickers.
                  Rows for tickers not found will not be included.
                  An empty, column-less DataFrame is returned when none of
                  the requested tickers exist (or the lookup fails).
  """
  # Build the set of available symbols ONCE. Calling get_level_values(0)
  # inside the comprehension would rebuild a full-length Index (and its hash
  # table) for every requested ticker.
  available = df.index.get_level_values(0).unique()
  valid_tickers = [t for t in tickers if t in available]  # Filter out the tickers that do not exist

  if not valid_tickers:
    print("No valid tickers found in the DataFrame. Returning an empty DataFrame.")
    return pd.DataFrame()

  try:
    return df.loc[valid_tickers]
  except KeyError as e:
    # Defensive: should not trigger after the filtering above, but callers
    # rely on this function never raising KeyError.
    print(f"KeyError after filtering valid tickers: {e}") #Added to help debugging.
    return pd.DataFrame()


# Restrict the price frame to the tickers taken from df_finviz; symbols that
# are absent from df are skipped by select_tickers_data.
# Example with a hand-written list instead:
# ticker_list = ["UBS", "AAPL", "MSFT", "GEV"]  # Include some valid and invalid tickers
selected_data = select_tickers_data(df, tickers)
print(selected_data)

                   Adj Close
Symbol Date                 
AAPL   2025-03-14     213.49
       2025-03-13     209.68
       2025-03-12     216.98
       2025-03-11     220.84
       2025-03-10     227.48
...                      ...
AIRR   2024-03-21      65.66
       2024-03-20      64.36
       2024-03-19      63.41
       2024-03-18      63.21
       2024-03-15      63.25

[344750 rows x 1 columns]


In [81]:
import utils

# Build the per-window input frames from the selected prices.
# NOTE(review): presumably returns one DataFrame per entry in DAYS_RATIO, each
# still indexed by ticker on level 0 (the next cell iterates it that way) -
# confirm against utils.get_latest_dfs.
list_dfs = utils.get_latest_dfs(selected_data, DAYS_RATIO)

In [85]:
# Metrics collected per ticker: {ticker: {metric name: value}}. Every frame in
# list_dfs adds its own metric entries to the ticker's dictionary.
all_results = {}

for _df in list_dfs:
    # Analyse each distinct ticker found on the first index level of this frame
    for ticker in _df.index.get_level_values(0).unique():
        result_df = utils.analyze_stock(_df, ticker, risk_free_rate=RISK_FREE_RATE)
        if result_df is None:
            # Nothing was produced for this ticker in this frame
            continue

        # The single-row result carries the ticker as its index label and the
        # metrics as its columns.
        ticker_name = result_df.index[0]
        metrics = result_df.iloc[0].to_dict()

        # Create the ticker's entry on first sight, then merge the new metrics in
        all_results.setdefault(ticker_name, {}).update(metrics)

if not all_results:
    print("No performance metrics were calculated.")
else:
    # One row per ticker, one column per metric
    combined_df = pd.DataFrame.from_dict(all_results, orient='index')
    print("\nCombined performance metrics DataFrame:")
    print(combined_df)

  np.divide(average_annual_return, annualized_downside_risk, out=out)
  np.divide(



Combined performance metrics DataFrame:
      Sharpe 3d  Sortino 3d  Omega 3d  Sharpe 5d  Sortino 5d  Omega 5d  \
A      2.391301    6.077265  1.541406  -0.967958   -1.857841  0.850624   
AA     1.559067    3.621075  1.322591   9.832251   22.572613  3.843882   
AAPL  -3.420852   -5.243674  0.532856 -10.609635  -10.355952  0.222952   
ABBV  -5.482265   -7.366660  0.343726 -14.058346  -11.366243  0.036393   
ABEV  58.252193         inf       NaN   6.300423   13.580392  2.710969   
...         ...         ...       ...        ...         ...       ...   
ZG    -2.603426   -4.226575  0.623467  -2.300781   -3.165323  0.702736   
ZM     0.821347    1.772382  1.157896   1.832998    3.315179  1.356618   
ZS     0.209591    0.427158  1.038054   1.851402    3.250818  1.349685   
ZTO   25.822797         inf       NaN  -0.275099   -0.456699  0.958836   
ZTS   -1.819928   -3.132050  0.720975 -11.861555  -10.493258  0.104667   

      Sharpe 10d  Sortino 10d  Omega 10d  Sharpe 15d  ...  Omega 30d  

In [86]:
import pandas as pd
import numpy as np
from tabulate import tabulate

# Symbols shown in the preview table at the end of this cell
_symbols = ['USFR', 'AAPL', 'MSFT', 'GOOG', 'NVDA', 'IBIT', 'GLD', 'VCIT']
replacement_value = 1e5  # Define replacement value once

# Check for NaN or Inf values.
# `not` is used instead of `~`: bitwise inversion only acts as a logical NOT
# on NumPy booleans and silently yields a truthy integer on a Python bool.
has_nan_inf = bool(
    combined_df.isnull().any().any()
    or not np.isfinite(combined_df).values.all()
)

if has_nan_inf:
    print(f"combined_df contains NaN or Inf values. Replacing them with {replacement_value}.")
    # fillna/replace return new frames, so combined_df itself is left untouched.
    # -Inf is mapped to -replacement_value: the check above flags it too, and
    # leaving it in place would save a frame that is still non-finite.
    df_copy = (
        combined_df
        .fillna(replacement_value)
        .replace(np.inf, replacement_value)
        .replace(-np.inf, -replacement_value)
    )
    print(f"Replaced NaN or Inf values in combined_df with {replacement_value} (-Inf with {-replacement_value}).")
    df_to_save = df_copy  # Use the modified copy for saving
else:
    print("combined_df contains no NaN or Inf values.")
    df_to_save = combined_df  # Use the original DataFrame for saving

# Save the DataFrame to pickle file (always do this)
df_to_save.to_pickle(DEST_PATH)
print(f"DataFrame successfully saved to {DEST_PATH}")

# Create and display the formatted table.
# Only symbols present in the result are selected (in the order listed in
# _symbols); a bare .loc[_symbols] raises KeyError when any label is missing.
available_symbols = [s for s in _symbols if s in df_to_save.index]
missing_symbols = [s for s in _symbols if s not in df_to_save.index]
if missing_symbols:
    print(f"Symbols not found in combined_df and skipped: {missing_symbols}")

selected_df = df_to_save.loc[available_symbols]  # Use df_to_save here
print(tabulate(selected_df, headers='keys', tablefmt='grid', floatfmt='.4f'))

combined_df contains NaN or Inf values. Replacing them with 100000.0.
Replaced NaN or Inf values in combined_df with 100000.0.
DataFrame successfully saved to ..\data\df_perf_ratios.pkl
+------+-------------+--------------+-------------+-------------+--------------+-------------+--------------+---------------+-------------+--------------+---------------+-------------+--------------+---------------+-------------+--------------+---------------+-------------+---------------+----------------+--------------+---------------+----------------+--------------+
|      |   Sharpe 3d |   Sortino 3d |    Omega 3d |   Sharpe 5d |   Sortino 5d |    Omega 5d |   Sharpe 10d |   Sortino 10d |   Omega 10d |   Sharpe 15d |   Sortino 15d |   Omega 15d |   Sharpe 30d |   Sortino 30d |   Omega 30d |   Sharpe 60d |   Sortino 60d |   Omega 60d |   Sharpe 120d |   Sortino 120d |   Omega 120d |   Sharpe 250d |   Sortino 250d |   Omega 250d |
| USFR |      5.2371 |      19.6354 |      2.7493 |     -0.5308 |      -