In [132]:
# process_files.py
from config import date_str, DOWNLOAD_DIR, DEST_DIR
from pathlib import Path  # Better path handling

# Build paths
# Parquet file holding the picks for `date_str` (a value imported from `config`).
# The path is relative, so it is resolved against the current working directory
# when the file is read later in the notebook.
SELECTED_STOCK_PATH =f'../picks/{date_str}_selected_stocks.parquet'
# Echo both values so the cell output records which date and file this run used.
print(f'date_str: {date_str}')
print(f'SELECTED_STOCK_PATH: {SELECTED_STOCK_PATH}')

date_str: 2025-04-07
SELECTED_STOCK_PATH: ../picks/2025-04-07_selected_stocks.parquet


In [133]:
import pandas as pd

# Configure how pandas renders frames in this notebook's text output.
pd.options.display.max_columns = None  # never elide columns
# To cap the number of printed rows as well, set pd.options.display.max_rows (e.g. 10).
pd.options.display.width = 1000        # wide lines so rows are not wrapped

In [134]:
# Load the day's picks and keep only the weight column as an independent Series.
selected_stocks = pd.read_parquet(SELECTED_STOCK_PATH)
selected_stocks_n_weights = selected_stocks.loc[:, 'Weight'].copy()

# Show the weights, the index label, and the run date as a quick sanity check.
print(
    f'selected_stocks_n_weights ({type(selected_stocks_n_weights)}):\n{selected_stocks_n_weights}',
    f'\nselected_stocks_n_weights.index.name: {selected_stocks_n_weights.index.name}',
    f'\ndate_str: {date_str}',
    sep='\n',
)


selected_stocks_n_weights (<class 'pandas.core.series.Series'>):
Ticker
BSCP    0.297630
SHY     0.237228
VGSH    0.233309
SPTS    0.180986
DG      0.013357
UNH     0.013155
BJ      0.010322
CNC     0.007357
KR      0.006657
Name: Weight, dtype: float64

selected_stocks_n_weights.index.name: Ticker

date_str: 2025-04-07


In [135]:
import pandas as pd
import os

# --- process_daily_selections: turn one day's selection Series into Date/Ticker/Weight rows ---
def process_daily_selections(selected_stocks_n_weights: pd.Series, date_str: str) -> pd.DataFrame:
    """
    Convert one day's stock selections into a tidy ``Date``/``Ticker``/``Weight`` frame.

    The Series index supplies the tickers and the Series values supply the
    weights. Whatever names the Series and its index carry (including none at
    all, or the same name on both) are replaced by the standard column names.
    The input Series is not modified.

    Args:
        selected_stocks_n_weights: Weights indexed by ticker.
        date_str: Value written to every row of the ``Date`` column.

    Returns:
        A new DataFrame with exactly the columns ``['Date', 'Ticker', 'Weight']``,
        one row per entry of the input Series, in the input's order.

    Raises:
        TypeError: If ``selected_stocks_n_weights`` is not a pandas Series.
    """
    if not isinstance(selected_stocks_n_weights, pd.Series):
        raise TypeError("selected_stocks_n_weights must be a pandas Series.")

    # rename() and rename_axis() return new objects, so the caller's Series keeps
    # its own name and index name. Naming both parts up front also means
    # reset_index() always yields exactly the columns 'Ticker' and 'Weight',
    # with no guessing about what the original names were.
    daily_df = (
        selected_stocks_n_weights
        .rename('Weight')
        .rename_axis('Ticker')
        .reset_index()
    )
    daily_df['Date'] = date_str
    return daily_df[['Date', 'Ticker', 'Weight']]

# --- update_tracking_file: merge the day's rows into the CSV history file and save it ---
def _set_aside_unusable_history(filename: str) -> None:
    """
    Move a history file that could not be used to ``<filename>.bak``.

    Called just before the tracking file is rebuilt from scratch, so the
    previous contents stay recoverable instead of being overwritten. An
    existing ``.bak`` file is replaced. A failure to move the file is reported
    but not raised, which keeps the update best-effort.
    """
    backup_name = f"{filename}.bak"
    try:
        os.replace(filename, backup_name)
        print(f"Warning: Previous contents of {filename} were moved to {backup_name}.")
    except OSError as e:
        print(f"Warning: Could not back up {filename} to {backup_name}. Error: {e}")


def update_tracking_file(daily_data_df: pd.DataFrame, filename: str = "stock_selections_history.csv"):
    """
    Merge one day's selections into the CSV history file and save it.

    Steps:
      1. Load ``filename`` if it exists. A missing or empty file means the
         history starts fresh. A file that cannot be parsed, or that lacks the
         expected columns, is also treated as empty, but is first moved to
         ``<filename>.bak`` so its contents are not lost when the file is
         rewritten.
      2. Combine history and new rows. ``pd.concat`` is only used when both
         sides have rows, which avoids the pandas FutureWarning about
         concatenating empty frames.
      3. Normalise dtypes (``Date`` to datetime, ``Weight`` to numeric with
         unparseable values becoming NaN, ``Ticker`` to str). A failure here is
         reported and processing continues with the unconverted data.
      4. Drop duplicate ``(Date, Ticker)`` pairs keeping the last occurrence,
         so re-running a day replaces that day's rows for the same tickers.
      5. Sort by ``Date`` then ``Ticker`` and write the result back to
         ``filename`` with dates formatted as ``YYYY-MM-DD``. A write failure is
         reported, not raised.

    Args:
        daily_data_df: DataFrame containing the selections for the current day
                       (should have columns 'Date', 'Ticker', 'Weight').
        filename: The name of the CSV file to load from and save to.

    Returns:
        The updated DataFrame containing all historical and new selections
        (sorted chronologically by Date, then Ticker).
    """
    expected_cols = ['Date', 'Ticker', 'Weight']
    all_selections_df = pd.DataFrame(columns=expected_cols)  # Used when there is no usable history
    history_unusable = False

    if os.path.exists(filename):
        try:
            loaded_df = pd.read_csv(
                filename,
                parse_dates=['Date'],
                # Keep tickers as literal text. With pandas' default NA handling
                # a symbol such as "NA" or "NULL" is read as a missing value and
                # would then be saved back as the string "nan".
                dtype={'Ticker': str},
                keep_default_na=False,
            )
            print(f"Loaded existing data from {filename}")
            if not all(col in loaded_df.columns for col in expected_cols):
                print("Warning: Loaded file missing expected columns. Treating as empty.")
                history_unusable = True
            else:
                all_selections_df = loaded_df
                if all_selections_df.empty:
                    print(f"History file {filename} was empty.")
        except pd.errors.EmptyDataError:
            print(f"History file {filename} is empty. Starting fresh.")
        except Exception as e:
            print(f"Warning: Could not load or parse {filename}. Error: {e}. Starting fresh.")
            history_unusable = True
    else:
        print(f"History file {filename} not found. Creating a new one.")

    if history_unusable:
        # The save below rewrites `filename`; keep the old contents recoverable.
        _set_aside_unusable_history(filename)

    if all_selections_df.empty:
        updated_df = daily_data_df.copy()
        print("History was empty, using only new data.")
    elif daily_data_df.empty:
        updated_df = all_selections_df.copy()
        print("No new rows supplied; keeping existing history.")
    else:
        updated_df = pd.concat([all_selections_df, daily_data_df], ignore_index=True)
        print("Appended new data to existing history.")

    try:
        # New rows carry dates as strings while loaded rows are already parsed;
        # converting the combined column makes duplicates comparable and the
        # column sortable. Blank weights loaded as text become NaN here.
        updated_df['Date'] = pd.to_datetime(updated_df['Date'])
        updated_df['Weight'] = pd.to_numeric(updated_df['Weight'], errors='coerce')
        updated_df['Ticker'] = updated_df['Ticker'].astype(str)
    except Exception as e:
        print(f"Warning: Error during data type conversion. Error: {e}")

    # Later rows win, so today's weight replaces an earlier entry for the same day and ticker.
    updated_df = updated_df.drop_duplicates(subset=['Date', 'Ticker'], keep='last')

    # Sort data chronologically for saving
    updated_df = updated_df.sort_values(by=['Date', 'Ticker']).reset_index(drop=True)
    print("Data sorted chronologically (Date, Ticker) for saving.")

    try:
        updated_df.to_csv(filename, index=False, date_format='%Y-%m-%d')
        print(f"Successfully updated and saved data to {filename}")
    except Exception as e:
        print(f"ERROR: Could not save data to {filename}. Error: {e}")

    return updated_df  # Return the chronologically sorted data



In [136]:
# Turn today's picks into Date/Ticker/Weight rows and merge them into the CSV history.
daily_df = process_daily_selections(selected_stocks_n_weights, date_str)
all_data_updated = update_tracking_file(daily_df, filename="stock_selections_history.csv")

print("-" * 20)
print("Current Full History DataFrame (Displayed with custom sort: Newest Date -> High Weight -> Ticker):")
# Re-sort for display only: newest date first, heaviest weight first,
# ticker A-Z as the tie-breaker. The saved file keeps its own ordering.
display_sorted_df = all_data_updated.sort_values(
    ['Date', 'Weight', 'Ticker'],
    ascending=[False, False, True],
    ignore_index=True,
)
print(display_sorted_df)


Loaded existing data from stock_selections_history.csv
Appended new data to existing history.
Data sorted chronologically (Date, Ticker) for saving.
Successfully updated and saved data to stock_selections_history.csv
--------------------
Current Full History DataFrame (Displayed with custom sort: Newest Date -> High Weight -> Ticker):
         Date Ticker    Weight
0  2025-04-07   BSCP  0.297630
1  2025-04-07    SHY  0.237228
2  2025-04-07   VGSH  0.233309
3  2025-04-07   SPTS  0.180986
4  2025-04-07     DG  0.013357
5  2025-04-07    UNH  0.013155
6  2025-04-07     BJ  0.010322
7  2025-04-07    CNC  0.007357
8  2025-04-07     KR  0.006657
9  2025-04-04   FTSM  0.265842
10 2025-04-04   JPST  0.152138
11 2025-04-04   PULS  0.151988
12 2025-04-04   SPTS  0.139178
13 2025-04-04   VGSH  0.116990
14 2025-04-04    BSV  0.088944
15 2025-04-04    BLV  0.039118
16 2025-04-04    TLH  0.023623
17 2025-04-04   SPTL  0.022180
18 2025-04-03    COR  0.141221
19 2025-04-03    RSG  0.130864
20 2025-04-0