In [1]:
import sys
from pathlib import Path
import pandas as pd
import os
from IPython.display import display, Markdown  # Assuming you use these for display


# Set pandas display options to show more columns and rows
pd.set_option('display.max_columns', None)  # Show all columns
# pd.set_option('display.max_rows', 10)       # Limit to 10 rows for readability
pd.set_option('display.width', None)        # Let the display adjust to the window

# 2. Set the display width (optional but 
# often helpful)
#    'None' tries to detect terminal width. 
#    A large number (e.g., 1000) ensures no wrapping unless absolutely necessary.
pd.set_option('display.width', 1000) 


# Notebook cell
%load_ext autoreload
%autoreload 2

# Get root directory (assuming notebook is in root/notebooks/)
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Add src directory to Python path
sys.path.append(str(ROOT_DIR / 'src'))

# Verify path
print(f"Python will look in these locations:\n{sys.path}")


# --- Execute the processor ---
import utils



Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\python311.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\Lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py311\\stocks\\src']


In [2]:
# Search parameters are named once so the status messages below always
# describe the search that was actually performed.
FILE_PREFIX = 'df_finviz_2025'
FILE_EXTENSION = 'parquet'
SEARCH_DIR_NAME = 'Downloads'

# NOTE(review): presumably returns up to `count` matching filenames, newest
# first, and a falsy value when nothing matches or an error occurs -- confirm
# against utils.get_recent_files_in_directory.
files_list = utils.get_recent_files_in_directory(
    prefix=FILE_PREFIX,
    extension=FILE_EXTENSION,
    count=60,
    directory_name=SEARCH_DIR_NAME,
)

if files_list:
    print("\nMost recent files found in target directory:")
    # Number the listing from 1 for readability.
    for position, filename in enumerate(files_list, start=1):
        print(f"{position}. {filename}")
else:
    # Report the pattern that was really searched for (the old message
    # mentioned 'ticker' CSV files, which this cell never looks for).
    print(
        f"No '{FILE_PREFIX}*.{FILE_EXTENSION}' files found in your "
        f"{SEARCH_DIR_NAME} directory, or an error occurred."
    )

target_dir: C:\Users\ping\Downloads

Most recent files found in targetdirectory:
1. df_finviz_2025-05-14_stocks_etfs.parquet
2. df_finviz_2025-05-13_stocks_etfs.parquet
3. df_finviz_2025-05-12_stocks_etfs.parquet
4. df_finviz_2025-05-09_stocks_etfs.parquet
5. df_finviz_2025-05-08_stocks_etfs.parquet
6. df_finviz_2025-05-07_stocks_etfs.parquet
7. df_finviz_2025-05-06_stocks_etfs.parquet
8. df_finviz_2025-05-05_stocks_etfs.parquet
9. df_finviz_2025-05-02_stocks_etfs.parquet
10. df_finviz_2025-05-01_stocks_etfs.parquet
11. df_finviz_2025-04-30_stocks_etfs.parquet
12. df_finviz_2025-04-29_stocks_etfs.parquet
13. df_finviz_2025-04-28_stocks_etfs.parquet
14. df_finviz_2025-04-25_stocks_etfs.parquet


In [3]:
import os

def extract_and_sort_dates(files):
    """
    Extracts the YYYY-MM-DD date from each filename and returns the dates sorted.

    The date is located by its pattern rather than by its position among the
    underscore-separated parts, so the result does not depend on how many
    underscores the prefix contains (e.g. both 'df_finviz_2025-05-14_x.parquet'
    and 'ticker_2025-05-14.csv' yield '2025-05-14').

    Args:
        files: An iterable of filename strings containing a 'YYYY-MM-DD' date,
            e.g. 'prefix_YYYY-MM-DD_suffix.extension'. None or an empty
            iterable is treated as "no files". Names without such a date are
            skipped.

    Returns:
        A list of date strings in 'YYYY-MM-DD' format, sorted from oldest to
        newest. There is one entry per filename that contains a date, so
        duplicates are kept.
    """
    import re  # local import so the function does not depend on other cells

    # Four digits, two digits, two digits, not embedded in a longer digit run.
    date_pattern = re.compile(r'(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)')

    date_list = []
    for file in files or []:
        found = date_pattern.search(file)
        if found:
            date_list.append(found.group())

    # Sorting the date strings directly works because the format YYYY-MM-DD
    # orders alphabetically the same way it orders chronologically.
    date_list.sort()
    return date_list



In [9]:
# Dates found in the downloaded filenames, oldest first.
sorted_dates = extract_and_sort_dates(files_list)

# Print each date next to its list index so a slice can be picked by position.
for position, date_text in enumerate(sorted_dates):
    print(f"{position:<3}  {date_text}")

0    2025-04-25
1    2025-04-28
2    2025-04-29
3    2025-04-30
4    2025-05-01
5    2025-05-02
6    2025-05-05
7    2025-05-06
8    2025-05-07
9    2025-05-08
10   2025-05-09
11   2025-05-12
12   2025-05-13
13   2025-05-14


In [5]:
# create_config.py
def create_config_file(date_str):
    """Create config.py with date configuration.

    Writes (overwriting any existing file) a `config.py` in the current
    working directory that defines `date_str`, `DOWNLOAD_DIR` and `DEST_DIR`,
    then prints a confirmation line.

    Args:
        date_str: Date in YYYY-MM-DD format. Any object whose `str()` has that
            form is accepted.

    Raises:
        ValueError: If the value is not of the form YYYY-MM-DD. The value is
            pasted into generated Python source, so anything else (a stray
            quote, for example) would produce a broken config.py.
    """
    import re  # local import so the function does not depend on other cells

    date_text = str(date_str)
    if not re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', date_text):
        raise ValueError(
            f"date_str must be in YYYY-MM-DD format, got {date_text!r}"
        )

    # Not a raw f-string: each '\\' below is written to the file as a single
    # backslash, so the generated lines contain valid raw-string Windows paths.
    config_content = f"""# config.py
# File path configuration
date_str = '{date_text}'  # Date in YYYY-MM-DD format
DOWNLOAD_DIR = r'C:\\Users\\ping\\Downloads'  # Raw string for Windows paths
DEST_DIR = r'..\\data'
"""

    # Explicit encoding so the file does not depend on the platform default.
    with open('config.py', 'w', encoding='utf-8') as f:
        f.write(config_content)

    print(f"config.py created with date: {date_text}")


In [10]:
# Preview the slice used by the run loop: a one-element list holding the newest date.
sorted_dates[-1:]

['2025-05-14']

#### ==== BEFORE RUNNING SEQUENCE ====
#### Run colab finviz, Yloader
#### Select the `sorted_dates` slice to process in the next cell


In [None]:
# for date_str in sorted_dates:
for date_str in sorted_dates[-1::]:  # Run the sequence for the most recent date
# for date_str in sorted_dates[2::]:  # Run the sequence for the 3rd to the most recent date
  create_config_file(date_str)  # Create config.py with the date_str
  print(f'Updated config.py with date: {date_str}')
  print(f'Running sequence for {date_str}')
  %run run_sequence.py


config.py created with date: 2025-05-14
Updated config.py with date: 2025-05-14
Running sequence for 2025-05-14
Starting notebook execution sequence...

--- Running py1_clean_df_finviz_v13.ipynb ---

Running command: c:\Users\ping\Files_win10\python\py311\.venv\Scripts\jupyter nbconvert --to notebook --execute --output executed\executed_py1_clean_df_finviz_v13.ipynb py1_clean_df_finviz_v13.ipynb
Successfully executed py1_clean_df_finviz_v13.ipynb
Output saved to: executed\executed_py1_clean_df_finviz_v13.ipynb

--- Running py2_clean_df_OHLCV_v9.ipynb ---

Running command: c:\Users\ping\Files_win10\python\py311\.venv\Scripts\jupyter nbconvert --to notebook --execute --output executed\executed_py2_clean_df_OHLCV_v9.ipynb py2_clean_df_OHLCV_v9.ipynb
Successfully executed py2_clean_df_OHLCV_v9.ipynb
Output saved to: executed\executed_py2_clean_df_OHLCV_v9.ipynb

--- Running py2_save_df_adj_close_v0.ipynb ---

Running command: c:\Users\ping\Files_win10\python\py311\.venv\Scripts\jupyter nbc