In [1]:
import sys
from pathlib import Path
import pandas as pd
import os
from IPython.display import display, Markdown  # Assuming you use these for display


# Pandas display options for this notebook.
# Show every column instead of eliding the middle ones with "...".
pd.set_option('display.max_columns', None)
# Use one large, fixed line width so wide frames are not wrapped unless
# absolutely necessary. (None would auto-detect the terminal width instead;
# the option is set exactly once so there is no ambiguity about which value wins.)
pd.set_option('display.width', 1000)


# Load IPython's autoreload extension and select mode 2, which reloads all
# modules before each cell executes, so edits to imported project modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Resolve the project root: when the notebook is run from <root>/notebooks/,
# the root is the parent directory; otherwise the working directory is used.
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Add src directory to Python path. The membership check keeps this cell
# idempotent: re-running it does not append a duplicate entry each time.
SRC_DIR = str(ROOT_DIR / 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Verify path
print(f"Python will look in these locations:\n{sys.path}")


# --- Execute the processor ---
import utils  # project module, expected to live in the src directory added to sys.path

print('\n ===== Pick a file to extract the date to run run_sequence.py =====')

# Only the first returned value (the selected source path) is used below.
# NOTE(review): the meaning of the two discarded return values is defined by
# utils.main_processor — confirm there before relying on them.
SOURCE_PATH, _, _ = utils.main_processor(
    data_dir=r'..\_',  # raw string: '\_' is not a valid escape in a normal string literal
    # data_dir=r'..\data',  # alternative: use the project ..\data directory
    downloads_dir=None,  # None searches the Downloads dir, '' omits that search
    downloads_limit=60,  # consider only the latest 60 files
    clean_name_override=None,  # override filename
    start_file_pattern='df_OHLCV_',  # match files starting with 'df_OHLCV_'
    contains_pattern='.parquet',  # match files containing '.parquet'
)


Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\python311.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\Lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py311\\stocks\\src']

 ===== Pick a file to extract the date to run run_sequence.py =====


<span style='color:#00ffff;font-weight:500'>[Downloads] Scanned latest 60 files • Found 9 'df_OHLCV_' matches</span>

**Available 'starting with 'df_OHLCV_' and containing '.parquet'' files:**

- (1) `[DOWNLOADS]` `df_OHLCV_2025-05-05_stocks_etfs.parquet` <span style='color:#00ffff'>(13.03 MB, 2025-05-05 23:55)</span>

- (2) `[DOWNLOADS]` `df_OHLCV_2025-05-02_stocks_etfs.parquet` <span style='color:#00ffff'>(13.03 MB, 2025-05-03 04:20)</span>

- (3) `[DOWNLOADS]` `df_OHLCV_2025-05-01_stocks_etfs.parquet` <span style='color:#00ffff'>(13.00 MB, 2025-05-01 22:28)</span>

- (4) `[DOWNLOADS]` `df_OHLCV_2025-04-30_stocks_etfs.parquet` <span style='color:#00ffff'>(12.99 MB, 2025-04-30 22:30)</span>

- (5) `[DOWNLOADS]` `df_OHLCV_2025-04-29_stocks_etfs.parquet` <span style='color:#00ffff'>(12.89 MB, 2025-04-29 22:10)</span>

- (6) `[DOWNLOADS]` `df_OHLCV_2025-04-28_stocks_etfs.parquet` <span style='color:#00ffff'>(13.00 MB, 2025-04-28 22:38)</span>

- (7) `[DOWNLOADS]` `df_OHLCV_2025-04-25_stocks_etfs.parquet` <span style='color:#00ffff'>(13.04 MB, 2025-04-27 21:01)</span>

- (8) `[DOWNLOADS]` `df_OHLCV_2025-04-25_stocks.parquet` <span style='color:#00ffff'>(9.90 MB, 2025-04-25 21:34)</span>

- (9) `[DOWNLOADS]` `df_OHLCV_2025-04-24_stocks.parquet` <span style='color:#00ffff'>(9.83 MB, 2025-04-25 01:34)</span>


Input a number to select file (1-9)



    **Selected paths:**
    - Source: `C:\Users\ping\Downloads\df_OHLCV_2025-04-25_stocks.parquet`
    - Destination: `c:\Users\ping\Files_win10\python\py311\stocks\_\df_OHLCV_2025-04-25_stocks_clean.parquet`
    

In [2]:
import re

# Extract the first YYYY-MM-DD date that appears in the selected file path.
date_pattern = r'(\d{4}-\d{2}-\d{2})'
# str() lets this work whether SOURCE_PATH is a plain string or a Path object.
match = re.search(date_pattern, str(SOURCE_PATH))
if match is None:
  # Fail loudly: the following cells need date_str, and merely printing a
  # message would leave it undefined (or stale from an earlier run).
  raise ValueError(f"No date found in the path: {SOURCE_PATH}")
date_str = match.group(1)
print(f"Extracted date: {date_str}")

Extracted date: 2025-04-25


In [3]:
# create_config.py
def create_config_file(date_str, config_path='config.py'):
    """Write a small Python config module holding the run date and directories.

    The generated file defines three module-level names: ``date_str``,
    ``DOWNLOAD_DIR`` and ``DEST_DIR``. An existing file at ``config_path`` is
    overwritten.

    Args:
        date_str: Date to record, expected in YYYY-MM-DD format. It is
            converted with ``str()`` and written as a properly escaped Python
            string literal, so quotes or backslashes in the value cannot
            produce a syntactically broken config file.
        config_path: Where to write the file. Defaults to ``'config.py'`` in
            the current working directory.

    Returns:
        None. The function writes the file and prints a confirmation line.
    """
    # !r emits a quoted, escaped literal; for a plain date such as 2025-04-25
    # the generated line is identical to manually wrapping it in single quotes.
    config_content = f"""# config.py
# File path configuration
date_str = {str(date_str)!r}  # Date in YYYY-MM-DD format
DOWNLOAD_DIR = r'C:\\Users\\ping\\Downloads'  # Raw string for Windows paths
DEST_DIR = r'..\\data'
"""

    # Explicit encoding so the result does not depend on the platform default.
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(config_content)

    print(f"config.py created with date: {date_str}")


In [4]:
# Write config.py (in the current working directory) with the extracted date.
create_config_file(date_str)

config.py created with date: 2025-04-25


In [5]:
# script_using_config.py
import importlib

# config.py is (re)written while the kernel is running, so make sure the import
# system notices the new file and does not hand back a stale cached module.
importlib.invalidate_caches()
if 'config' in sys.modules:
    importlib.reload(sys.modules['config'])

from config import date_str, DOWNLOAD_DIR, DEST_DIR

print(f"Date string from config: {date_str}")  # Direct string access
print(f"Source directory from config: {DOWNLOAD_DIR}")
print(f"Destination directory from config: {DEST_DIR}")

Date string from config: 2025-04-25
Source directory from config: C:\Users\ping\Downloads
Destination directory from config: ..\data


In [None]:
# Announce the date being processed, then execute run_sequence.py via IPython's
# %run magic.
# NOTE(review): run_sequence.py presumably reads the generated config.py — confirm.
print(f'Running sequence for {date_str}')
%run run_sequence.py