In [1]:
import pandas as pd

# --- Assumptions (replace with your actual data) ---
# Assume sorted_selected_stocks_df looks something like this (already indexed by Ticker):
#         Cluster_ID  Raw_Score  Risk_Adj_Score  Weight  Volatility
# Ticker
# AAPL             1       0.85            0.75    0.15        0.25
# MSFT             1       0.80            0.70    0.12        0.22
# GOOG             0       0.90            0.80    0.10        0.30
# AMZN             0       0.75            0.65    0.08        0.35
# ...

# Assume df_data exists and might have different columns and potentially a different index/order
# Example:
data_for_df = {
    'Price': [155, 290, 2800, 3400, 500],
    'Volume': [1e6, 5e5, 2e5, 3e5, 1e5]
}
index_for_df = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'META'] # Example index
df_data = pd.DataFrame(data_for_df, index=index_for_df)
# df_data might look like:
#       Price  Volume
# AAPL    155  1000000.0
# MSFT    290   500000.0
# GOOG   2800   200000.0
# AMZN   3400   300000.0
# META    500   100000.0


# Assume ticker_order is defined (the desired final order)
# Example: must contain tickers present in BOTH dataframes you want to combine
ticker_order = ['MSFT', 'AAPL', 'GOOG', 'AMZN'] # Example order

# --- Solution ---

# 1. Select the desired columns from sorted_selected_stocks_df
new_cols_df = sorted_selected_stocks_df[['Weight', 'Cluster_ID']]

# 2. Reindex both the new columns and the existing df_data to the desired ticker_order.
#    This ensures rows align correctly before concatenation.
#    Use errors='ignore' in drop just in case Weight/Cluster_ID weren't in df_data.
new_cols_ordered = new_cols_df.reindex(ticker_order)
other_cols_ordered = df_data.drop(columns=['Weight', 'Cluster_ID'], errors='ignore').reindex(ticker_order)

# 3. Concatenate along columns (axis=1), putting the new columns first.
#    The index will be ticker_order because both DataFrames were reindexed.
final_df = pd.concat([new_cols_ordered, other_cols_ordered], axis=1)

# --- Verification ---
print("Original df_data (example):")
print(df_data)
print("\nSelected columns from sorted_selected_stocks_df (example):")
# Example data for sorted_selected_stocks_df if needed for running the code
example_sorted_data = {
    'Cluster_ID': [1, 1, 0, 0],
    'Raw_Score': [0.85, 0.80, 0.90, 0.75],
    'Risk_Adj_Score': [0.75, 0.70, 0.80, 0.65],
    'Weight': [0.15, 0.12, 0.10, 0.08],
    'Volatility': [0.25, 0.22, 0.30, 0.35]
}
sorted_selected_stocks_df = pd.DataFrame(example_sorted_data, index=['AAPL', 'MSFT', 'GOOG', 'AMZN'])
sorted_selected_stocks_df.index.name = 'Ticker'
print(sorted_selected_stocks_df[['Weight', 'Cluster_ID']])

print("\nFinal DataFrame:")
print(final_df)
print("\nFinal DataFrame Index:")
print(final_df.index)

NameError: name 'sorted_selected_stocks_df' is not defined

In [14]:
import pandas as pd
import numpy as np

# --- Modified Function Definition ---
def get_final_ewm_cov_corr_matrices(df, span=21, return_corr=True, return_cov=True):
    """
    Calculates the *final* EWM covariance and/or correlation matrix
    using the standard pandas df.ewm(adjust=True, ignore_na=False).cov() method.

    Args:
        df (pd.DataFrame): Input DataFrame with time series data (e.g., returns).
                           Index should be time-ordered.
        span (int): The span for the EWM calculation.
        return_corr (bool): Whether to return the correlation matrix.
        return_cov (bool): Whether to return the covariance matrix.

    Returns:
        pd.DataFrame or tuple(pd.DataFrame, pd.DataFrame):
            - If both return_cov and return_corr are True, returns (covariance, correlation).
            - If only one is True, returns that specific matrix.
            - Returns NaN-filled DataFrame(s) if calculation is not possible (e.g., empty df).
    """
    if df.empty or len(df) < 2: # Need at least 2 points for covariance
        # Handle empty or too short DataFrame input
        nan_res = pd.DataFrame(np.nan, index=df.columns, columns=df.columns)
        results = []
        if return_cov: results.append(nan_res)
        if return_corr: results.append(nan_res)
        # Ensure returning tuple if both requested, even if NaN
        if len(results) > 1:
             return tuple(results)
        elif len(results) == 1:
             return results[0]
        else:
             return None # Or raise error if neither requested?

    # --- Core Calculation using Pandas EWM ---
    try:
        # Calculate the full EWM covariance time series
        # adjust=True: Standard bias correction for EWM
        # ignore_na=False: Propagate NaNs if insufficient data at a point in time
        ewm_cov_ts = df.ewm(span=span, adjust=True, ignore_na=False).cov()

        # Check if the result is empty or all NaN (e.g., df shorter than min_periods derived from span)
        if ewm_cov_ts.empty or ewm_cov_ts.isnull().all().all():
             raise ValueError("Pandas EWM calculation resulted in all NaNs.")

        # Extract the *final* covariance matrix (corresponding to the last index)
        last_index = df.index[-1]
        # Use .iloc[-n_assets*n_assets:] as a robust way to get the last block
        # in case of duplicate indices or multi-index issues after EWM.
        # However, direct loc is usually cleaner if index is unique and standard.
        try:
            cov_matrix_df = ewm_cov_ts.loc[last_index]
        except KeyError:
            # Fallback if last_index isn't found directly (e.g., dropped due to NaNs earlier)
            # Find the last available index in the EWM result
            last_ewm_index = ewm_cov_ts.index.get_level_values(0)[-1]
            cov_matrix_df = ewm_cov_ts.loc[last_ewm_index]


    except Exception as e:
         # Handle potential errors during EWM calculation or extraction
         print(f"Error during pandas EWM calculation or final matrix extraction: {e}")
         # Return NaN matrices in case of error
         nan_res = pd.DataFrame(np.nan, index=df.columns, columns=df.columns)
         results = []
         if return_cov: results.append(nan_res)
         if return_corr: results.append(nan_res)
         if len(results) > 1:
             return tuple(results)
         elif len(results) == 1:
             return results[0]
         else:
             return None
    # --- End of Core Calculation ---


    # Prepare results
    results = []
    if return_cov:
        results.append(cov_matrix_df)

    if return_corr:
        # Calculate correlation from the final covariance matrix
        cov_matrix = cov_matrix_df.values # Use numpy array for calculation
        variances = np.diag(cov_matrix).copy()

        # Handle non-positive variances robustly
        variances[variances <= 1e-14] = 1e-14 # Use a small positive floor
        std_devs = np.sqrt(variances)

        # Calculate correlation matrix
        inv_std_devs = 1.0 / std_devs
        corr_matrix = cov_matrix * np.outer(inv_std_devs, inv_std_devs)

        # Clip diagonal to exactly 1.0 and off-diagonal to [-1.0, 1.0]
        # Handles potential floating point inaccuracies
        np.fill_diagonal(corr_matrix, 1.0)
        corr_matrix = np.clip(corr_matrix, -1.0, 1.0)

        corr_matrix_df = pd.DataFrame(corr_matrix,
                                      index=df.columns,
                                      columns=df.columns)
        results.append(corr_matrix_df)

    # Return tuple if both requested, otherwise the single DataFrame
    return tuple(results) if len(results) > 1 else results[0]

# --- Example Usage ---
np.random.seed(42) # for reproducibility
n_obs = 100
n_assets = 5
data = np.random.randn(n_obs, n_assets) * 0.01 + 0.0001 # Simulate returns
dates = pd.date_range(start='2023-01-01', periods=n_obs, freq='B')
df_test = pd.DataFrame(data, index=dates, columns=[f'Asset_{chr(65+i)}' for i in range(n_assets)])

# Get both final covariance and correlation
cov_final, corr_final = get_final_ewm_cov_corr_matrices(df_test, span=21, return_corr=True, return_cov=True)

print("--- Final EWM Covariance (adjust=True) ---")
print(cov_final)
print("\n--- Final EWM Correlation (adjust=True) ---")
print(corr_final)

# Get only final covariance
cov_only = get_final_ewm_cov_corr_matrices(df_test, span=21, return_corr=False, return_cov=True)
print("\n--- Final EWM Covariance Only ---")
print(cov_only)

--- Final EWM Covariance (adjust=True) ---
          Asset_A   Asset_B   Asset_C   Asset_D   Asset_E
Asset_A  0.000076  0.000014  0.000006 -0.000025 -0.000027
Asset_B  0.000014  0.000131  0.000031 -0.000020  0.000008
Asset_C  0.000006  0.000031  0.000082  0.000003  0.000012
Asset_D -0.000025 -0.000020  0.000003  0.000139  0.000054
Asset_E -0.000027  0.000008  0.000012  0.000054  0.000123

--- Final EWM Correlation (adjust=True) ---
          Asset_A   Asset_B   Asset_C   Asset_D   Asset_E
Asset_A  1.000000  0.137990  0.075930 -0.243463 -0.282015
Asset_B  0.137990  1.000000  0.298636 -0.147612  0.064539
Asset_C  0.075930  0.298636  1.000000  0.024381  0.116358
Asset_D -0.243463 -0.147612  0.024381  1.000000  0.409850
Asset_E -0.282015  0.064539  0.116358  0.409850  1.000000

--- Final EWM Covariance Only ---
          Asset_A   Asset_B   Asset_C   Asset_D   Asset_E
Asset_A  0.000076  0.000014  0.000006 -0.000025 -0.000027
Asset_B  0.000014  0.000131  0.000031 -0.000020  0.000008
Asset_C

In [13]:
import pandas as pd
import numpy as np

# --- Your Function Definition ---
def get_cov_corr_ewm_matrices_chunked(df, span=21, return_corr=True, return_cov=True, chunk_size=100):
    """
    Robust chunked calculation of EWM covariance and correlation matrices.
    Handles edge cases and ensures proper broadcasting.
    (NOTE: Calculates a specific weighted average, not standard recursive EWM Cov)
    """
    alpha = 2 / (span + 1)

    # Clean data - remove inf and drop rows with any NaN
    # IMPORTANT: This dropna() can differ significantly from pandas EWM NaN handling
    clean_df = df.replace([np.inf, -np.inf], np.nan).dropna()
    if clean_df.empty:
         # Handle empty DataFrame after cleaning
        empty_res = pd.DataFrame(np.nan, index=df.columns, columns=df.columns)
        results = []
        if return_cov: results.append(empty_res)
        if return_corr: results.append(empty_res)
        return tuple(results) if len(results) > 1 else results[0]

    n_assets = len(clean_df.columns)
    n_obs = len(clean_df)

    # Calculate EWM mean (point-in-time, adjust=False) and demean
    ewm_mean = clean_df.ewm(alpha=alpha, adjust=False).mean()
    demeaned = clean_df - ewm_mean

    # Compute weights as a column vector (for the entire history)
    weights = (1 - alpha) ** np.arange(n_obs - 1, -1, -1) # Corrected range N-1 down to 0
    # Denominator for normalization (adjust=True like EWM variance)
    # Note: Standard EWM variance/covariance adjust=True uses a different recursive normalization
    # This is one interpretation of a full-history adjusted weight sum
    # It ensures weights roughly sum to 1 for large N, similar to adjust=True spirit
    norm_factor = np.sum(weights) # Simple sum normalization
    weights /= norm_factor
    weights = weights.reshape(-1, 1)  # Shape (n_obs, 1)

    # Initialize covariance matrix
    cov_matrix = np.zeros((n_assets, n_assets))

    # Process in chunks
    demeaned_vals = demeaned.values # Use numpy array for faster access
    for i in range(0, n_assets, chunk_size):
        i_end = min(i + chunk_size, n_assets)
        chunk_i = demeaned_vals[:, i:i_end]  # Shape (n_obs, chunk_size_i)

        # Apply weights to chunk_i (broadcasting works automatically)
        weighted_chunk_i = chunk_i * weights  # Shape (n_obs, chunk_size_i)

        for j in range(i, n_assets, chunk_size):  # Start from i for upper triangle
            j_end = min(j + chunk_size, n_assets)
            chunk_j = demeaned_vals[:, j:j_end]  # Shape (n_obs, chunk_size_j)

            # Calculate weighted products for this chunk pair
            # weighted_chunk_i.T @ chunk_j performs Sum [ w_t * demeaned_i_t * demeaned_j_t ]
            cov_chunk = np.dot(weighted_chunk_i.T, chunk_j) # Shape (chunk_size_i, chunk_size_j)

            # Fill the covariance matrix
            cov_matrix[i:i_end, j:j_end] = cov_chunk

            # Fill symmetric part if not on diagonal
            if i != j:
                cov_matrix[j:j_end, i:i_end] = cov_chunk.T

    # Prepare results
    results = []
    cov_matrix_df = pd.DataFrame(cov_matrix,
                               index=clean_df.columns,
                               columns=clean_df.columns)
    if return_cov:
        results.append(cov_matrix_df)

    if return_corr:
        # Handle zero variances
        variances = np.diag(cov_matrix).copy()
        # Use a larger epsilon for numerical stability if variances are calculated near zero
        variances[variances <= 1e-12] = 1e-12
        std_devs = np.sqrt(variances)

        # Ensure diagonal is exactly 1 and handle potential division by near-zero std devs
        inv_std_devs = 1.0 / std_devs
        corr_matrix = cov_matrix * np.outer(inv_std_devs, inv_std_devs)
        np.fill_diagonal(corr_matrix, 1.0) # Clip diagonal for perfect 1s
        corr_matrix = np.clip(corr_matrix, -1.0, 1.0) # Clip off-diagonal due to potential fp errors

        corr_matrix_df = pd.DataFrame(corr_matrix,
                                    index=clean_df.columns,
                                    columns=clean_df.columns)
        results.append(corr_matrix_df)

    return tuple(results) if len(results) > 1 else results[0]
# --- End of Your Function ---


# --- Comparison Setup ---
# 1. Generate Sample Data
np.random.seed(42) # for reproducibility
n_obs = 100
n_assets = 5
data = np.random.randn(n_obs, n_assets) * 0.01 + 0.0001 # Simulate returns
dates = pd.date_range(start='2023-01-01', periods=n_obs, freq='B')
df = pd.DataFrame(data, index=dates, columns=[f'Asset_{chr(65+i)}' for i in range(n_assets)])
# Introduce a NaN to see handling (your code drops row, pandas ignores pair/propagates)
# df.iloc[10, 1] = np.nan

# 2. Define Parameters
span = 21
chunk_size = 2 # Small chunk size for testing chunking logic

# --- Run Calculations ---

# 3. Your Code's Calculation
print("--- Running Your Custom Function ---")
# Request only covariance
cov_custom = get_cov_corr_ewm_matrices_chunked(
    df.copy(), # Pass copy as your function modifies internally (dropna)
    span=span,
    return_corr=False,
    return_cov=True,
    chunk_size=chunk_size
)
print("\nCovariance Matrix from Your Function:")
print(cov_custom)

# 4. Standard Pandas EWM Calculation (adjust=True)
print("\n--- Running Standard Pandas EWM (adjust=True) ---")
# Calculate the full EWM covariance time series
# Note: Pandas default for ignore_na changed over versions. Explicitly set.
# ignore_na=False propagates NaNs. ignore_na=True tries to compute using available pairs.
ewm_cov_std_all = df.ewm(span=span, adjust=True, ignore_na=False).cov()

# Extract the *final* covariance matrix (corresponding to the last date)
final_date = df.index[-1]
cov_std_final = ewm_cov_std_all.loc[final_date]

print("\nFinal Covariance Matrix from Pandas EWM (adjust=True):")
print(cov_std_final)

# --- Compare Results ---
print("\n--- Comparison ---")

# Check if the DataFrames are numerically close
# Note: We expect them to be DIFFERENT due to calculation method differences
are_close = np.allclose(cov_custom.values, cov_std_final.values, atol=1e-8) # Use tolerance for float comparison
print(f"\nAre the matrices numerically close? {are_close}")

# Show the difference
difference = cov_custom - cov_std_final
print("\nDifference Matrix (Your Function - Pandas EWM):")
print(difference)

print("\nExplanation:")
print("The matrices are expected to be DIFFERENT.")
print("Key reasons for differences include:")
print("1. Calculation Method: Your code computes a single, full-history weighted average of cross-products of *point-in-time* demeaned values. Standard EWM is recursive.")
print("2. Demeaning: Your code uses point-in-time `adjust=False` EWM mean for demeaning. Standard `adjust=True` EWM Covariance effectively uses bias-corrected means in its recursive update, and the final 'adjust=True' value reflects a different normalization/weighting.")
# print("3. NaN Handling: Your code uses dropna() (removes entire row), while pandas EWM has `ignore_na` options (if NaNs were present).") # Uncomment if you add NaNs
print("4. Weight Normalization: The exact normalization factor applied in your direct sum differs from the implicit normalization within the standard EWM recursion, especially the 'adjust=True' variant.")

--- Running Your Custom Function ---

Covariance Matrix from Your Function:
          Asset_A   Asset_B   Asset_C   Asset_D   Asset_E
Asset_A  0.000066  0.000012  0.000005 -0.000022 -0.000024
Asset_B  0.000012  0.000114  0.000027 -0.000017  0.000007
Asset_C  0.000005  0.000027  0.000071  0.000002  0.000010
Asset_D -0.000022 -0.000017  0.000002  0.000120  0.000046
Asset_E -0.000024  0.000007  0.000010  0.000046  0.000107

--- Running Standard Pandas EWM (adjust=True) ---

Final Covariance Matrix from Pandas EWM (adjust=True):
          Asset_A   Asset_B   Asset_C   Asset_D   Asset_E
Asset_A  0.000076  0.000014  0.000006 -0.000025 -0.000027
Asset_B  0.000014  0.000131  0.000031 -0.000020  0.000008
Asset_C  0.000006  0.000031  0.000082  0.000003  0.000012
Asset_D -0.000025 -0.000020  0.000003  0.000139  0.000054
Asset_E -0.000027  0.000008  0.000012  0.000054  0.000123

--- Comparison ---

Are the matrices numerically close? False

Difference Matrix (Your Function - Pandas EWM):
         

In [10]:
import sys
from pathlib import Path
import pandas as pd

# Notebook cell
%load_ext autoreload
%autoreload 2

# Get root directory (assuming notebook is in root/notebooks/)
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Add src directory to Python path
sys.path.append(str(ROOT_DIR / 'src'))

# Verify path
print(f"Python will look in these locations:\n{sys.path}")


# --- Execute the processor ---
import utils

# SOURCE_PATH_OHLCV = '..\data\df_OHLCV_2025-03-07_clean.pkl'
# SOURCE_PATH_STOCK = '..\data\df_finviz_stocks_n_ratios.pkl'
# SOURCE_PATH_ETF = '..\data\df_finviz_etfs_n_ratios.pkl'

SOURCE_PATH, DEST_PATH = utils.main_processor(
    data_dir='..\data',  # search project ..\data
    downloads_dir=None,  # None searchs Downloads dir, '' omits search1
    downloads_limit=60,  # search the first 10 files
    clean_name_override='df_finviz.pkl',  # override filename
    start_file_pattern='df_finviz_2025', # search for files starting with 'df_'
    # start_file_pattern='df_OHLCV_2025', # search for files starting with 'df_'    
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\python311.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\Lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py311\\stocks\\src', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\setuptools\\_vendor', 'c:\\Users\\ping\\Files_win10\\python\\py311\\stocks\\src', 'c:\\Users\\ping\\Files_w

<span style='color:#00ffff;font-weight:500'>[Downloads] Scanned latest 60 files • Found 14 'df_finviz_2025' matches</span>

**Available 'df_finviz_2025' files:**

- (1) `[DOWNLOADS]` `df_finviz_2025-04-01.parquet` <span style='color:#00ffff'>(6.88 MB, 2025-04-01 16:37)</span>

- (2) `[DOWNLOADS]` `df_finviz_2025-03-31.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-31 23:53)</span>

- (3) `[DOWNLOADS]` `df_finviz_2025-03-28.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-28 17:41)</span>

- (4) `[DOWNLOADS]` `df_finviz_2025-03-27.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-27 15:09)</span>

- (5) `[DOWNLOADS]` `df_finviz_2025-03-26.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-26 15:12)</span>

- (6) `[DOWNLOADS]` `df_finviz_2025-03-25.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-25 15:23)</span>

- (7) `[DOWNLOADS]` `df_finviz_2025-03-24 (1).pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-24 13:20)</span>

- (8) `[DOWNLOADS]` `df_finviz_2025-03-24.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-24 13:12)</span>

- (9) `[DOWNLOADS]` `df_finviz_2025-03-21.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-21 13:09)</span>

- (10) `[DOWNLOADS]` `df_finviz_2025-03-20.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-20 13:18)</span>

- (11) `[DOWNLOADS]` `df_finviz_2025-03-19.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-19 13:14)</span>

- (12) `[DOWNLOADS]` `df_finviz_2025-03-18.pkl` <span style='color:#00ffff'>(0.51 MB, 2025-03-18 13:38)</span>

- (13) `[DOWNLOADS]` `df_finviz_2025-03-17.pkl` <span style='color:#00ffff'>(0.46 MB, 2025-03-17 13:06)</span>

- (14) `[DOWNLOADS]` `df_finviz_2025-03-14.pkl` <span style='color:#00ffff'>(0.46 MB, 2025-03-14 15:30)</span>


Input a number to select file (1-14)



    **Selected paths:**
    - Source: `C:\Users\ping\Downloads\df_finviz_2025-04-01.parquet`  
    - Destination: `..\data\df_finviz.pkl`
    

In [11]:
import pandas as pd

# Read the Parquet file
df = pd.read_parquet(SOURCE_PATH, engine='pyarrow')

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 394579 entries, ('TBIL', 'Apr 1, 2025') to ('IEI', 'Apr 2, 2024')
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   Open       394579 non-null  object
 1   High       389418 non-null  object
 2   Low        389418 non-null  object
 3   Close      389418 non-null  object
 4   Adj Close  389418 non-null  object
 5   Volume     389418 non-null  object
dtypes: object(6)
memory usage: 19.6+ MB


In [27]:
import re

# Extract date using regex pattern
date_pattern = r'(\d{4}-\d{2}-\d{2})'
match = re.search(date_pattern, SOURCE_PATH)
if match:
  date_str = match.group(1)
  print(f"Extracted date: {date_str}")
else:
  print("No date found in the path")

Extracted date: 2025-03-14


In [28]:
# create_config.py
def create_config_file(date_str):
    """Create config.py with date_str as a string variable"""
    config_content = f"""# config.py
# Automatically generated date configuration
date_str = '{date_str}'  # Date in YYYY-MM-DD format
"""
    
    with open('config.py', 'w') as f:
        f.write(config_content)
    
    print(f"config.py created successfully with date: {date_str}")


In [29]:
create_config_file(date_str)

config.py created successfully with date: 2025-03-14


In [30]:
# script_using_config.py
from config import date_str

print(f"Date string from config: {date_str}")  # Direct string access

Date string from config: 2025-03-14
