In [3]:
import pandas as pd
import glob
import os

def load_csv_files_from_folder(folder_path):
    """Load every ``*.csv`` file directly inside ``folder_path`` into a DataFrame.

    Files are read in sorted path order so the returned list is the same on
    every run (``glob`` itself gives no ordering guarantee). Loading stays
    best-effort: a file that cannot be read is skipped, but one line naming
    the file and the error is printed so missing data does not go unnoticed.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to search (non-recursive).

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per successfully read file, in sorted path order.
        Empty if no CSV files were found or none could be read.
    """
    csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
    loaded_dataframes = []

    if not csv_files:
        print(f"No CSV files found in '{folder_path}'.")
        return loaded_dataframes

    for file_path in csv_files:
        try:
            loaded_dataframes.append(pd.read_csv(file_path))
        except Exception as exc:
            # Keep going with the remaining files, but say what was dropped and why.
            print(f"Skipping '{file_path}': {type(exc).__name__}: {exc}")

    return loaded_dataframes

# --- Execution ---
yfinance_data_folder = '../data/yfinance_data/'

# One DataFrame per CSV file found in the folder above.
all_yfinance_dfs = load_csv_files_from_folder(yfinance_data_folder)

# Report how many files were loaded and preview the first one, if any.
print(f"Successfully loaded {len(all_yfinance_dfs)} DataFrames.")
if len(all_yfinance_dfs) > 0:
    print("\nHead of the first loaded DataFrame:", all_yfinance_dfs[0].head(), sep="\n")

Successfully loaded 7 DataFrames.

Head of the first loaded DataFrame:
         Date      Open      High       Low     Close  Adj Close      Volume  \
0  1986-03-13  0.088542  0.101563  0.088542  0.097222   0.059946  1031788800   
1  1986-03-14  0.097222  0.102431  0.097222  0.100694   0.062087   308160000   
2  1986-03-17  0.100694  0.103299  0.100694  0.102431   0.063158   133171200   
3  1986-03-18  0.102431  0.103299  0.098958  0.099826   0.061552    67766400   
4  1986-03-19  0.099826  0.100694  0.097222  0.098090   0.060482    47894400   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


In [None]:
import pandas as pd

# Target schema: the columns every standardized DataFrame must contain,
# listed in the order they should appear.
REQUIRED_COLUMNS = [
    'Date',
    'Open',
    'High',
    'Low',
    'Close',
    'Adj Close',
    'Volume',
    'Dividends',
    'Stock Splits',
]

# Alias table: lowercase spelling of a column name -> its standard name.
# Keys are lowercase because column names are lowercased before the lookup.
COLUMN_NAME_MAPPING = {
    # Date and prices
    'date': 'Date',
    'open': 'Open',
    'high': 'High',
    'low': 'Low',
    'close': 'Close',
    'adj close': 'Adj Close',
    'adj_close': 'Adj Close',

    # Volume
    'volume': 'Volume',
    'vol': 'Volume',

    # Corporate actions
    'dividends': 'Dividends',
    'dividend': 'Dividends',
    'stock splits': 'Stock Splits',
    'stock_splits': 'Stock Splits',
}

# --- Function to process each DataFrame ---
def standardize_df_columns(df, required_cols, col_mapping):
    """Return a new DataFrame holding exactly ``required_cols``, in that order.

    Column names are matched case-insensitively and with surrounding
    whitespace ignored. A source column is recognised when its normalized
    name is a key of ``col_mapping`` or equals the lowercase form of a
    required column name. The input ``df`` is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be standardized.
    required_cols : list
        Standard column names the result must contain, in output order.
    col_mapping : dict
        Lowercase alias -> standard column name.

    Returns
    -------
    pandas.DataFrame
        A new frame with one column per entry of ``required_cols``. Required
        columns with no matching source column are filled with ``pd.NA``;
        all other source columns are dropped. When several source columns
        resolve to the same standard name, the left-most one is kept.
    """
    # Every required name is its own alias (case-insensitively), so a required
    # column is still found when col_mapping has no entry for it. Explicit
    # aliases are applied afterwards and therefore take precedence.
    alias_to_standard = {str(col).strip().lower(): col for col in required_cols}
    alias_to_standard.update(col_mapping)

    # str() keeps non-string labels from breaking the normalization.
    normalized_names = [str(name).strip().lower() for name in df.columns]
    standard_names = [alias_to_standard.get(name, name) for name in normalized_names]

    # Position of the first source column resolving to each standard name.
    first_position = {}
    for position, name in enumerate(standard_names):
        first_position.setdefault(name, position)

    present = [col for col in required_cols if col in first_position]

    # Positional selection plus copy: immune to duplicate labels and leaves
    # the caller's frame untouched.
    result = df.iloc[:, [first_position[col] for col in present]].copy()
    result.columns = present

    # Add any required column that had no source counterpart.
    for col in required_cols:
        if col not in first_position:
            result[col] = pd.NA

    # Enforce the required column order.
    return result[list(required_cols)]

# --- Process all loaded DataFrames ---
# Standardized DataFrames, in the order they were loaded (frames that fail
# to standardize are left out).
processed_yfinance_dfs = []

# Standardize each DataFrame loaded in the previous step.
for source_index, raw_df in enumerate(all_yfinance_dfs):
    try:
        # Work on a copy so the originally loaded frame stays as it was read.
        processed_df = standardize_df_columns(raw_df.copy(), REQUIRED_COLUMNS, COLUMN_NAME_MAPPING)
    except Exception as exc:
        # Best effort: carry on with the other frames, but report which one
        # was dropped and why instead of discarding it silently.
        print(f"Skipping DataFrame {source_index}: {type(exc).__name__}: {exc}")
        continue
    processed_yfinance_dfs.append(processed_df)

print(f"Finished processing {len(processed_yfinance_dfs)} DataFrames.")
if processed_yfinance_dfs:
    print("\nExample: Head of the first processed DataFrame:")
    print(processed_yfinance_dfs[0].head())
    print("\nExample: Columns of the first processed DataFrame:")
    print(processed_yfinance_dfs[0].columns.tolist())
    print("\nExample: Check shape and columns of all processed DFs:")
    for i, df in enumerate(processed_yfinance_dfs):
        print(f"DF {i}: Shape {df.shape}, Columns {df.columns.tolist()}")

Finished processing 7 DataFrames.

Example: Head of the first processed DataFrame:
         Date      Open      High       Low     Close  Adj Close      Volume  \
0  1986-03-13  0.088542  0.101563  0.088542  0.097222   0.059946  1031788800   
1  1986-03-14  0.097222  0.102431  0.097222  0.100694   0.062087   308160000   
2  1986-03-17  0.100694  0.103299  0.100694  0.102431   0.063158   133171200   
3  1986-03-18  0.102431  0.103299  0.098958  0.099826   0.061552    67766400   
4  1986-03-19  0.099826  0.100694  0.097222  0.098090   0.060482    47894400   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  

Example: Columns of the first processed DataFrame:
['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']

Example: Check shape and columns of all processed DFs:
DF 0: Shape (9672, 9), Columns ['Date', 'Open', 'High', 'Low'