In [2]:
pip install pyyaml


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip




In [4]:
import os
import pandas as pd
import yaml

def yaml_to_dataframe(file_path):
    """Read a YAML file and convert its contents to a pandas DataFrame.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The DataFrame produced by passing the parsed YAML document directly
        to the ``pd.DataFrame`` constructor.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without an encoding argument, open() uses
    # the locale's preferred encoding, which is platform dependent and can
    # garble or reject non-ASCII characters in the YAML file.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return pd.DataFrame(data)

def process_yaml_files(data_folder):
    """Load every YAML file one level below ``data_folder`` into one DataFrame.

    Only the immediate subdirectories of ``data_folder`` are scanned; files
    placed directly in ``data_folder`` and anything nested deeper are ignored.
    Subdirectories and files are visited in sorted name order so the row
    order of the result is the same on every run.

    Args:
        data_folder: Directory whose subdirectories contain the YAML files.

    Returns:
        A DataFrame with the rows of all files concatenated under a fresh
        0..n-1 index, or an empty DataFrame when no YAML file is found.
    """
    frames = []

    # os.listdir() returns entries in arbitrary order, so sort both levels.
    for subdir in sorted(os.listdir(data_folder)):
        subdir_path = os.path.join(data_folder, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for file_name in sorted(os.listdir(subdir_path)):
            # Case-insensitive match so "X.YAML" / "x.Yml" are not skipped.
            if file_name.lower().endswith((".yaml", ".yml")):
                file_path = os.path.join(subdir_path, file_name)
                frames.append(yaml_to_dataframe(file_path))

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

def save_stockwise_csv(df, output_folder):
    """Write one CSV file per stock ticker into ``output_folder``.

    Rows are grouped by the ``Ticker`` column and each group is saved as
    ``<ticker>.csv`` without the index column. Existing files with the same
    name are overwritten.

    Args:
        df: DataFrame containing a ``Ticker`` column.
        output_folder: Directory to write into; created if it is missing.

    Raises:
        KeyError: If ``df`` has no ``Ticker`` column.
    """
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(output_folder, exist_ok=True)

    for ticker, stock_df in df.groupby('Ticker'):
        file_name = f"{ticker}.csv"
        stock_df.to_csv(os.path.join(output_folder, file_name), index=False)

def main(
    data_folder=r"C:\Users\velut\Downloads\data (1)",
    output_folder=r"C:\Users\velut\OneDrive\Documents\New folder\output",
):
    """Combine all YAML files under ``data_folder`` and export them as CSV.

    Writes ``all_stocks.csv`` (every row) plus one CSV per ticker into
    ``output_folder``. When no data is found, only a message is printed.

    Args:
        data_folder: Directory whose subdirectories hold the YAML files.
            Defaults to the location originally hard-coded here.
        output_folder: Directory that receives the CSV files; it is created
            if missing, even when no data is found.
    """
    # Ensure the output folder exists before the first file is written.
    os.makedirs(output_folder, exist_ok=True)

    combined_df = process_yaml_files(data_folder)

    if combined_df.empty:
        print("No data found in YAML files.")
        return

    combined_df.to_csv(os.path.join(output_folder, "all_stocks.csv"), index=False)
    save_stockwise_csv(combined_df, output_folder)

# Entry point: run the pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()