In [1]:
import os
import shutil
import re
# Current working directory of the running process.
current_directory = os.getcwd()
# Parent of the working directory; used below as the base for the plots folder.
market_gen_path = os.path.split(current_directory)[0]

In [2]:
def sort_files(source_folder):
    """Sort plot files in ``source_folder`` into a nested folder hierarchy.

    Every regular file located directly inside ``source_folder`` whose name
    matches the expected plot naming scheme is moved to::

        <source_folder>/<spec>/<OptionType>/<nDays>D/<call|put>/<plot_type>/

    where ``<spec>`` is one of ``GBM``, ``KJD1``, ``KJD2`` or ``YFinance``,
    ``<OptionType>`` is ``Asian``, ``European`` or ``Lookback`` and
    ``<plot_type>`` is ``prices``, ``dev``, ``dev_rel`` or ``dev_rel_zoom``.
    Destination folders are created on demand and the file keeps its name.

    Files that are left in place are reported on stdout:

    * names that do not match the naming scheme at all, and
    * names whose model specification belongs to none of the four groups
      (for example a Kou model with eta values other than 50/25 or 25/10).

    Args:
        source_folder: Path of the directory holding the files to sort.
            Sub-directories are ignored; the listing is not recursive.

    Returns:
        None. The function works through side effects (moving files and
        printing one line per processed file).

    Raises:
        OSError: Propagated unchanged from ``os.listdir``, ``os.makedirs`` or
            ``shutil.move`` (e.g. when ``source_folder`` does not exist).
    """
    # Patterns assigning a parsed specification to its top-level folder.
    # Compiled once here instead of being re-resolved for every file.
    # Checked in insertion order; the first pattern that matches wins.
    specification_patterns = {
        'GBM': re.compile(r"GBM-mu=\d+,\d+_sigma=\d+,\d+"),
        'KJD1': re.compile(r"Kou_Jump_Diffusion-mu=\d+,\d+_sigma=\d+,\d+_lambda=\d+,\d+_p=\d+,\d+_eta1=50,\d+_eta2=25,\d+"),
        'KJD2': re.compile(r"Kou_Jump_Diffusion-mu=\d+,\d+_sigma=\d+,\d+_lambda=\d+,\d+_p=\d+,\d+_eta1=25,\d+_eta2=10,\d+"),
        'YFinance': re.compile(r"YFinance-ticker=\^GSPC_start=\d{4}-\d{2}-\d{2}_end=2024-06-30"),
    }

    # Regular expression that parses the individual parts of a file name.
    file_pattern = re.compile(
        r"(?P<option_type>asian|european|lookback)_"  # Option type
        r"(?P<call_put>call|put)_option_"  # Call or put
        r"(?P<plot_type>prices|dev|dev_rel|dev_rel_zoom)_"  # Plot type
        r"(?P<specification>GBM-mu=\d+,\d+_sigma=\d+,\d+|Kou_Jump_Diffusion-mu=\d+,\d+_sigma=\d+,\d+_lambda=\d+,\d+_p=\d+,\d+_eta1=\d+,\d+_eta2=\d+,\d+|YFinance-ticker=\^GSPC_start=\d{4}-\d{2}-\d{2}_end=2024-06-30)"  # Specification
        r".*n-in=\d+Y.*"  # Training sample size (ignored)
        r"nDays=(?P<option_maturity>\d+)"  # Option maturity in days
    )

    # The listing is taken once, before any folder is created, and sorted so
    # that the printed log has a reproducible order.
    for file_name in sorted(os.listdir(source_folder)):
        source_path = os.path.join(source_folder, file_name)

        # Only regular files are sorted; directories are skipped.
        if not os.path.isfile(source_path):
            continue

        match = file_pattern.search(file_name)
        if match is None:
            # Debugging output: the name does not follow the naming scheme.
            print(f"No match for file: {file_name}")
            continue

        # Determine the specification folder (GBM, KJD1, KJD2 or YFinance).
        specification = match.group('specification')
        spec_folder = next(
            (
                folder
                for folder, pattern in specification_patterns.items()
                if pattern.match(specification)
            ),
            None,
        )
        if spec_folder is None:
            # Previously skipped silently; report it like any other file that
            # is left behind so it does not go unnoticed.
            print(f"No specification folder for file: {file_name}")
            continue

        option_type = match.group('option_type').capitalize()  # Asian, European, Lookback
        call_put = match.group('call_put')  # Already lower-case: call or put
        plot_type = match.group('plot_type')  # prices, dev, dev_rel, dev_rel_zoom
        maturity_folder = f"{match.group('option_maturity')}D"  # e.g. 5D, 21D, 252D

        destination_folder = os.path.join(
            source_folder, spec_folder, option_type, maturity_folder, call_put, plot_type
        )

        # Create the folders if they do not exist.
        os.makedirs(destination_folder, exist_ok=True)

        # NOTE: the file keeps its original name. Very long destination paths
        # may still run into the Windows path-length limit; no shortening is
        # applied here.
        shutil.move(source_path, os.path.join(destination_folder, file_name))
        print(f"Moved {file_name} to {destination_folder}")

In [4]:
# Folder containing the files to be sorted: the "plots" directory located in
# the parent of the working directory. Point this at another folder if needed.
# os.path.join is used instead of concatenating "/" so the separator is
# correct on every platform and is not doubled when the parent is the root.
source_folder = os.path.join(market_gen_path, "plots")

# Sort the files of that folder into the nested folder hierarchy.
sort_files(source_folder)