In [1]:
import os
import shutil
import re
# Resolve paths from the process's current working directory: the directory
# itself and its parent, which later cells use as the base for data paths.
current_directory = os.getcwd()
market_gen_path = os.path.dirname(current_directory)

In [2]:
def sort_files(source_folder):
    """Move option plot images in ``source_folder`` into nested sub-folders.

    Every regular file directly inside ``source_folder`` whose name contains
    ``n-in=<digits><letter>`` + ``_``/``-`` + ``<digits>_days`` + ``_``/``-``
    + ``call``/``put`` + optional model suffix + ``.png`` is moved to
    ``<source_folder>/<first>/<second>/<third>/`` where:

    * ``first`` is ``"SameModel"`` or ``"Retrained"``. For ``n-in=0Y`` it is
      taken from the model suffix (``retrained_model_`` / ``same_model_``);
      for every other input it is ``"SameModel"`` when the input is one of
      3Y, 10Y, 100Y, 1000Y, 10000Y and ``"Retrained"`` otherwise.
    * ``second`` is the short maturity label (``5D``, ``10D``, ``21D``,
      ``252D``), or the lower-cased ``<digits>_days`` token when the maturity
      has no short label.
    * ``third`` is ``call`` or ``put`` (lower case).

    The file name is matched case-insensitively and every captured part is
    normalised before it is compared, so e.g. ``n-in=3y_5_DAYS_CALL.png`` is
    routed exactly like ``n-in=3Y_5_days_call.png``.

    Files that do not match, and ``n-in=0Y`` files without a model suffix,
    are reported with ``print`` and left where they are. Sub-directories of
    ``source_folder`` are ignored.

    Parameters
    ----------
    source_folder : str
        Path of the folder whose files are sorted. Destination folders are
        created inside it as needed.

    Returns
    -------
    None

    Raises
    ------
    OSError
        Propagated from ``os.listdir``, ``os.makedirs`` or ``shutil.move``
        (for example when ``source_folder`` does not exist).
    """
    # Maps the maturity token found in the file name to its folder name.
    maturity_mapping = {
        '5_days': '5D',
        '10_days': '10D',
        '21_days': '21D',
        '252_days': '252D'
    }

    # n-in values that go to "SameModel"; any other value (except the
    # special case "0Y") goes to "Retrained".
    same_model_inputs = {'3Y', '10Y', '100Y', '1000Y', '10000Y'}

    # NOTE(review): the optional model suffix must follow "call"/"put"
    # directly (no separator) and end right before ".png". If the real file
    # names carry the model tag elsewhere (e.g. as a prefix), n-in=0Y files
    # will always hit the "no model suffix" branch -- confirm naming scheme.
    file_pattern = re.compile(r"n-in=(?P<input_years>\d+[A-Za-z])[_-]"   # Matches the n-in part (e.g., n-in=3Y)
                              r"(?P<option_maturity>\d+_days)[_-]"       # Matches the option maturity (e.g., 5_days)
                              r"(?P<option_type>call|put)"               # Matches call or put
                              r"(?P<model_suffix>retrained_model_|same_model_)?\.png$", re.IGNORECASE)  # Optional suffix

    for file_name in os.listdir(source_folder):
        source_path = os.path.join(source_folder, file_name)

        # Only plain files are sorted; directories (including the
        # destination folders created below) are skipped.
        if not os.path.isfile(source_path):
            continue

        match = file_pattern.search(file_name)
        if match is None:
            # Debugging output: print if no match is found
            print(f"No match for file: {file_name}")
            continue

        # The pattern is case-insensitive, so normalise every captured group
        # before comparing it with the case-sensitive constants below.
        input_years = match.group('input_years').upper()
        option_maturity = match.group('option_maturity').lower()
        option_type = match.group('option_type').lower()
        model_suffix = match.group('model_suffix')  # 'retrained_model_', 'same_model_' (any case) or None

        # First folder level: SameModel vs. Retrained.
        if input_years == '0Y':
            # Special case: for n-in=0Y the folder comes from the model suffix.
            if model_suffix is None:
                print(f"Error: no model suffix found for {file_name}")
                continue
            first_level_folder = "Retrained" if 'retrained_model_' in model_suffix.lower() else "SameModel"
        else:
            # Normal case: decided by membership in same_model_inputs.
            first_level_folder = "SameModel" if input_years in same_model_inputs else "Retrained"

        # Second folder level: maturity (5D, 10D, ...); unknown maturities
        # fall back to the raw "<n>_days" token.
        second_level_folder = maturity_mapping.get(option_maturity, option_maturity)

        # Third folder level: call or put.
        third_level_folder = option_type

        destination_folder = os.path.join(source_folder, first_level_folder, second_level_folder, third_level_folder)

        # Create the folders if they do not exist
        os.makedirs(destination_folder, exist_ok=True)

        # Move the file to the new folder
        shutil.move(source_path, os.path.join(destination_folder, file_name))
        print(f"Moved {file_name} to {destination_folder}")

In [None]:
# Folder containing the files to be sorted. Built with os.path.join so the
# separators are correct on every platform; change the trailing components
# to point at a different plot folder.
source_folder = os.path.join(market_gen_path, "stat_plots", "YFinance")

# Sort the files: matching files are moved into sub-folders of source_folder.
sort_files(source_folder)