In [1]:
import os
import re

import pandas as pd

In [2]:
# def aggregate_csv_files(folder_path, output_path_template, x_values):

#     """
#     Aggregate all .csv files in a folder into a single file for specified values of x.

#     Args:
#         folder_path (str): The path to the folder containing the .csv files.
#         output_path_template (str): The path template for the output file to create; '{}' is replaced by each x value.
#         x_values (list): A list of values of x to aggregate files for.

#     Returns:
#         None
#     """


#     for x in x_values:
#         csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv') and f.startswith(str(x))]
#         csv_files.sort(key=lambda x: int(''.join(filter(str.isdigit, x))))
#         print(csv_files)
#         df = pd.concat([pd.read_csv(os.path.join(folder_path, f)) for f in csv_files], ignore_index=True, axis = 0)
#         output_path = output_path_template.format(x)  # Insert x value into output file name
#         print(output_path)
#         df.to_csv(output_path, index=False)


def _natural_sort_key(file_name):
    """Return a sort key that orders file names by the numbers embedded in them."""
    # Every run of digits is compared as its own integer, so 'aet_1_10.csv'
    # sorts after 'aet_1_2.csv' (joining all digits into one number would give
    # 110 vs 12 vs 21 and interleave the groups). The name itself breaks ties
    # so the result never depends on the arbitrary order of os.listdir().
    # A name without digits yields an empty tuple and simply sorts first.
    return tuple(int(run) for run in re.findall(r'\d+', file_name)), file_name


def aggregate_csv_files(folder_path, output_path_template, x_values, expected_count=36):
    """
    Concatenate, for each value in ``x_values``, all matching CSV files of a folder into one CSV file.

    A file matches a value ``x`` when its name starts with ``str(x)`` and ends
    with ``.csv``. Matching files are read in natural (numeric-aware) file-name
    order, stacked row-wise, and written to ``output_path_template.format(x)``.

    Args:
        folder_path (str): The path to the folder containing the .csv files.
        output_path_template (str): The path template for the output file to create.
            It must contain a ``{}`` placeholder that receives the 'x' value.
        x_values (list): File-name prefixes to aggregate; one output file is written per prefix.
        expected_count (int | None): Number of CSV files that must match each prefix.
            Defaults to 36. Pass ``None`` to skip the count check.

    Returns:
        aggregated_counts (dict): A dictionary mapping each 'x' value to the number of CSV files aggregated.

    Raises:
        ValueError: If the number of matching files differs from ``expected_count``,
            or if no file matches a prefix at all. Outputs for prefixes processed
            before the failing one have already been written at that point.
    """
    aggregated_counts = {}

    for x in x_values:
        prefix = str(x)

        # Select this prefix's CSV files and put them in a deterministic order.
        csv_files = sorted(
            (f for f in os.listdir(folder_path) if f.endswith('.csv') and f.startswith(prefix)),
            key=_natural_sort_key,
        )
        actual_count = len(csv_files)

        # Guard against missing or stray files before writing anything for this prefix.
        if expected_count is not None and actual_count != expected_count:
            raise ValueError(f"Error for x={x}: Expected {expected_count} CSV files, but found {actual_count}.")

        # pd.concat fails with an unhelpful message on an empty list; report the real cause instead.
        if not csv_files:
            raise ValueError(f"Error for x={x}: No CSV files found in {folder_path!r}.")

        # Stack the rows of every file into a single DataFrame with a fresh index.
        frames = [pd.read_csv(os.path.join(folder_path, f)) for f in csv_files]
        df = pd.concat(frames, ignore_index=True, axis=0)

        # Insert the 'x' value into the output file template and save the result.
        output_path = output_path_template.format(x)
        df.to_csv(output_path, index=False)

        aggregated_counts[x] = actual_count

    return aggregated_counts


In [3]:
# Folder scanned for the input CSV files, and the output file pattern
# ('{}' is replaced by each prefix listed below).
folder_path = 'C:\\Users\\nkakhani\\_Multimodal\\SoilNet-7\\SoilNet-PreRelease\\dataset\\Climate'
output_path_template = 'C:\\Users\\nkakhani\\_Multimodal\\SoilNet-7\\SoilNet-PreRelease_{}.csv'

# File-name prefixes to aggregate; one output file is written per prefix.
# To process a subset only, pass a shorter list instead, e.g. ['srad'].
climate_values = [
    'aet', 'def', 'pdsi', 'pet', 'pr', 'ro', 'soil',
    'srad', 'swe', 'tmmn', 'tmmx', 'vap', 'vpd',
]

# Kept as the last expression of the cell so the returned counts are displayed.
aggregate_csv_files(
    folder_path=folder_path,
    output_path_template=output_path_template,
    x_values=climate_values,
)

{'aet': 36,
 'def': 36,
 'pdsi': 36,
 'pet': 36,
 'pr': 36,
 'ro': 36,
 'soil': 36,
 'srad': 36,
 'swe': 36,
 'tmmn': 36,
 'tmmx': 36,
 'vap': 36,
 'vpd': 36}