In [4]:
import pandas as pd 
import numpy as np
import os

def combine_excel_files(folder_path):
    """
    Combines all Excel files within a folder into one DataFrame.

    Files are read in sorted name order so that the row order of the result
    is reproducible (``os.listdir`` returns names in arbitrary order).
    Temporary lock files that Excel creates next to an open workbook
    (names starting with ``~$``) are skipped, since they are not readable
    workbooks.

    Parameters:
    - folder_path (str): Path to the folder containing Excel files.

    Returns:
    - combined_df (DataFrame): Combined DataFrame containing data from all Excel files,
      re-indexed from 0. An empty DataFrame is returned when the folder
      contains no ``.xlsx`` files.
    """
    # Keep only real .xlsx workbooks, in a deterministic order
    excel_files = sorted(
        name
        for name in os.listdir(folder_path)
        if name.endswith('.xlsx') and not name.startswith('~$')
    )

    # pd.concat raises ValueError on an empty list, so handle "no files" explicitly
    if not excel_files:
        return pd.DataFrame()

    # Read each workbook (pandas reads the first sheet by default)
    dfs = [pd.read_excel(os.path.join(folder_path, name)) for name in excel_files]

    # Concatenate all DataFrames into one
    return pd.concat(dfs, ignore_index=True)

In [5]:
def combine_csv_files(folder_path):
    """
    Combines all CSV files within a folder into one DataFrame.

    Files are read in sorted name order so that the row order of the result
    is reproducible (``os.listdir`` returns names in arbitrary order).

    Parameters:
    - folder_path (str): Path to the folder containing CSV files.

    Returns:
    - combined_df (DataFrame): Combined DataFrame containing data from all CSV files,
      re-indexed from 0. An empty DataFrame is returned when the folder
      contains no ``.csv`` files.
    """
    # Keep only .csv files, in a deterministic order
    csv_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.csv')
    )

    # pd.concat raises ValueError on an empty list, so handle "no files" explicitly
    if not csv_files:
        return pd.DataFrame()

    # Read each CSV file into its own DataFrame
    dfs = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_files]

    # Concatenate all DataFrames into one
    return pd.concat(dfs, ignore_index=True)

In [None]:

def combine_csv_files_in_years(base_folder_path, start_year, end_year):
    """
    Combines all CSV files within folders for the given range of years into one DataFrame.

    Each year in ``start_year..end_year`` (inclusive) is expected to be a
    sub-folder of ``base_folder_path`` named after the year (e.g. ``<base>/2020``).
    Years are processed in ascending order and, within a year, files are read
    in sorted name order so that the row order of the result is reproducible.
    A year whose folder is missing (or is not a directory) is reported with a
    printed message and skipped.

    Parameters:
    - base_folder_path (str): Base path to the folder containing CSV files with changing years.
    - start_year (int): Starting year.
    - end_year (int): Ending year.

    Returns:
    - combined_df (DataFrame): Combined DataFrame containing data from all CSV files,
      re-indexed from 0. An empty DataFrame is returned when no CSV file is
      found for any year in the range.
    """
    # Initialize an empty list to store DataFrames
    dfs = []

    # Iterate over each year (end_year is inclusive)
    for year in range(start_year, end_year + 1):
        # Folder path for the current year
        folder_path = os.path.join(base_folder_path, str(year))

        # isdir (not exists): a plain file named like the year would make
        # os.listdir raise NotADirectoryError
        if not os.path.isdir(folder_path):
            print("Folder '{}' does not exist.".format(folder_path))
            continue

        # Keep only .csv files, in a deterministic order
        csv_files = sorted(
            name for name in os.listdir(folder_path) if name.endswith('.csv')
        )

        # Read each CSV file of this year and collect the DataFrames
        dfs.extend(
            pd.read_csv(os.path.join(folder_path, name)) for name in csv_files
        )

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly
    if not dfs:
        return pd.DataFrame()

    # Concatenate all DataFrames into one
    return pd.concat(dfs, ignore_index=True)