In [1]:
import pandas as pd
from tqdm import tqdm

def merge_parquet_files(file_list):
    """
    Merge several Parquet files into one DataFrame, showing a progress bar.

    Each file is read in full with ``pd.read_parquet`` and the resulting
    frames are stacked row-wise with ``pd.concat(..., ignore_index=True)``,
    so the per-file indexes are discarded and the result gets a fresh index.

    Parameters:
    file_list (list of str): Paths to the Parquet files, in the order their
        rows should appear in the result. Any iterable of paths is accepted;
        a single path passed as a bare string is treated as a one-item list.

    Returns:
    pd.DataFrame: Merged DataFrame containing data from all Parquet files.
        An empty DataFrame is returned when ``file_list`` has no entries.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one bogus "path" per letter.
    if isinstance(file_list, str):
        file_list = [file_list]

    # Materialise once: lets generators be passed in, gives tqdm a known
    # total, and allows the emptiness check below.
    paths = list(file_list)

    # pd.concat raises ValueError on an empty sequence; an empty frame is a
    # more useful answer for "merge nothing".
    if not paths:
        return pd.DataFrame()

    # Read every file, advancing the progress bar once per file.
    data_frames = [
        pd.read_parquet(path)
        for path in tqdm(paths, desc="Merging Parquet files")
    ]

    # Stack all frames into one and renumber the rows.
    return pd.concat(data_frames, ignore_index=True)


In [2]:

    # Input Parquet files, in the order their rows are stacked in the result
    parquet_files = ['pupil_combined.parquet', 'run_combined.parquet']
    
    # Read both files and concatenate them row-wise into one DataFrame
    merged_df = merge_parquet_files(parquet_files)
    
    # Write the merged DataFrame to a Parquet file (the frame itself is not displayed)
    merged_df.to_parquet('run_pupil_merged.parquet')


Merging Parquet files: 100%|██████████| 2/2 [00:06<00:00,  3.10s/it]


In [7]:

# Example usage: merge the two input files and save the result.
file_list = ['pupil_combined.parquet', 'run_combined.parquet']
output_file = 'run_pupil_merged.parquet'

# Keep the merged frame in a variable so it can be saved and inspected,
# then persist it to the path named by output_file.
merged_df = merge_parquet_files(file_list)
merged_df.to_parquet(output_file)

# Show a short preview instead of rendering the whole frame.
merged_df.head()


Merging Parquet files:  50%|█████     | 1/2 [00:06<00:06,  6.69s/it]


KeyboardInterrupt: 