In [None]:
import concurrent.futures  # Import for parallel execution
import json  # Import for reading JSON files
import os  # Import for path handling (extension detection, os.PathLike support)
import xml.etree.ElementTree as ET  #Import for parsing XML files

import cv2  # Import for handling video files
import pandas as pd  # Import for handling DataFrames
from PIL import Image  # Import from PIL (Pillow) for handling image files
from pydub import AudioSegment  # Import from pydub for handling audio files

def load_data(file_path):
    """Load a single file into an in-memory object chosen by its extension.

    The extension is matched case-insensitively. Supported extensions and the
    object returned for each:

        csv            -> pandas.DataFrame (pd.read_csv)
        xls, xlsx      -> pandas.DataFrame (pd.read_excel)
        json           -> parsed Python object (json.load, read as UTF-8)
        xml            -> root element of the parsed tree
        h5, hdf5       -> object stored in the file (pd.read_hdf)
        feather        -> pandas.DataFrame (pd.read_feather)
        txt            -> file contents as a str
        jpg, jpeg      -> PIL image (Image.open)
        mp3, wav       -> pydub AudioSegment
        mp4, avi, mkv  -> opened cv2.VideoCapture (caller should release it)

    Args:
        file_path: Path to the file, as a str or any os.PathLike object.

    Returns:
        The loaded object, or None if the extension is unsupported or loading
        fails for any reason. Failures are printed, never raised.
    """
    try:
        # os.fspath accepts str and os.PathLike (e.g. pathlib.Path) alike.
        path_str = os.fspath(file_path)

        # Look for the extension in the final path component only, so a dot in
        # a directory name ("run.v1/data") is not mistaken for an extension.
        _, dot, suffix = os.path.basename(path_str).rpartition('.')
        file_extension = suffix.lower() if dot else ''

        # Load CSV files into a DataFrame
        if file_extension == 'csv':
            return pd.read_csv(path_str)
        # Load Excel files (both .xls and .xlsx) into a DataFrame
        elif file_extension in ('xls', 'xlsx'):
            return pd.read_excel(path_str)
        # Load JSON files into a Python object
        elif file_extension == 'json':
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(path_str, 'r', encoding='utf-8') as f:
                return json.load(f)
        # Parse XML files and return the root of the XML tree
        elif file_extension == 'xml':
            return ET.parse(path_str).getroot()
        # Load HDF5 files via pandas
        elif file_extension in ('h5', 'hdf5'):
            return pd.read_hdf(path_str)
        # Load Feather files into a DataFrame
        elif file_extension == 'feather':
            return pd.read_feather(path_str)
        # Read text files as plain text (platform default encoding, as before)
        elif file_extension == 'txt':
            with open(path_str, 'r') as f:
                return f.read()
        # Open image files (JPG, JPEG) using PIL
        elif file_extension in ('jpg', 'jpeg'):
            return Image.open(path_str)
        # Load audio files (MP3, WAV) using pydub
        elif file_extension in ('mp3', 'wav'):
            return AudioSegment.from_file(path_str)
        # Open video files (MP4, AVI, MKV) using cv2
        elif file_extension in ('mp4', 'avi', 'mkv'):
            capture = cv2.VideoCapture(path_str)
            # VideoCapture does not raise for a missing or unreadable file; it
            # only reports "not opened". Treat that as a load failure so the
            # caller gets None instead of an unusable capture object.
            if not capture.isOpened():
                capture.release()
                raise IOError(f"Could not open video file: {path_str}")
            return capture
        else:
            # Raise an error if the file format is not supported
            raise ValueError(f"Unsupported file format: {file_extension or '<none>'}")
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None
        print(f"Error loading {file_path}: {e}")
        return None

def read_multiple_files(file_paths):
    """Load several files concurrently and return the successfully loaded objects.

    Each path is handed to load_data on a ThreadPoolExecutor worker. Results
    are collected in the same order as file_paths, regardless of which load
    finishes first, so callers get a deterministic list.

    Args:
        file_paths: Iterable of file paths to load.

    Returns:
        A list with one entry per file that loaded successfully, in input
        order. Files for which load_data returned None, or whose task raised,
        are skipped.
    """
    # List to store loaded data from each file
    loaded_data = []

    # Use ThreadPoolExecutor to load files concurrently
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit every task up front so the loads overlap, remembering the
        # submission order alongside each path.
        submitted = [
            (file_path, executor.submit(load_data, file_path))
            for file_path in file_paths
        ]

        # Wait on the futures in submission order (not completion order) so
        # the output order does not depend on thread timing.
        for file_path, future in submitted:
            try:
                data = future.result()
            except Exception as e:
                # Print error message if an exception occurs during future execution
                print(f"Error loading {file_path}: {e}")
                continue
            # load_data signals failure with None; keep only real results
            if data is not None:
                loaded_data.append(data)

    return loaded_data

def merge_files_side_by_side(file_paths):
    """Load the given files and join the resulting DataFrames column-wise.

    Files are loaded through read_multiple_files; anything that did not load
    as a pandas DataFrame is ignored. The remaining frames are concatenated
    along axis=1.

    Args:
        file_paths: Iterable of file paths to load and merge.

    Returns:
        A single DataFrame holding the columns of every loaded frame.

    Raises:
        ValueError: If fewer than two DataFrames were obtained.
    """
    # Keep only the tabular results; other loaded objects cannot be concatenated
    frames = [
        loaded
        for loaded in read_multiple_files(file_paths)
        if isinstance(loaded, pd.DataFrame)
    ]

    # A side-by-side merge needs at least two frames
    if len(frames) < 2:
        raise ValueError("At least two files are required for merging.")

    # axis=1 places the frames next to each other instead of stacking rows
    return pd.concat(frames, axis=1)

# Example usage: merge two CSV files column-wise and show the result
file_paths = [
    "Customer_Churn_Prediction.csv",
    "Medical_insurance.csv",
]
merged_df = merge_files_side_by_side(file_paths)

print(merged_df)


       age     sex     bmi  children smoker     region      charges state  \
0     19.0  female  27.900       0.0    yes  southwest  16884.92400    OH   
1     18.0    male  33.770       1.0     no  southeast   1725.55230    NJ   
2     28.0    male  33.000       3.0     no  southeast   4449.46200    OH   
3     33.0    male  22.705       0.0     no  northwest  21984.47061    OK   
4     32.0    male  28.880       0.0     no  northwest   3866.85520    MA   
...    ...     ...     ...       ...    ...        ...          ...   ...   
4245   NaN     NaN     NaN       NaN    NaN        NaN          NaN    MT   
4246   NaN     NaN     NaN       NaN    NaN        NaN          NaN    WV   
4247   NaN     NaN     NaN       NaN    NaN        NaN          NaN    NC   
4248   NaN     NaN     NaN       NaN    NaN        NaN          NaN    HI   
4249   NaN     NaN     NaN       NaN    NaN        NaN          NaN    VT   

      account_length      area_code  ... total_eve_calls total_eve_charge  

In [None]:
# Write the merged DataFrame to disk as CSV; index=False omits the row index
output_file_path = "merged_output.csv"
merged_df.to_csv(path_or_buf=output_file_path, index=False)

# Confirm where the CSV was written
print(f"Merged DataFrame saved to {output_file_path}")

Merged DataFrame saved to merged_output.csv
