In [None]:
!pip install pydub
import pandas as pd  # Import for handling DataFrames
import json  # Import for reading JSON files
import xml.etree.ElementTree as ET  # Import for parsing XML files
from PIL import Image  # Import from PIL (Pillow) for handling image files
from pydub import AudioSegment  # Import from pydub for handling audio files
import cv2  # Import for handling video files
import concurrent.futures  # Import for parallel execution

def load_data(file_path):
    """Load one file, picking the reader from the file's extension.

    Supported extensions: csv, xls/xlsx, json, xml, h5/hdf5, feather, txt,
    jpg/jpeg, mp3/wav and mp4/avi/mkv.

    Args:
        file_path: Path of the file to load. Leading/trailing whitespace
            (e.g. left over from comma-separated user input) is ignored.

    Returns:
        The loaded object (DataFrame, parsed JSON, XML root element, text,
        PIL image, AudioSegment or cv2.VideoCapture), or None when the
        format is unsupported or loading fails. In the failure case an
        error message is printed instead of raising.
    """
    import os  # local import: only needed for extension parsing

    # A path such as "data.csv " must still be recognised as CSV.
    file_path = file_path.strip()
    # splitext inspects only the last path component, so dotted directory
    # names and extension-less files are not mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()

    try:
        # Load CSV files into a DataFrame
        if file_extension == 'csv':
            return pd.read_csv(file_path)
        # Load Excel files (both .xls and .xlsx) into a DataFrame
        elif file_extension in ['xls', 'xlsx']:
            return pd.read_excel(file_path)
        # Load JSON files into a Python object
        elif file_extension == 'json':
            with open(file_path, 'r') as f:
                return json.load(f)
        # Parse XML files and return the root of the XML tree
        elif file_extension == 'xml':
            tree = ET.parse(file_path)
            return tree.getroot()
        # Load HDF5 files into a DataFrame
        elif file_extension in ['h5', 'hdf5']:
            return pd.read_hdf(file_path)
        # Load Feather files into a DataFrame
        elif file_extension == 'feather':
            return pd.read_feather(file_path)
        # Read text files as plain text
        elif file_extension == 'txt':
            with open(file_path, 'r') as f:
                return f.read()
        # Open image files (JPG, JPEG) using PIL
        elif file_extension in ['jpg', 'jpeg']:
            return Image.open(file_path)
        # Load audio files (MP3, WAV) using pydub
        elif file_extension in ['mp3', 'wav']:
            return AudioSegment.from_file(file_path)
        # Open video files (MP4, AVI, MKV) using cv2
        elif file_extension in ['mp4', 'avi', 'mkv']:
            return cv2.VideoCapture(file_path)
        else:
            # Raise an error if the file format is not supported
            raise ValueError(f"Unsupported file format: {file_extension}")
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file
        print(f"Error loading {file_path}: {e}")
        return None

def read_multiple_files(file_paths):
    """Load several files concurrently via load_data.

    Args:
        file_paths: Iterable of file paths; each one is passed to load_data
            on a worker thread.

    Returns:
        List of the loaded objects, in the same order as file_paths.
        Files for which load_data returned None or raised are left out.
    """
    # List to store loaded data from each file
    loaded_data = []

    # Use ThreadPoolExecutor to load files concurrently
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit every load up front so they all run in parallel
        submitted = [(file_path, executor.submit(load_data, file_path))
                     for file_path in file_paths]
        # Collect in submission order rather than completion order, so the
        # result order is deterministic and lines up with file_paths
        for file_path, future in submitted:
            try:
                # Blocks until this particular load has finished
                data = future.result()
            except Exception as e:
                # Print error message if an exception occurs during future execution
                print(f"Error loading {file_path}: {e}")
                continue
            # Add the data to loaded_data if it is not None
            if data is not None:
                loaded_data.append(data)

    return loaded_data

def merge_files_side_by_side(file_paths):
    """Load the given files and join the resulting DataFrames column-wise.

    Args:
        file_paths: Paths handed to read_multiple_files.

    Returns:
        None if no DataFrame was loaded, the single DataFrame if exactly one
        was loaded, otherwise the result of pd.concat(..., axis=1).
    """
    # Keep only the tabular results; other loaded objects are ignored
    frames = []
    for loaded in read_multiple_files(file_paths):
        if isinstance(loaded, pd.DataFrame):
            frames.append(loaded)

    # Nothing tabular was loaded
    if not frames:
        return None
    # A single table needs no concatenation
    if len(frames) == 1:
        return frames[0]

    # Place the tables next to each other (column-wise)
    return pd.concat(frames, axis=1)

def get_file_paths_from_input():
    """Ask the user for a comma-separated list of file paths.

    Returns:
        List of the entered paths with surrounding whitespace removed from
        each entry.
    """
    # Prompt user for input and split by comma
    raw_paths = input("Enter file paths separated by commas: ").split(',')
    # Strip any extra whitespace from each file path, so that an entry like
    # "a.csv , b.csv" does not yield paths with stray spaces
    file_paths = [file_path.strip() for file_path in raw_paths]
    return file_paths

# Example usage: ask for the files, then either merge them or load them one by one
file_paths = get_file_paths_from_input()  # Get the list of file paths from user input
print(type(file_paths))

if len(file_paths) > 1:
    # Normalise the answer so that "Yes" or " no " are accepted as well
    if_yes = input("Do you want to merge the files side-by-side? (yes/no): ").strip().lower()
    if if_yes == "yes":
        merged_df = merge_files_side_by_side(file_paths)  # Merge the files side-by-side
        # merge_files_side_by_side returns None when no DataFrame was loaded
        if merged_df is not None:
            print(merged_df.head())
        else:
            print("No tabular files could be loaded; nothing to merge.")
    elif if_yes == "no":
        x = read_multiple_files(file_paths)  # Read the files individually
        for idx, file_data in enumerate(x):
            # Expose each loaded object as file_1, file_2, ... in the global namespace
            variable_name = f"file_{idx+1}"
            globals()[variable_name] = file_data
    else:
        print("Unrecognized answer; expected 'yes' or 'no'.")
else:
    # Only one path was entered: load it directly instead of doing nothing
    single_file = load_data(file_paths[0])
    print(single_file)

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Enter file paths separated by commas: /content/Life Expectancy Data.csv , /content/Medical_insurance.csv/content/Medical_insurance.csv
<class 'list'>
Do you want to merge the files side-by-side? (yes/no): no
Error loading /content/Life Expectancy Data.csv : Unsupported file format: csv 
Error loading  /content/Medical_insurance.csv/content/Medical_insurance.csv: [Errno 2] No such file or directory: ' /content/Medical_insurance.csv/content/Medical_insurance.csv'


In [None]:
from IPython import get_ipython
from IPython.display import display
# %%
!pip install pydub
import pandas as pd
import json
import xml.etree.ElementTree as ET
from PIL import Image
from pydub import AudioSegment
import cv2
import concurrent.futures

def load_data(file_path):
    """Load one file, selecting the reader by file extension.

    Supported extensions: csv, xls/xlsx, json, xml, h5/hdf5, feather, txt,
    jpg/jpeg/png/gif/bmp, mp3/wav/ogg/flac and mp4/avi/mkv/mov.

    Args:
        file_path: Path of the file to load; surrounding whitespace is
            ignored.

    Returns:
        The loaded object, or None if the format is unsupported or the
        reader failed (the error is printed rather than raised).
    """
    import os  # local import: only needed for extension parsing

    # Ignore stray whitespace so "data.csv " is still detected as CSV
    file_path = file_path.strip()
    # Use the suffix of the final path component only; a bare split('.')
    # would misread dotted directories and files without an extension
    file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
    try:
        # Tabular formats -> pandas DataFrame
        if file_extension == 'csv':
            return pd.read_csv(file_path)
        elif file_extension in ('xls', 'xlsx'):
            return pd.read_excel(file_path)
        # JSON -> parsed Python object
        elif file_extension == 'json':
            with open(file_path, 'r') as f:
                return json.load(f)
        # XML -> root element of the parsed tree
        elif file_extension == 'xml':
            return ET.parse(file_path).getroot()
        elif file_extension in ('h5', 'hdf5'):
            return pd.read_hdf(file_path)
        elif file_extension == 'feather':
            return pd.read_feather(file_path)
        # Plain text -> whole content as a string
        elif file_extension == 'txt':
            with open(file_path, 'r') as f:
                return f.read()
        # Images -> PIL image object
        elif file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
            return Image.open(file_path)
        # Audio -> pydub AudioSegment
        elif file_extension in ('mp3', 'wav', 'ogg', 'flac'):
            return AudioSegment.from_file(file_path)
        # Video -> cv2 capture handle
        elif file_extension in ('mp4', 'avi', 'mkv', 'mov'):
            return cv2.VideoCapture(file_path)
        else:
            raise ValueError(f"Unsupported file format: {file_extension}")
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file
        print(f"Error loading {file_path}: {e}")
        return None

def read_multiple_files(file_paths):
    """Load several files in parallel using load_data.

    Args:
        file_paths: Iterable of file paths, each loaded on a worker thread.

    Returns:
        List of loaded objects ordered like file_paths; entries whose load
        returned None or raised are omitted.
    """
    loaded_data = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Start all loads before waiting on any of them
        pending = [(file_path, executor.submit(load_data, file_path))
                   for file_path in file_paths]
        # Walk the futures in submission order: iterating as_completed would
        # make the result order depend on which file finishes first
        for file_path, future in pending:
            try:
                data = future.result()
            except Exception as e:
                print(f"Error loading {file_path}: {e}")
                continue
            if data is not None:
                loaded_data.append(data)
    return loaded_data

def merge_files_side_by_side(file_paths):
    """Load the files and combine every loaded DataFrame column-wise.

    Returns None when no DataFrame was loaded, the lone DataFrame when there
    is exactly one, and otherwise pd.concat(..., axis=1) of all of them.
    """
    # Non-DataFrame results (text, images, ...) cannot be merged and are dropped
    tables = [item for item in read_multiple_files(file_paths)
              if isinstance(item, pd.DataFrame)]
    if len(tables) > 1:
        return pd.concat(tables, axis=1)
    # Zero tables -> None; exactly one -> that table as-is
    return tables[0] if tables else None

def get_file_paths_from_input():
    """Prompt for comma-separated file paths and return them as a list.

    Each entry has its surrounding whitespace removed.
    """
    answer = input("Enter file paths separated by commas: ")
    cleaned = []
    for piece in answer.split(','):
        # Drop the blanks users tend to type around the commas
        cleaned.append(piece.strip())
    return cleaned

# Ask for the files, then merge them, show them individually, or show the single file
file_paths = get_file_paths_from_input()
print(type(file_paths))

if len(file_paths) > 1:
    # Normalise the answer so that "Yes" or " no " are accepted as well
    if_yes = input("Do you want to merge the files side-by-side? (yes/no): ").strip().lower()
    if if_yes == "yes":
        merged_df = merge_files_side_by_side(file_paths)
        # merge_files_side_by_side returns None when no DataFrame was loaded
        if merged_df is not None:
            display(merged_df)
        else:
            print("No tabular files could be loaded; nothing to merge.")
    elif if_yes == "no":
        loaded_files = read_multiple_files(file_paths)
        for idx, file_data in enumerate(loaded_files):
            # Expose each loaded object as file_1, file_2, ... in the global namespace
            variable_name = f"file_{idx+1}"
            globals()[variable_name] = file_data
            print(f"{variable_name}:")
            if isinstance(file_data, pd.DataFrame):
                display(file_data)
            else:
                print(file_data)
    else:
        print("Unrecognized answer; expected 'yes' or 'no'.")
else:
    loaded_files = read_multiple_files(file_paths)
    for idx, file_data in enumerate(loaded_files):
        # Show the loaded object instead of silently discarding it
        if isinstance(file_data, pd.DataFrame):
            display(file_data)
        else:
            print(file_data)

[31mERROR: Could not find a version that satisfies the requirement pydub==1.0.0 (from versions: 0.1, 0.1.1, 0.2, 0.2.1, 0.2.2, 0.2.3, 0.2.4, 0.2.5, 0.2.6, 0.2.7, 0.2.8, 0.2.9, 0.3, 0.4, 0.4.1, 0.4.2, 0.5.0, 0.5.1, 0.5.2, 0.5.3, 0.5.4, 0.5.5, 0.5.6, 0.6.0, 0.6.1, 0.6.2, 0.6.3, 0.7.0, 0.7.1, 0.8.0, 0.8.1, 0.8.2, 0.8.3, 0.9.0, 0.9.1, 0.9.2, 0.9.3, 0.9.4, 0.9.5, 0.10.0, 0.11.0, 0.12.0, 0.14.0, 0.14.1, 0.14.2, 0.15.0, 0.16.0, 0.16.1, 0.16.2, 0.16.3, 0.16.4, 0.16.5, 0.16.6, 0.16.7, 0.17.0, 0.18.0, 0.19.0, 0.20.0, 0.21.0, 0.22.0, 0.22.1, 0.23.0, 0.23.1, 0.24.0, 0.24.1, 0.25.0, 0.25.1)[0m[31m
[0m[31mERROR: No matching distribution found for pydub==1.0.0[0m[31m
[0m

In [None]:
!pip install pydub
import pandas as pd  # Import for handling DataFrames
import json  # Import for reading JSON files
import xml.etree.ElementTree as ET  # Import for parsing XML files
from PIL import Image  # Import from PIL (Pillow) for handling image files
from pydub import AudioSegment  # Import from pydub for handling audio files
import cv2  # Import for handling video files
import concurrent.futures  # Import for parallel execution

def load_data(file_path):
    """Load a file with the reader that matches its extension.

    Supported extensions: csv, xls/xlsx, json, xml, h5/hdf5, feather, txt,
    jpg/jpeg, mp3/wav and mp4/avi/mkv.

    Args:
        file_path: Path of the file to load. Whitespace around the path is
            ignored.

    Returns:
        The loaded object (DataFrame, parsed JSON, XML root element, text,
        PIL image, AudioSegment or cv2.VideoCapture), or None when the
        format is unsupported or loading fails; the error is printed.
    """
    import os  # local import: only needed for extension parsing

    # Tolerate blanks around the path (common with comma-separated input)
    file_path = file_path.strip()
    # Determine the extension from the last path component only, so that
    # "dir.v2/file" or an extension-less name is not misinterpreted
    file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()

    try:
        # Load CSV files into a DataFrame
        if file_extension == 'csv':
            return pd.read_csv(file_path)
        # Load Excel files (both .xls and .xlsx) into a DataFrame
        elif file_extension in ['xls', 'xlsx']:
            return pd.read_excel(file_path)
        # Load JSON files into a Python object
        elif file_extension == 'json':
            with open(file_path, 'r') as f:
                return json.load(f)
        # Parse XML files and return the root of the XML tree
        elif file_extension == 'xml':
            tree = ET.parse(file_path)
            return tree.getroot()
        # Load HDF5 files into a DataFrame
        elif file_extension in ['h5', 'hdf5']:
            return pd.read_hdf(file_path)
        # Load Feather files into a DataFrame
        elif file_extension == 'feather':
            return pd.read_feather(file_path)
        # Read text files as plain text
        elif file_extension == 'txt':
            with open(file_path, 'r') as f:
                return f.read()
        # Open image files (JPG, JPEG) using PIL
        elif file_extension in ['jpg', 'jpeg']:
            return Image.open(file_path)
        # Load audio files (MP3, WAV) using pydub
        elif file_extension in ['mp3', 'wav']:
            return AudioSegment.from_file(file_path)
        # Open video files (MP4, AVI, MKV) using cv2
        elif file_extension in ['mp4', 'avi', 'mkv']:
            return cv2.VideoCapture(file_path)
        else:
            # Raise an error if the file format is not supported
            raise ValueError(f"Unsupported file format: {file_extension}")
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file
        print(f"Error loading {file_path}: {e}")
        return None

def read_multiple_files(file_paths):
    """Load several files concurrently through load_data.

    Args:
        file_paths: Iterable of file paths; each is loaded on a worker
            thread.

    Returns:
        List of loaded objects in the order of file_paths. Files whose load
        returned None or raised are skipped.
    """
    # List to store loaded data from each file
    loaded_data = []

    # Use ThreadPoolExecutor to load files concurrently
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit load_data tasks for each file path, remembering the order
        futures = [executor.submit(load_data, file_path) for file_path in file_paths]
        # Read the results in input order; as_completed would hand them back
        # in whatever order the loads happen to finish
        for file_path, future in zip(file_paths, futures):
            try:
                # Retrieve the result of the future (waits if still running)
                data = future.result()
            except Exception as e:
                # Print error message if an exception occurs during future execution
                print(f"Error loading {file_path}: {e}")
                continue
            # Add the data to loaded_data if it is not None
            if data is not None:
                loaded_data.append(data)

    return loaded_data

def merge_files_side_by_side(file_paths):
    """Read the files and concatenate the loaded DataFrames along the columns.

    Args:
        file_paths: Paths forwarded to read_multiple_files.

    Returns:
        None when nothing tabular was loaded, the only DataFrame when there
        is just one, else the column-wise concatenation of all DataFrames.
    """
    everything = read_multiple_files(file_paths)
    # Only DataFrames take part in the merge
    data_frames = list(filter(lambda obj: isinstance(obj, pd.DataFrame), everything))

    count = len(data_frames)
    if count == 0:
        result = None
    elif count == 1:
        # Nothing to concatenate; hand back the frame itself
        result = data_frames[0]
    else:
        # axis=1 puts the frames side-by-side
        result = pd.concat(data_frames, axis=1)
    return result

def get_file_paths_from_input():
    """Read a comma-separated list of file paths from the user.

    Returns the entries as a list, each with surrounding whitespace removed.
    """
    typed = input("Enter file paths separated by commas: ")
    # Split on commas, then trim each piece
    return list(map(str.strip, typed.split(',')))

# Example usage: ask for the files, then merge them, list them, or load the single file
file_paths = get_file_paths_from_input()  # Get the list of file paths from user input
print(type(file_paths))

if len(file_paths) > 1:
    # Normalise the answer so that "Yes" or " no " are accepted as well
    if_yes = input("Do you want to merge the files side-by-side? (yes/no): ").strip().lower()
    if if_yes == "yes":
        merged_df = merge_files_side_by_side(file_paths)  # Merge the files side-by-side
        # merge_files_side_by_side returns None when no DataFrame was loaded,
        # in which case calling .head() would raise AttributeError
        if merged_df is None:
            print("No tabular files could be loaded; nothing to merge.")
        else:
            print(merged_df.head())
    elif if_yes == "no":
        x = read_multiple_files(file_paths)  # Read the files individually
        for idx, file_data in enumerate(x):
            # Expose each loaded object as file_1, file_2, ... in the global namespace
            variable_name = f"file_{idx+1}"
            globals()[variable_name] = file_data
            print(f"{variable_name}:\n{file_data}\n")
    else:
        print("Unrecognized answer; expected 'yes' or 'no'.")
else:
    single_file = load_data(file_paths[0])  # Load the single file
    print(single_file)


