In [None]:
import io

import pyarrow.parquet as pq

In [2]:
def get_list_of_s3_files(s3_resource, BUCKET, PREFIX):
    """
    List the keys of the objects in an S3 bucket that are returned when the
    bucket's objects are filtered by the given key prefix.

    :param s3_resource: A boto3 S3 service resource
    :type s3_resource: boto3.resources.factory.s3.ServiceResource
    :param BUCKET: Name of the S3 bucket to search
    :type BUCKET: str
    :param PREFIX: Key prefix passed to the bucket's object filter
    :type PREFIX: str

    :return: The keys of the matching objects, in the order the filter yields them
    :rtype: list[str]

    Example Usage:
    >>> s3_resource = boto3.resource('s3')
    >>> s3_files = get_list_of_s3_files(s3_resource, 'my-s3-bucket', 'path/to/my/files/')
    >>> print(s3_files)
    ['path/to/my/files/file1.txt', 'path/to/my/files/file2.txt', 'path/to/my/files/file3.txt']
    """
    matching_keys = []
    for s3_object in s3_resource.Bucket(BUCKET).objects.filter(Prefix=PREFIX):
        # Only the key is collected; the object summaries themselves are discarded.
        matching_keys.append(s3_object.key)
    return matching_keys

In [None]:
def read_s3_file(s3_resource, bucket_name, file_key):
    """
    Fetch an object from S3 and return its body decoded as UTF-8 text.

    :param s3_resource: A boto3 S3 service resource
    :type s3_resource: boto3.resources.factory.s3.ServiceResource
    :param bucket_name: Name of the bucket that holds the object.
    :type bucket_name: str
    :param file_key: Key of the object inside the bucket.
    :type file_key: str
    :return: The whole object body, decoded with UTF-8.
    :rtype: str
    """
    response = s3_resource.Object(bucket_name, file_key).get()
    # The body is read in full before decoding.
    raw_bytes = response['Body'].read()
    return raw_bytes.decode('utf-8')

In [None]:
def read_parquet_file(s3_reader, bucket_name, file_key):
    """
    Reads a Parquet file from an Amazon S3 bucket and returns the data as a Pandas DataFrame.

    The object is downloaded in full into an in-memory buffer, which is then
    parsed with ``pyarrow.parquet`` and converted to pandas.

    Parameters:
        s3_reader (boto3.resources.factory.s3.ServiceResource): An instance of `boto3.resource('s3')` used for S3 operations.
        bucket_name (str): The name of the S3 bucket where the Parquet file is located.
        file_key (str): The key (path) of the Parquet file within the S3 bucket.

    Returns:
        pandas.core.frame.DataFrame: A Pandas DataFrame containing the data from the Parquet file.

    Raises:
        Nothing is caught here: any exception raised by the S3 download or by
        pyarrow while reading the buffer propagates to the caller unchanged.

    Example Usage:
        import boto3

        # Create an S3 resource
        s3_resource = boto3.resource('s3')

        # Read a Parquet file from S3
        bucket_name = 'my-bucket'
        file_key = 'path/to/parquet/file.parquet'
        parquet_data = read_parquet_file(s3_resource, bucket_name, file_key)
        print(parquet_data.head())
    """
    s3_object = s3_reader.Object(bucket_name, file_key)
    # A fresh buffer per call: the previous code referenced an undefined name
    # `buffer`, which raised NameError before anything was downloaded.
    buffer = io.BytesIO()
    s3_object.download_fileobj(buffer)
    # The download leaves the stream position at the end; rewind so the
    # reader starts from the beginning of the data.
    buffer.seek(0)
    table = pq.read_table(buffer)
    parquet_df = table.to_pandas()
    return parquet_df
