In [30]:
import boto3
import json

In [31]:
s3 = boto3.client("s3", region_name="us-east-1")

In [32]:
def list_files(s3, bucket_name):
        """
        List all files in the S3 bucket.

        :return: List of file keys in the bucket.
        """
        try:
            response = s3.list_objects_v2(Bucket=bucket_name)
            if 'Contents' in response:
                return [file['Key'] for file in response['Contents']]
            else:
                return []
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

In [33]:
def download_file(s3, bucket_name, object_key, file_path):
    """
    Download a text file from an S3 bucket using get_object.

    :param s3: The boto3 S3 client.
    :param bucket_name: The name of the S3 bucket.
    :param object_key: The key of the file to download.
    :param file_path: The local path to save the downloaded file.
    :return: The content of the text file if downloaded successfully, None otherwise.
    """
    try:
        response = s3.get_object(Bucket=bucket_name, Key=object_key)

        # Read and decode the content of the file
        content = response['Body'].read()

        # Optionally save the text content to a file
        with open(file_path, 'w') as file:
            file.write(content)

        print(f"File '{object_key}' downloaded and saved successfully to '{file_path}'.")
        return content
    except s3.exceptions.NoSuchBucket:
        print(f"Bucket '{bucket_name}' does not exist.")
    except s3.exceptions.NoSuchKey:
        print(f"File '{object_key}' does not exist in bucket '{bucket_name}'.")
    except Exception as e:
        print(f"Error downloading file: {str(e)}")
    return None


In [34]:
libros = list_files(s3, "datalake-books")

In [35]:
def download_all_files(s3, bucket_name, object_keys, destination_folder):
    """
    Download multiple text files from an S3 bucket recursively.

    :param s3: The boto3 S3 client.
    :param bucket_name: The name of the S3 bucket.
    :param object_keys: A list of keys of the files to download.
    :param destination_folder: The local folder to save the downloaded files.
    :return: A dictionary with keys as file names and values as the text content.
    """
    downloaded_files = {}
    for object_key in object_keys:
        try:
            file_name = object_key.split('/')[-1]  # Extract file name from key
            local_path = f"{destination_folder}/{file_name}"
            text_content = download_file(s3, bucket_name, object_key, local_path)

            if text_content is not None:
                downloaded_files[object_key] = text_content
        except Exception as e:
            print(f"Error processing file '{object_key}': {str(e)}")
    return downloaded_files

In [36]:
download_all_files(s3, "datalake-books", libros, "datalake-books")

Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes
Error downloading file: write() argument must be str, not bytes


{}

In [38]:
import os

def download_txt_files(s3_client, bucket_name, local_folder):
        """
        Downloads all .txt files from a specified S3 bucket and prefix to a local folder.
        """
        objects = s3_client.list_objects_v2(Bucket=bucket_name)

        if 'Contents' not in objects:
            print("No files found in the bucket with the specified prefix.")
            return []
        
        os.makedirs(os.path.dirname(local_folder), exist_ok=True)

        downloaded_files = []
        for obj in objects['Contents']:
            file_key = obj['Key']
            if file_key.endswith('.txt'):
                local_path = os.path.join(local_folder, os.path.basename(file_key))
                s3_client.download_file(bucket_name, file_key, local_path)
                downloaded_files.append(local_path)

        return downloaded_files

In [40]:
download_txt_files(s3, "datalake-books", "datalake-books")

['datalake-books\\2500.txt',
 'datalake-books\\2501.txt',
 'datalake-books\\2502.txt',
 'datalake-books\\2503.txt',
 'datalake-books\\2504.txt',
 'datalake-books\\2505.txt',
 'datalake-books\\2506.txt',
 'datalake-books\\2507.txt',
 'datalake-books\\2508.txt',
 'datalake-books\\2509.txt']

In [42]:
def upload_json_file(s3_client, bucket_name, json_data, s3_key):
        """
        Uploads a JSON object to a specified S3 bucket.

        :param bucket_name: Name of the S3 bucket.
        :param json_data: JSON object or dictionary to upload.
        :param s3_key: S3 key (path) where the file will be stored.
        """
        try:
            json_string = json.dumps(json_data)
            s3_client.put_object(Body=json_string, Bucket=bucket_name, Key=s3_key, ContentType='application/json')
            print(f"JSON file uploaded successfully to {bucket_name}/{s3_key}")
        except Exception as e:
            print(f"An error occurred while uploading the JSON file: {e}")

In [None]:
upload_json_file(s3, "wordgraph-tcsd", /api/graph.json, "graph.json")

JSON file uploaded successfully to wordgraph-tcsd/graph.json
