In [0]:
import boto3
import pandas as pd
import json
from io import StringIO

# Bucket that holds the exported topic data
bucket_name = 'user-b194464884bf-bucket'

# S3 client; boto3 picks up credentials from the configured AWS CLI profile
# or from environment variables
s3 = boto3.client('s3', region_name='us-east-1')

# Folder name under `topics/` in the bucket -> name of the DataFrame variable
# that the loading loop creates for it
partitions = dict([
    ('b194464884bf.geo', 'df_geo'),
    ('b194464884bf.pin', 'df_pin'),
    ('b194464884bf.user', 'df_user'),
])

def load_json_frames(s3_client, bucket, prefix):
    """Load every ``.json`` object under ``prefix`` into one DataFrame.

    Args:
        s3_client: boto3 S3 client used to list and download objects.
        bucket: Name of the bucket to read from.
        prefix: Key prefix; only objects whose key starts with it are listed.

    Returns:
        The ``pd.json_normalize`` result of each file, concatenated with a
        fresh index. An empty DataFrame if no ``.json`` object exists under
        the prefix.
    """
    # A single list_objects_v2 call returns at most 1,000 keys, so walk every
    # page instead of reading only the first response.
    paginator = s3_client.get_paginator('list_objects_v2')
    frames = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # 'Contents' is absent from a page when nothing matches the prefix.
        for obj in page.get('Contents', []):
            file_key = obj['Key']
            if not file_key.endswith('.json'):
                continue

            # Download the object and release the connection once it is read.
            body = s3_client.get_object(Bucket=bucket, Key=file_key)['Body']
            try:
                file_content = body.read().decode('utf-8')
            finally:
                body.close()

            # Each file is parsed as one JSON document and flattened.
            frames.append(pd.json_normalize(json.loads(file_content)))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


# Build one DataFrame per entry in `partitions` from its partition=2 folder.
for partition, df_name in partitions.items():
    folder_prefix = f'topics/{partition}/partition=2/'  # Partition 2 folder
    partition_df = load_json_frames(s3, bucket_name, folder_prefix)

    if partition_df.empty:
        print(f'No records loaded from s3://{bucket_name}/{folder_prefix}')

    # Always bind the name (to an empty frame if nothing was loaded) so that
    # later cells neither raise NameError on a fresh kernel nor silently show
    # a stale frame left over from a previous run.
    globals()[df_name] = partition_df


In [0]:
# Display the DataFrame loaded for the 'b194464884bf.geo' entry.
df_geo

In [0]:
# Display the DataFrame loaded for the 'b194464884bf.pin' entry.
df_pin

In [0]:
# Display the DataFrame loaded for the 'b194464884bf.user' entry.
df_user