In [1]:
import boto3
from faker import Faker
import random 
import pandas as pd
from io import StringIO
from datetime import datetime



In [2]:
# Shared Faker instance; generate_order_data draws its UUIDs and order dates from it.
fake = Faker()
# Shared S3 client; upload_to_s3 writes objects through it.
s3 = boto3.client('s3')

In [3]:
def generate_order_data(num_rows=100):
    """Build a DataFrame of randomly generated order records.

    Every row gets fresh UUIDs for the order, customer and product, an order
    date from ``fake.date_this_year()``, a random status, a quantity between
    1 and 5, a unit price between 10.0 and 500.0 rounded to 2 decimals, a
    ``total_amount`` equal to quantity * price (rounded to 2 decimals), and a
    ``cdc_timestamp`` taken from ``datetime.now()`` while the row is built.

    Args:
        num_rows: Number of rows to generate. Zero or a negative value
            yields a frame with no rows.

    Returns:
        pandas.DataFrame whose columns are always, in order: order_id,
        customer_id, order_date, status, product_id, quantity, price,
        total_amount, cdc_timestamp.
    """
    # Fixing the column list up front keeps the schema identical whether or
    # not any rows were generated; without it an empty batch has no columns.
    columns = [
        'order_id',
        'customer_id',
        'order_date',
        'status',
        'product_id',
        'quantity',
        'price',
        'total_amount',
        'cdc_timestamp',
    ]
    statuses = ['CREATED', 'SHIPPED', 'DELIVERED', 'CANCELLED']

    data = []
    for _ in range(num_rows):
        order = {
            'order_id': fake.uuid4(),
            'customer_id': fake.uuid4(),
            'order_date': fake.date_this_year(),
            'status': random.choice(statuses),
            'product_id': fake.uuid4(),
            'quantity': random.randint(1, 5),
            'price': round(random.uniform(10.0, 500.0), 2),
        }
        # Derived from the values just drawn, so it is added after the literal.
        order['total_amount'] = round(order['quantity'] * order['price'], 2)
        order['cdc_timestamp'] = datetime.now()  # Simulate CDC timestamp
        data.append(order)

    return pd.DataFrame(data, columns=columns)

In [4]:
def upload_to_s3(df, bucket_name, file_name):
    """Serialize ``df`` as CSV (index omitted) and store it as an S3 object.

    Args:
        df: DataFrame to write out.
        bucket_name: Destination bucket.
        file_name: Object key to write under in that bucket.

    The upload goes through the module-level ``s3`` client, and a
    confirmation line is printed once ``put_object`` has returned.
    """
    # With no target path, to_csv hands back the CSV text directly.
    csv_text = df.to_csv(index=False)
    s3.put_object(Body=csv_text, Key=file_name, Bucket=bucket_name)
    print(f"Uploaded {file_name} to bucket {bucket_name}")

# Destination bucket and object key for this sample batch.
bucket_name = '***'
file_name = 'orders/orders_data_2.csv'

# Generate 20 synthetic orders and upload them as a single CSV object.
df_order_data = generate_order_data(num_rows=20)
upload_to_s3(df=df_order_data, bucket_name=bucket_name, file_name=file_name)


Uploaded orders/orders_data_2.csv to bucket ***
