In [2]:
import csv
import random
import string
import time
from datetime import datetime

def _random_product_row():
    """Return one dict of random values keyed by the CSV column names."""
    return {
        "exported": random.choice(["Yes", "No"]),
        "productid": ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
        "product": f"Product-{random.randint(1, 500)}",
        "price": round(random.uniform(10.0, 1000.0), 2),
        "depacher": f"Dep-{random.randint(1, 50)}",
        "arrival": f"Arr-{random.randint(1, 50)}",
        "dateofcreation": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "location": random.choice(["Mumbai", "Delhi", "Bangalore", "Pune"]),
        "age": random.randint(18, 60),
        "group": random.choice(["A", "B", "C", "D"]),
        "usage": random.choice(["Low", "Medium", "High"]),
        "product_age": random.randint(1, 10),
    }


def generate_csv_files(num_rows, num_files, wait_time=10):
    """
    Generate CSV files of random product rows in the current working directory.

    Files are named ``data_file_<n>.csv`` with ``n`` counting up from 1; a file
    that already exists under that name is overwritten. Each file holds one
    header row followed by ``num_rows`` data rows.

    Args:
        num_rows (int): Number of data rows per CSV file.
        num_files (int): Total number of CSV files to create.
        wait_time (int | float): Seconds to sleep between consecutive files.
            Defaults to 10; pass 0 to write the files back to back. No sleep
            happens after the last file.

    Returns:
        None. Progress is reported with one ``print`` per generated file.
    """
    # Column order of the header row; the keys of every generated row match it.
    columns = [
        "exported", "productid", "product", "price", "depacher", "arrival",
        "dateofcreation", "location", "age", "group", "usage", "product_age"
    ]

    for file_counter in range(1, num_files + 1):
        file_name = f"data_file_{file_counter}.csv"

        # newline="" is what the csv module expects for files it writes to;
        # the explicit encoding keeps the output independent of the platform default.
        with open(file_name, mode="w", newline="", encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=columns)
            writer.writeheader()
            # Stream rows straight to disk instead of buffering them in a list.
            for _ in range(num_rows):
                writer.writerow(_random_product_row())

        print(f"Generated file: {file_name}")

        # Pause only between files, never after the final one.
        if file_counter < num_files:
            time.sleep(wait_time)

# Example usage: create 2 CSV files with 100 rows each
# (a 10-second pause separates the two files).
generate_csv_files(num_rows=100, num_files=2)

Generated file: data_file_1.csv
Generated file: data_file_2.csv


In [None]:
import csv
import random
import string
import time
from datetime import datetime
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError

def _build_s3_key(s3_folder, file_name):
    """Join an S3 folder prefix and a file name into an object key without stray slashes."""
    prefix = (s3_folder or "").strip("/")
    return f"{prefix}/{file_name}" if prefix else file_name


def generate_csv_files_to_s3(num_rows, num_files, bucket_name, s3_folder, wait_time=10, s3_client=None):
    """
    Generate CSV files of random product rows and upload each one to an S3 bucket.

    Every file is first written to the current working directory as
    ``data_file_<n>.csv`` (``n`` counting up from 1, overwriting any existing
    file of that name) and then uploaded. The local copy is left in place.
    Upload failures are printed and do not stop the remaining files.

    Args:
        num_rows (int): Number of data rows per CSV file.
        num_files (int): Total number of CSV files to create.
        bucket_name (str): Name of the S3 bucket.
        s3_folder (str): Folder path within the S3 bucket where files will be stored.
            Leading and trailing slashes are ignored; an empty value uploads
            the files at the bucket root.
        wait_time (int): Time to wait (in seconds) between generating files. Default is 10 seconds.
        s3_client: Optional object exposing ``upload_file(filename, bucket, key)``.
            When omitted, ``boto3.client("s3")`` is created.

    Examples for wait_time:
        - 30 minutes: wait_time = 30 * 60  (1800 seconds)
        - 1 hour: wait_time = 60 * 60  (3600 seconds)
        - 4 hours: wait_time = 4 * 60 * 60  (14400 seconds)
        - 1 day: wait_time = 24 * 60 * 60  (86400 seconds)

    Returns:
        None. Progress and upload errors are reported with ``print``.
    """
    # Column order of the header row; the keys of every generated row match it.
    columns = [
        "exported", "productid", "product", "price", "depacher", "arrival",
        "dateofcreation", "location", "age", "group", "usage", "product_age"
    ]

    # Build the default client only when the caller did not supply one.
    if s3_client is None:
        s3_client = boto3.client("s3")

    for file_counter in range(1, num_files + 1):
        file_name = f"data_file_{file_counter}.csv"

        # newline="" is what the csv module expects for files it writes to;
        # the explicit encoding keeps the output independent of the platform default.
        with open(file_name, mode="w", newline="", encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=columns)
            writer.writeheader()
            # Stream rows straight to disk instead of buffering them in a list.
            for _ in range(num_rows):
                writer.writerow({
                    "exported": random.choice(["Yes", "No"]),
                    "productid": ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
                    "product": f"Product-{random.randint(1, 500)}",
                    "price": round(random.uniform(10.0, 1000.0), 2),
                    "depacher": f"Dep-{random.randint(1, 50)}",
                    "arrival": f"Arr-{random.randint(1, 50)}",
                    "dateofcreation": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "location": random.choice(["Mumbai", "Delhi", "Bangalore", "Pune"]),
                    "age": random.randint(18, 60),
                    "group": random.choice(["A", "B", "C", "D"]),
                    "usage": random.choice(["Low", "Medium", "High"]),
                    "product_age": random.randint(1, 10),
                })

        print(f"Generated file: {file_name}")

        # Upload the CSV file to the S3 bucket. The key is normalised so that
        # "folder", "folder/" and "/folder" all map to "folder/<file_name>".
        s3_key = _build_s3_key(s3_folder, file_name)
        try:
            s3_client.upload_file(file_name, bucket_name, s3_key)
            print(f"Uploaded {file_name} to s3://{bucket_name}/{s3_key}")
        except (NoCredentialsError, PartialCredentialsError) as e:
            print(f"Failed to upload {file_name} to S3: {e}")
        except Exception as e:
            # Deliberately best-effort: report the failure and move on to the next file.
            print(f"An error occurred while uploading {file_name}: {e}")

        # Pause only between files, never after the final one.
        if file_counter < num_files:
            print(f"Waiting {wait_time} seconds before generating the next file...")
            time.sleep(wait_time)

# Example usage: generate 2 CSV files with 10 rows each and upload them to S3.
# bucket_name is an empty string in this cell; set it to a real bucket name before running.
generate_csv_files_to_s3(
    num_rows=10, 
    num_files=2, 
    bucket_name="", 
    s3_folder="testing",
    wait_time=10
)

Generated file: data_file_1.csv
Uploaded data_file_1.csv to s3://yash-testing-fug/testing/data_file_1.csv
Waiting 10 seconds before generating the next file...
Generated file: data_file_2.csv
Uploaded data_file_2.csv to s3://yash-testing-fug/testing/data_file_2.csv
