# In [None]:
import boto3
import csv
import io

def split_large_csv(s3_bucket, source_key, chunk_size, output_prefix):
    """Split a CSV object stored in S3 into smaller CSV objects.

    The source object is streamed and parsed row by row, so at most one
    chunk of rows is held in memory at a time. The first row is treated as
    the header and is passed along with every chunk. Chunks are uploaded
    through ``write_chunk_to_s3`` to the same bucket and are numbered from 1
    in the order they are produced.

    An empty source object, or one that contains only a header row, produces
    no chunks.

    Args:
        s3_bucket: Name of the bucket holding the source object.
        source_key: Key of the CSV object to split; it is decoded as UTF-8.
        chunk_size: Maximum number of data rows (header excluded) per chunk.
        output_prefix: Key prefix handed to ``write_chunk_to_s3``.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Reject nonsensical sizes before any network call; a size <= 0 would
    # otherwise silently degrade to one uploaded object per row.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size!r}")

    s3_client = boto3.client('s3')

    # Get the object from S3
    response = s3_client.get_object(Bucket=s3_bucket, Key=source_key)

    # newline='' disables universal-newline translation so the csv module
    # sees line endings embedded in quoted fields unchanged. The context
    # manager closes the wrapper (and with it the response body) even if
    # parsing or an upload fails.
    with io.TextIOWrapper(response['Body'], encoding='utf-8', newline='') as text_stream:
        csv_reader = csv.reader(text_stream)

        # An empty object has no header row; there is nothing to split.
        header = next(csv_reader, None)
        if header is None:
            return

        chunk_number = 0
        current_chunk = []

        for row in csv_reader:
            current_chunk.append(row)

            # If the current chunk reaches the specified size, write it to S3
            if len(current_chunk) >= chunk_size:
                chunk_number += 1
                write_chunk_to_s3(s3_client, current_chunk, header, s3_bucket, output_prefix, chunk_number)
                current_chunk = []  # Fresh list for the next chunk

        # Write any remaining rows in the last, possibly shorter, chunk
        if current_chunk:
            chunk_number += 1
            write_chunk_to_s3(s3_client, current_chunk, header, s3_bucket, output_prefix, chunk_number)

def write_chunk_to_s3(s3_client, chunk, header, s3_bucket, output_prefix, chunk_number):
    """Serialize one chunk as CSV text and upload it to S3.

    The object is stored under ``<output_prefix>/chunk_<chunk_number>.csv``
    in ``s3_bucket``. Its content is the header row followed by every row of
    ``chunk``, formatted by ``csv.writer`` with its default dialect.

    Args:
        s3_client: Object exposing ``put_object(Bucket=..., Key=..., Body=...)``.
        chunk: Iterable of data rows to write after the header.
        header: Row written first in the output.
        s3_bucket: Name of the destination bucket.
        output_prefix: Key prefix placed before ``/chunk_<n>.csv``.
        chunk_number: Number embedded in the output key.
    """
    destination = f"{output_prefix}/chunk_{chunk_number}.csv"

    # Render header + rows into an in-memory text buffer.
    with io.StringIO() as buffer:
        csv.writer(buffer).writerows([header, *chunk])
        payload = buffer.getvalue()

    # Upload the rendered text, then report the key that was written.
    s3_client.put_object(Bucket=s3_bucket, Key=destination, Body=payload)
    print(f"Uploaded {destination}")

# Example usage. Guarded so that importing this module does not trigger
# S3 requests; the call still runs when the file is executed directly.
if __name__ == "__main__":
    split_large_csv('your-bucket-name', 'path/to/large_file.csv', 10000, 'path/to/output')