# In [11]:
import os
import csv

# Example configuration for one split run.
input_file = "Inputfile.csv"  # CSV file to be split
output_dir = "split_files"  # directory that receives the split files
max_file_size = 40  # max size (MB) for each individual split file


# Splitting threshold: 95% of the maximum file size
# (presumably headroom for header/formatting overhead -- confirm).
chunk_size = 0.95 * max_file_size

class _Utf8ByteCounter:
    """Write-only sink whose ``write`` returns the UTF-8 size of its argument."""

    @staticmethod
    def write(text):
        return len(text.encode('utf-8'))


def _write_chunk(path, header, rows):
    """Write *header* followed by *rows* to a new CSV file at *path*."""
    # newline='' lets the csv writer control line endings itself.
    with open(path, 'w', newline='') as chunk_file:
        writer = csv.writer(chunk_file)
        writer.writerow(header)
        writer.writerows(rows)


def split_csv(input_file, output_dir, chunk_size=40):
    """Split a CSV file into smaller CSV files that each repeat the header.

    Rows are copied in order into ``chunk_1.csv``, ``chunk_2.csv``, ... inside
    *output_dir*. A new chunk is started whenever adding the next row would
    push the current chunk (header included) past *chunk_size*. An input that
    contains only a header produces a single header-only ``chunk_1.csv``.

    Args:
        input_file: Path of the CSV file to split; its first row is the header.
        output_dir: Directory for the chunk files; created if it is missing.
        chunk_size: Maximum size of one chunk file, in units of
            1024 * 1024 bytes. Sizes are measured on the UTF-8 encoding of
            each row exactly as the csv writer formats it (quoting and line
            terminator included). A single row larger than the limit is
            still written, alone, in its own chunk.

    Raises:
        ValueError: If *input_file* is empty and therefore has no header row.
    """
    os.makedirs(output_dir, exist_ok=True)
    max_bytes = chunk_size * 1024 * 1024

    # csv.writer.writerow returns whatever the underlying write() returns, so
    # writing into the counting sink yields the formatted row's byte size
    # without keeping the formatted text around.
    measure = csv.writer(_Utf8ByteCounter()).writerow

    # newline='' is required so that line breaks embedded in quoted fields
    # reach the csv reader untouched.
    with open(input_file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)

        header = next(reader, None)
        if header is None:
            raise ValueError(f'{input_file!r} is empty: no header row to split on')
        header_size = measure(header)

        chunk_number = 1
        pending_rows = []
        pending_size = header_size  # every chunk starts with the header

        for row in reader:
            row_size = measure(row)

            # Flush only when something is buffered; otherwise an oversized
            # first row would produce a chunk holding nothing but the header.
            if pending_rows and pending_size + row_size > max_bytes:
                _write_chunk(
                    os.path.join(output_dir, f'chunk_{chunk_number}.csv'),
                    header,
                    pending_rows,
                )
                chunk_number += 1
                pending_rows = []
                pending_size = header_size

            pending_rows.append(row)
            pending_size += row_size

        # Write whatever is left (just the header for a header-only input).
        _write_chunk(
            os.path.join(output_dir, f'chunk_{chunk_number}.csv'),
            header,
            pending_rows,
        )

    print('CSV splitting completed successfully!')

# Run the split with the configuration defined above.
split_csv(
    input_file=input_file,
    output_dir=output_dir,
    chunk_size=chunk_size,
)


# Output: CSV splitting completed successfully!
