In [3]:
import csv
import os


In [4]:


def split_csv(source_filepath: str, dest_path: str, result_filename_prefix: str, row_limit: int) -> None:
    """
    Split a source CSV into multiple CSVs of `row_limit` records each;
    only the last file may contain fewer.
    The source file's header row is repeated as the header row of every split
    file.
    Split files follow a one-based sequential naming convention like so:
        `{result_filename_prefix}_1.csv`, `{result_filename_prefix}_2.csv`, ...
    A source without data rows (empty, or header only) produces no files.
    Files are read and written with the platform's default text encoding.
    :param source_filepath {str}:
        File name (including full path) for the file to be split.
    :param dest_path {str}:
        Path to the directory where the split files should be saved. It is
        created if it does not exist; an empty string means the current
        working directory.
    :param result_filename_prefix {str}:
        File name to be used for the generated files.
        Example: If `my_split_file` is provided as the prefix, then a resulting
                 file might be named: `my_split_file_1.csv`
    :param row_limit {int}:
        Number of rows per file (header row is excluded from the row count).
    :raises ValueError:
        If `row_limit` is not a positive number.
    :return {NoneType}:
    """
    if row_limit <= 0:
        # ValueError is still caught by callers handling the generic Exception.
        raise ValueError('row_limit must be > 0')

    # newline='' is required by the csv module: it keeps line breaks embedded
    # in quoted fields intact and avoids doubled '\r' on Windows when writing.
    with open(source_filepath, 'r', newline='') as source:
        reader = csv.reader(source)

        headers = next(reader, None)
        if headers is None:
            # Completely empty source: nothing to split.
            return

        # os.makedirs('') raises, and '' already means "current directory".
        if dest_path:
            os.makedirs(dest_path, exist_ok=True)

        file_number = 1
        # Always hold the next unwritten row, so a new file is opened only when
        # there is at least one record to put in it (no create-then-delete).
        # Using next(reader, None) instead of a bare `except` lets real parse
        # or I/O errors propagate rather than silently truncating the output.
        pending_row = next(reader, None)

        while pending_row is not None:
            target_filename = f'{result_filename_prefix}_{file_number}.csv'
            target_filepath = os.path.join(dest_path, target_filename)

            with open(target_filepath, 'w', newline='') as target:
                writer = csv.writer(target)
                writer.writerow(headers)

                rows_written = 0
                while pending_row is not None and rows_written < row_limit:
                    writer.writerow(pending_row)
                    rows_written += 1
                    pending_row = next(reader, None)

            file_number += 1

In [5]:
# Split nba_tweets.csv into files of at most 5000 data rows each, written to
# csv_split/ as my_new_split_file_<n>.csv.
split_csv(
    source_filepath='nba_tweets.csv',
    dest_path='csv_split/',
    result_filename_prefix='my_new_split_file',
    row_limit=5000,
)

In [None]:
import pandas as pd

# Load the first five files written by split_csv, one DataFrame per file,
# read in file-number order.
# See also: http://acepor.github.io/2017/08/03/using-chunksize/
split1, split2, split3, split4, split5 = map(
    pd.read_csv,
    (
        'csv_split/my_new_split_file_1.csv',
        'csv_split/my_new_split_file_2.csv',
        'csv_split/my_new_split_file_3.csv',
        'csv_split/my_new_split_file_4.csv',
        'csv_split/my_new_split_file_5.csv',
    ),
)