In [None]:
import pandas as pd

In [None]:
def read_chunked_month(file_name, **read_kwargs):
    ''' Read a CSV piecewise and yield ``(month, DataFrame)`` groups.

    Each chunk returned by ``pd.read_csv`` is grouped on its ``Month``
    column and the groups are yielded in ascending key order.  A month
    that straddles a chunk boundary is therefore yielded more than once
    (one piece per chunk); stitching those pieces together is left to the
    caller.  Requires the file to be ordered by month for the overall
    sequence of keys to be non-decreasing.

    Parameters:
        file_name: path or file-like object, passed to ``pd.read_csv``.
        **read_kwargs: forwarded unchanged to ``pd.read_csv``.  Pass
            ``chunksize=N`` to bound memory use; without it the whole
            file is read and treated as a single chunk.

    Yields:
        ``(month, frame)`` tuples where every row of ``frame`` has
        ``Month == month``.
    '''
    result = pd.read_csv(file_name, **read_kwargs)

    if isinstance(result, pd.DataFrame):
        # No chunksize/iterator was requested, so read_csv returned the
        # whole table.  Iterating a DataFrame yields column labels, not
        # row chunks, so handle it as one chunk instead.
        for group in result.groupby('Month', sort=True):
            yield group
        return

    try:
        for chunk in result:
            # groupby(sort=True) already yields keys in ascending order,
            # so no extra sorting of the (key, frame) pairs is needed.
            for group in chunk.groupby('Month', sort=True):
                yield group
    finally:
        # Release the underlying file handle even if the consumer stops
        # iterating early or an error propagates.
        result.close()

def read(file_name, **read_kwargs):
    ''' Yield one ``(month, DataFrame)`` pair per month of a CSV file.

    Consumes the per-chunk groups produced by ``read_chunked_month`` and
    merges consecutive groups that share the same month, so that a month
    split across chunk boundaries comes out as a single frame.

    Parameters:
        file_name: passed through to ``read_chunked_month``.
        **read_kwargs: passed through to ``read_chunked_month``.

    Yields:
        ``(month, frame)`` tuples in the order the months appear.  Nothing
        is yielded when the input contains no rows.

    Raises:
        AssertionError: if a group contains rows of another month, or if
            a month smaller than the one being assembled is encountered
            (i.e. the data is not ordered by month).
    '''
    current_month = None
    pieces = []  # partial frames of the month currently being assembled

    def assemble():
        # A single piece is handed out as-is; several pieces are joined
        # with one concat instead of re-copying the growing frame for
        # every additional chunk.
        return pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    for month, df in read_chunked_month(file_name, **read_kwargs):
        assert (df.Month == month).all(), \
            'group contains rows from a different month'
        if current_month is not None:
            assert month >= current_month, \
                'input is not ordered by month'
            if month != current_month:
                # The previous month is complete: emit it and start over.
                yield current_month, assemble()
                pieces = []
        current_month = month
        pieces.append(df)

    # Emit the trailing month, but only if any data was seen at all;
    # an input without rows must not produce a (None, None) pair.
    if pieces:
        yield current_month, assemble()

In [None]:
# Stream the bz2-compressed 1988 airline CSV in 100,000-row chunks and
# print each month together with the shape of its assembled frame.
for month, df in read('/home/simon/Downloads/airline_data/1988.csv.bz2',
                      chunksize=100000, compression='bz2'):
    print(month, df.shape)

In [None]:
# Same read as with the smaller chunk size, but using 1,000,000-row
# chunks; print each month together with the shape of its frame.
for month, df in read('/home/simon/Downloads/airline_data/1988.csv.bz2',
                      chunksize=1000000, compression='bz2'):
    print(month, df.shape)