# Corpus: Chadwyck-Healey poetry collections

In [1]:
import sys
sys.path.append('../')
from generative_formalism import *

# Report the configured Chadwyck-Healey locations and check that the text directory is present
print(f"Chadwyck-Healey corpus path: {PATH_CHADWYCK_HEALEY_TXT}")
print(f"Chadwyck-Healey metadata path: {PATH_CHADWYCK_HEALEY_METADATA}")

# An unset (falsy) path and a path missing on disk are reported the same way
txt_dir_found = bool(PATH_CHADWYCK_HEALEY_TXT) and os.path.exists(PATH_CHADWYCK_HEALEY_TXT)
print("✓ Path exists" if txt_dir_found else "⚠ Path does not exist or is not set")

Chadwyck-Healey corpus path: /Users/rj416/lltk_data/corpora/chadwyck_poetry/txt
Chadwyck-Healey metadata path: /Users/rj416/lltk_data/corpora/chadwyck_poetry/metadata.csv
✓ Path exists


In [2]:
# Load the metadata CSV, replace missing values with "" and index the rows by `id`
df_meta = (
    pd.read_csv(PATH_CHADWYCK_HEALEY_METADATA)
    .fillna("")
    .set_index('id')
)

In [15]:
# Number of metadata rows per source database (`attdbase`)
df_meta['attdbase'].value_counts()

attdbase
English Poetry                               163193
Twentieth-Century American Poetry             51546
American Poetry                               39546
Twentieth-Century English Poetry              28266
English Poetry 2nd Edition                    26930
Modern Poetry                                 11469
The Faber Poetry Library                       6347
Twentieth-Century African-American Poetry      5881
African-American Poetry                        3002
Name: count, dtype: int64

In [24]:
# Share of rows whose database name does not contain "Twentieth" that have a non-empty `attrhyme`
df_not_twentieth = df_meta[~df_meta['attdbase'].str.contains("Twentieth")]
df_not_twentieth.query('attrhyme!=""').shape[0] / df_not_twentieth.shape[0]

0.9286270345367225

In [26]:
# Convert these columns to numbers; anything unparseable (including the "" fill value) becomes NaN
for num_col in ('author_dob', 'num_lines'):
    df_meta[num_col] = pd.to_numeric(df_meta[num_col], errors='coerce')

In [27]:
# Shape of the subset with author_dob in 1600-2000 and num_lines in 10-100 (both ranges inclusive)
in_scope = df_meta['author_dob'].between(1600, 2000) & df_meta['num_lines'].between(10, 100)
df_meta[in_scope].shape

(204514, 103)

In [10]:
# Number of distinct values in the `a1` column
df_meta['a1'].nunique()

4400

In [13]:
# Distinct `a1` values among rows whose author_dob is exactly 1100
df_meta.loc[df_meta['author_dob'] == 1100, 'a1'].unique()

array(['Anon., 1100-1500 (Middle English Lyrics and Ballads)',
       'Anon., 600-1100 (The Paris Psalter and The Meters of Boethius)',
       'Anon., 600-1100 (The Anglo Saxon Minor Poems)',
       'Anon., 1100-1500 (Middle English Romances)',
       'Anon., 600-1100 (The Exeter Book)',
       'Anon., 1100-1400 (Middle English Poetry)',
       'Anon., 1100-1500 (Middle English Romances) (trans.) / Colonne, Guido delle, 13th cent. (orig.)',
       'Anon., 600-1100 (The Vercelli Book)',
       'Anon., 600-1100 (The Junius Manuscript)',
       'Anon., 600-1100 (Beowulf and Judith)',
       'Anon., 1100-1500 (Middle English Romances) (trans.)',
       'Anon., 1100-1400 (Middle English Poetry) (trans.) / Grosseteste, Robert, 1175?-1253. (orig.)',
       'Anon., 1100-1500 (Middle English Romances) (trans.) / Couldrette, 14th/15th cent. (orig.)'],
      dtype=object)

In [14]:
# Rows per source database, restricted to author_dob between 1600 and 2000 inclusive
df_meta.loc[df_meta['author_dob'].between(1600, 2000), 'attdbase'].value_counts()

attdbase
English Poetry                               122047
American Poetry                               38775
Twentieth-Century American Poetry             37487
English Poetry 2nd Edition                    24308
Twentieth-Century English Poetry              19143
Modern Poetry                                  7950
The Faber Poetry Library                       3618
Twentieth-Century African-American Poetry      3433
African-American Poetry                        2549
Name: count, dtype: int64

In [4]:
# Show one randomly drawn metadata row as a dict, leaving out empty fields,
# keys that start with an underscore, and the 'Unnamed: 0' column
sample_row = dict(df_meta.reset_index().sample(1).iloc[0])
d_meta = {
    key: val
    for key, val in sample_row.items()
    if val != "" and key and key[0] != '_' and key != 'Unnamed: 0'
}
d_meta

{'id': 'english/russellt/Z300480002',
 'a1': 'Russell, Thomas, 1762-1788',
 'attautid': 1851.0,
 'attbytes': '2Kb',
 'attdbase': 'English Poetry',
 'attgenre': 'Sonnet',
 'attidref': 'E3074866',
 'attperi': 'Later Eighteenth-Century 1750-1799',
 'attpoet': 'Russell, Thomas, 1762-1788',
 'attpubn1': 1789.0,
 'attpubn2': 1789.0,
 'attrhyme': 'y',
 'attsize': '2Kb',
 'author': 'Russell, Thomas, 1762-1788',
 'author_dob': 1762.0,
 'author_dod': 1788.0,
 'author_gender': 'male',
 'collection': '1 2',
 'corpus': 'ChadwyckPoetry',
 'hi': 'In',
 'idref': 'Z300480002',
 'idz': 'Z300480002',
 'l': '&indent;Stern Chivalry her idle spear uphung,',
 'mainhead': 'SONNET. I.',
 'num_lines': np.int64(14),
 'pbl': 'Printed for D. Prince and J. Cooke, J. F. and C. Rivington, T. Cadell and T. and J. Egerton',
 'posthumous': True,
 'sombiog': 1851.0,
 't1': 'SONNET. I.',
 't2': 'Sonnets and Miscellaneous Poems (1789)',
 't3': 'Literature Online',
 'title': 'SONNET. I.',
 'ty': 'DATA',
 'volhead': 'Sonnets

In [5]:
get_chadwyck_corpus??

[0;31mSignature:[0m      
[0mget_chadwyck_corpus[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfields[0m[0;34m=[0m[0;34m{[0m[0;34m'id_hash'[0m[0;34m:[0m [0;34m'id_hash'[0m[0;34m,[0m [0;34m'attperi_str'[0m[0;34m:[0m [0;34m'period_meta'[0m[0;34m,[0m [0;34m'attdbase_str'[0m[0;34m:[0m [0;34m'subcorpus'[0m[0;34m,[0m [0;34m'author'[0m[0;34m:[0m [0;34m'author'[0m[0;34m,[0m [0;34m'author_dob'[0m[0;34m:[0m [0;34m'author_dob'[0m[0;34m,[0m [0;34m'title'[0m[0;34m:[0m [0;34m'title'[0m[0;34m,[0m [0;34m'year'[0m[0;34m:[0m [0;34m'year'[0m[0;34m,[0m [0;34m'num_lines'[0m[0;34m:[0m [0;34m'num_lines'[0m[0;34m,[0m [0;34m'volhead'[0m[0;34m:[0m [0;34m'volume'[0m[0;34m,[0m [0;34m'l'[0m[0;34m:[0m [0;34m'line'[0m[0;34m,[0m [0;34m'attrhyme'[0m[0;34m:[0m [0;34m'rhyme'[0m[0;34m,[0m [0;34m'attgenre'[0m[0;34m:[0m [0;34m'genre'[0m[0;34m}[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[

In [6]:
# Build the corpus table using get_chadwyck_corpus's default `fields` mapping, then display it
df = get_chadwyck_corpus()
df

Unnamed: 0_level_0,id_hash,period_meta,subcorpus,author,author_dob,title,year,num_lines,volume,line,rhyme,genre,period
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
english/bartonbe/Z200274127,685377,1800-1834 Early Nineteenth-Century,English Poetry,"Barton, Bernard, 1784-1849",1784.0,SONNET. II. [The night seems darkest ere the ...,1814,14,The Reliquary (1836),&indent;Rises with light and gladness on its w...,y,Sonnet,1750-1800
c20-american/am20129/Z300227191,160964,1900-1999 Twentieth-Century,American Poetry,"Pound, Ezra, 1885-1972",1885.0,LAMENT OF THE FRONTIER GUARD,1915,24,,"By the North Gate, the wind blows full of sand,",,,1850-1900
c20-english/ep20128/Z200582770,636642,1900-1999 Twentieth-Century,English Poetry,"Rodker, John, 1894-",1894.0,Married,1924,27,,This roof tree holds us,,,1850-1900
english-ed2/ep2527/Z300669174,832810,,English Poetry,"Barton, Emily M., 1817-1909",1817.0,Reply to the Question: “What is the Wealth of...,1847,31,Straws on the Stream: by E. M. B. (1910),"Tin, copper, iron; silver, gems untold?",y,,1800-1850
english/browniro/Z300289566,846436,1835-1869 Mid Nineteenth-Century,English Poetry,"Browning, Robert, 1812-1889",1812.0,HOLY&hyphen;CROSS DAY. ON WHICH THE JEWS WER...,1842,120,The Poetical Works (1888–94): DRAMATIC ROMANCES,Blessedest Thursday's the fat of the week.,y,,1800-1850
...,...,...,...,...,...,...,...,...,...,...,...,...,...
english/merival1/Z300428527,257805,1800-1834 Early Nineteenth-Century,English Poetry,"Merivale, John Herman, 1779-1844",1779.0,FROM CHATTERTON'S “ÆLLA.”,1809,48,Poems original and translated (1844),&indent;&indent;The meads are sprinkled with a...,y,,1750-1800
english/cowperwi/Z300323182,756129,1750-1799 Later Eighteenth-Century,English Poetry,"Cowper, William, 1731-1800",1731.0,SELF&hyphen;LOVE AND TRUTH INCOMPATIBLE.,1761,32,The Works (1835–1837): TRANSLATIONS FROM THE F...,That fill'd my soul with fear and shame;,y,Lyric,1700-1750
english/colersam/Z300317557,61151,1800-1834 Early Nineteenth-Century,English Poetry,"Coleridge, Samuel Taylor, 1772-1834",1772.0,34 EPITAPH ON A BAD MAN,1802,12,The Complete Poetical Works (1912),&indent;This sad brief tale is all that Truth ...,y,Epitaph,1750-1800
american/am0232/Z200151866,361996,1835-1869 Mid Nineteenth-Century,American Poetry,"Osgood, Frances Sargent Locke, 1811-1850",1811.0,LITTLE MAY. SUGGESTED BY A CONVERSATION WITH ...,1841,28,"[Poems, in] The memento (1849)",&indent;Till you are ready too!,y,,1800-1850


In [9]:
# Now cached: call get_chadwyck_corpus() again and report the number of rows
df = get_chadwyck_corpus()
len(df)

259310

In [7]:
# Rows per `subcorpus` value
df['subcorpus'].value_counts()

subcorpus
English Poetry              165498
American Poetry              76262
Modern Poetry                 7950
African-American Poetry       5982
The Faber Poetry Library      3618
Name: count, dtype: int64

In [8]:
# Rows per `period_meta` label, ordered by label ("" marks rows without one)
df['period_meta'].value_counts().sort_index()

period_meta
                                               26867
1500-1580 Tudor                                  187
1500-1700 Emblems, Epigrams, Formal Satires     1450
1500-1700 Songbooks                                8
1550-1900 Miscellanies and Collections          1958
                                               ...  
1860-1880 Mid-Victorian                           98
1870-1899 Later Nineteenth-Century             36147
1880-1901 Late Victorian                         150
1900-1999 Twentieth-Century                    74447
1901-1914 Edwardian Period                         2
Name: count, Length: 17, dtype: int64

In [9]:
# Rows per `period` bucket, in chronological order
# NOTE(review): the buckets look like the half-century of author_dob; confirm in get_chadwyck_corpus
df['period'].value_counts().sort_index()

period
1600-1650    15406
1650-1700    10981
1700-1750    21103
1750-1800    37156
1800-1850    78885
1850-1900    39581
1900-1950    47902
1950-2000     8296
Name: count, dtype: int64

In [10]:
# Smallest number of rows a period needs in order to be kept
ATTPERI_MIN = 5000

# Keep rows that have a period_meta label and 10-100 lines, then drop every
# period left with fewer than ATTPERI_MIN rows
dfq = (
    df[df['period_meta'] != ""]
    .query('10<=num_lines<=100')
    .groupby('period')
    .filter(lambda grp: len(grp) >= ATTPERI_MIN)
)
dfq['period'].value_counts().sort_index()

period
1600-1650     8393
1650-1700     7146
1700-1750    13403
1750-1800    28756
1800-1850    56599
1850-1900    23420
1900-1950    38665
1950-2000     7409
Name: count, dtype: int64

In [11]:
# Row counts for every (period, subcorpus, rhyme) combination present in dfq
dfq.groupby(
    ['period', 'subcorpus', 'rhyme'],
).size()

period     subcorpus                 rhyme
1600-1650  American Poetry           y         360
                                     y n         1
           English Poetry            n           3
                                     y        8028
                                     y n         1
                                              ... 
1950-2000  African-American Poetry             820
           American Poetry                    2251
           English Poetry                     2681
           Modern Poetry                      1041
           The Faber Poetry Library            616
Length: 62, dtype: int64

In [12]:
def add_txt_from_chadwyck(df, path_txt=None):
    """Add a ``txt`` column to ``df`` holding the text file contents for each row.

    Each index value ``x`` of ``df`` is treated as a relative file stem: the
    text is read from ``<path_txt>/<x>.txt`` and stripped of leading/trailing
    whitespace. Rows whose file does not exist get an empty string.

    Args:
        df: DataFrame whose index holds the text ids. It is modified in place.
        path_txt: Directory that contains the ``.txt`` files. When ``None``
            (the default) ``PATH_CHADWYCK_HEALEY_TXT`` is used.

    Returns:
        None. The result is the new ``txt`` column on ``df``.
    """
    root = PATH_CHADWYCK_HEALEY_TXT if path_txt is None else path_txt

    def get_txt(text_id):
        fn = os.path.join(root, text_id) + '.txt'
        if os.path.exists(fn):
            # Decode explicitly so the result does not depend on the platform's
            # default encoding.
            # NOTE(review): assumes the corpus files are UTF-8 — confirm.
            with open(fn, encoding='utf-8') as f:
                return f.read().strip()
        return ""

    df['txt'] = [get_txt(x) for x in tqdm(df.index)]


def get_sampled_chadwyck_corpus(
        df,
        min_lines=10, 
        max_lines=100,
        n_per_sample=1000,
        min_period_count=ATTPERI_MIN,
        period_len=100,
        min_period=1600,
        max_period=2000,
        period_by='period',
        period_by_num=False,
        with_txt=False,
        sample_by=['period', 'subcorpus','rhyme'],
        ):
    """Draw a fixed sample of rows from the corpus table, grouped by ``sample_by``.

    Rows are ordered by the ``id_hash`` column and the first ``n_per_sample``
    rows of each group are kept, so the same input always gives the same
    sample. The caller's ``df`` is not modified.

    Args:
        df: Corpus table with at least ``id_hash``, ``num_lines`` and the
            columns named by ``period_by`` and ``sample_by``.
        min_lines: Inclusive lower bound on ``num_lines``; falsy disables it.
        max_lines: Inclusive upper bound on ``num_lines``; falsy disables it.
            Each bound is applied independently, so a single bound is honoured.
        n_per_sample: Maximum rows kept per group. When falsy, no period
            handling and no sampling is done at all.
        min_period_count: Periods with fewer rows than this are dropped
            before sampling; falsy disables the check.
        period_len: Bucket width, used only when ``period_by_num`` is true.
        min_period: Inclusive lower bound on the bucket start
            (``period_by_num`` only).
        max_period: Inclusive upper bound on the bucket start
            (``period_by_num`` only).
        period_by: Column from which the ``period`` column is derived.
        period_by_num: If true, floor the numeric ``period_by`` values to
            multiples of ``period_len`` and label them ``"start-end"``.
            Otherwise drop rows where ``period_by`` is ``""`` and use its
            string form as ``period``.
        with_txt: If true, add a ``txt`` column via ``add_txt_from_chadwyck``.
        sample_by: Column name(s) handed to ``groupby`` for sampling. The
            default list is never mutated.

    Returns:
        A new DataFrame with the selected rows, in ``id_hash`` order.
    """
    # sort by id hash so that "first n per group" is a fixed selection
    df = df.sort_values(by='id_hash')

    # Apply each line-count bound on its own; requiring both to be set would
    # silently ignore a one-sided bound.
    if min_lines:
        df = df[df['num_lines'] >= min_lines]
    if max_lines:
        df = df[df['num_lines'] <= max_lines]

    if n_per_sample:
        # `assign` returns a new frame, so nothing is written onto a filtered
        # slice (avoids chained-assignment warnings).
        if period_by_num:
            bucket_start = df[period_by].apply(
                lambda x: x//period_len*period_len if x else None
            )
            df = df.assign(period=bucket_start)
            df = df.query(f'{min_period}<=period<={max_period}')
            bucket_label = df['period'].astype(int).apply(lambda x: f'{x}-{x+period_len}')
            df = df.assign(period=bucket_label)
        else:
            df = df[df[period_by]!=""]
            df = df.assign(period=df[period_by].apply(str))

        if min_period_count:
            df = df.groupby('period').filter(lambda x: len(x) >= min_period_count)

        df = df.groupby(sample_by).head(n_per_sample)

    if with_txt:
        # add_txt_from_chadwyck writes the column in place; give it a frame we own
        df = df.copy()
        add_txt_from_chadwyck(df)

    return df
        

In [13]:
# Sample by period only: up to n_per_sample (default 1000) rows per period, with text attached
df_smpl_by_period = get_sampled_chadwyck_corpus(
    df,
    sample_by=['period'],
    with_txt=True,
)
df_smpl_by_period

100%|██████████| 8000/8000 [00:02<00:00, 3399.26it/s]


Unnamed: 0_level_0,id_hash,period_meta,subcorpus,author,author_dob,title,year,num_lines,volume,line,rhyme,genre,period,txt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
c20-english/ep20152/Z200586158,2,1900-1999 Twentieth-Century,English Poetry,"Rosenberg, Isaac, 1890-1918",1890.0,‘I KNOW YOU GOLDEN’,1920,12,,I know you golden,,,1850-1900,I know you golden\nAs summer and pale\nAs the ...
english/kerpeter/Z300410015,3,1660-1700 Restoration,English Poetry,"Ker, Patrick, fl. 1691",1691.0,On the Memory of a Married Maid.,1721,16,Flosculum Poeticum (1684),A Marrie'd&hyphen;Virgin to remain.,y,,1650-1700,"Within this Coffin here does lie,\nA Pattern o..."
american/am1258/Z200196105,7,1835-1869 Mid Nineteenth-Century,American Poetry,"Emerson, Ralph Waldo, 1803-1882",1803.0,SEPTEMBER,1833,16,Poems [1904],"&indent;Of a gusty Autumn day,",y,,1800-1850,In the turbulent beauty\n Of a gusty Autumn...
english/gilfilla/Z400379001,8,1800-1834 Early Nineteenth-Century,English Poetry,"Gilfillan, Robert, 1798-1850",1798.0,NORWEGIAN SMUGGLER'S SONG.,1828,36,Poems and Songs (1851),"&indent;The storm is loud and high,",y,,1750-1800,"Awake, you midnight mariners!\n The storm i..."
english/wattwill/Z300523577,18,1800-1834 Early Nineteenth-Century,English Poetry,"Watt, William, 1793-1859",1793.0,BAB AT THE BOWSTER.,1823,40,Poems and Songs (1860),Wi' touslet hair and drowsy een?,y,Ballad,1750-1800,"Lassie, whare were you yestreen,\nWi' touslet ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
modern/oxf0901/Z200245175,133354,1900-1999 Twentieth-Century,Modern Poetry,"Oswald, Alice, 1966-",1966.0,MOUNTAINS,1996,20,,"Something is in the line and air along edges,",,,1950-2000,"Something is in the line and air along edges,\..."
c20-african-american/da22011/Z300262758,133390,1900-1999 Twentieth-Century,African-American Poetry,"Jackson, Angela, 1951-",1951.0,So This Is How the Women,1981,18,,So this is how the women,,,1950-2000,So this is how the women\ngods lost divinity o...
faber/fa1201/Z300559865,133416,1900-1999 Twentieth-Century,The Faber Poetry Library,"Hofmann, Michael, 1957-",1957.0,Day of Reckoning,1987,17,,"When we drove across America, going West,",,,1950-2000,"When we drove across America, going West,\nI t..."
c20-american/am20091/Z300216207,133610,1900-1999 Twentieth-Century,American Poetry,"Wojahn, David, 1953-",1953.0,"8. Fab Four Tour Deutschland: Hamburg, 1961",1983,22,,"The emcee oozes pomade, affecting the hip Amer...",,,1950-2000,"""Und now Ladies und Gentlemun, Der Peedles!""\n..."


In [14]:
# Sample by rhyme label: only rows marked exactly 'y' or 'n' are eligible, which
# excludes blank and mixed ('y n') annotations
has_rhyme_label = df['rhyme'].isin(['y', 'n'])
df_smpl_by_rhyme = get_sampled_chadwyck_corpus(
    df[has_rhyme_label],
    sample_by=['rhyme'],
    with_txt=True,
)
df_smpl_by_rhyme

100%|██████████| 2000/2000 [00:00<00:00, 7924.43it/s] 


Unnamed: 0_level_0,id_hash,period_meta,subcorpus,author,author_dob,title,year,num_lines,volume,line,rhyme,genre,period,txt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
english/kerpeter/Z300410015,3,1660-1700 Restoration,English Poetry,"Ker, Patrick, fl. 1691",1691.0,On the Memory of a Married Maid.,1721,16,Flosculum Poeticum (1684),A Marrie'd&hyphen;Virgin to remain.,y,,1650-1700,"Within this Coffin here does lie,\nA Pattern o..."
american/am1258/Z200196105,7,1835-1869 Mid Nineteenth-Century,American Poetry,"Emerson, Ralph Waldo, 1803-1882",1803.0,SEPTEMBER,1833,16,Poems [1904],"&indent;Of a gusty Autumn day,",y,,1800-1850,In the turbulent beauty\n Of a gusty Autumn...
english/gilfilla/Z400379001,8,1800-1834 Early Nineteenth-Century,English Poetry,"Gilfillan, Robert, 1798-1850",1798.0,NORWEGIAN SMUGGLER'S SONG.,1828,36,Poems and Songs (1851),"&indent;The storm is loud and high,",y,,1750-1800,"Awake, you midnight mariners!\n The storm i..."
english/wattwill/Z300523577,18,1800-1834 Early Nineteenth-Century,English Poetry,"Watt, William, 1793-1859",1793.0,BAB AT THE BOWSTER.,1823,40,Poems and Songs (1860),Wi' touslet hair and drowsy een?,y,Ballad,1750-1800,"Lassie, whare were you yestreen,\nWi' touslet ..."
english-ed2/ep2555/Z300670851,19,,English Poetry,"Evans, George Essex, 1863-1909",1863.0,ODE FOR COMMONWEALTH DAY,1893,72,The Collected Verse of G. Essex Evans: Memoria...,&indent;Are beating at the Gates of Day!,y,,1850-1900,Awake! Arise! The wings of dawn\n Are beati...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
american/am1185/Z200192771,235811,1870-1899 Later Nineteenth-Century,American Poetry,"Crane, Stephen, 1871-1900",1871.0,[To the maiden],1901,10,The University of Virginia Edition of The Work...,To the maiden,n,,1850-1900,To the maiden\nThe sea was blue meadow\nAlive ...
american/am0338/Z300156539,235983,1870-1899 Later Nineteenth-Century,American Poetry,"Wilcox, Ella Wheeler, 1850-1919",1850.0,IMMORTALITY,1880,32,Poetical works [1917],"By slow self&hyphen;conquest, comradeship with...",n,,1850-1900,"Immortal life is something to be earned,\nBy s..."
english/tennysal/Z300506928,236040,1835-1869 Mid Nineteenth-Century,English Poetry,"Tennyson, Alfred Tennyson, Baron, 1809-1892",1809.0,SPECIMEN OF A TRANSLATION OF THE ILIAD IN BLAN...,1839,22,The Works (1907–1908),"Then loosed their sweating horses from the yoke,",n,,1800-1850,So Hector spoke; the Trojans roared applause;\...
english/landonla/Z300412008,236787,1835-1869 Mid Nineteenth-Century,English Poetry,"L. E. L. (Letitia Elizabeth Landon), 1802-1838",1802.0,INFLUENCE OF POETRY,1832,10,Life and Literary Remains (1841),This is the charm of poetry: it comes,n,,1800-1850,This is the charm of poetry: it comes\nOn sad ...


In [15]:
# Sample by period and subcorpus: up to n_per_sample (default 1000) rows per
# (period, subcorpus) pair, with text attached
df_smpl_by_period_subcorpus = get_sampled_chadwyck_corpus(
    df,
    sample_by=['period', 'subcorpus'],
    with_txt=True,
)
df_smpl_by_period_subcorpus

100%|██████████| 22718/22718 [00:02<00:00, 8286.32it/s]


Unnamed: 0_level_0,id_hash,period_meta,subcorpus,author,author_dob,title,year,num_lines,volume,line,rhyme,genre,period,txt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
c20-english/ep20152/Z200586158,2,1900-1999 Twentieth-Century,English Poetry,"Rosenberg, Isaac, 1890-1918",1890.0,‘I KNOW YOU GOLDEN’,1920,12,,I know you golden,,,1850-1900,I know you golden\nAs summer and pale\nAs the ...
english/kerpeter/Z300410015,3,1660-1700 Restoration,English Poetry,"Ker, Patrick, fl. 1691",1691.0,On the Memory of a Married Maid.,1721,16,Flosculum Poeticum (1684),A Marrie'd&hyphen;Virgin to remain.,y,,1650-1700,"Within this Coffin here does lie,\nA Pattern o..."
american/am1258/Z200196105,7,1835-1869 Mid Nineteenth-Century,American Poetry,"Emerson, Ralph Waldo, 1803-1882",1803.0,SEPTEMBER,1833,16,Poems [1904],"&indent;Of a gusty Autumn day,",y,,1800-1850,In the turbulent beauty\n Of a gusty Autumn...
english/gilfilla/Z400379001,8,1800-1834 Early Nineteenth-Century,English Poetry,"Gilfillan, Robert, 1798-1850",1798.0,NORWEGIAN SMUGGLER'S SONG.,1828,36,Poems and Songs (1851),"&indent;The storm is loud and high,",y,,1750-1800,"Awake, you midnight mariners!\n The storm i..."
english/wattwill/Z300523577,18,1800-1834 Early Nineteenth-Century,English Poetry,"Watt, William, 1793-1859",1793.0,BAB AT THE BOWSTER.,1823,40,Poems and Songs (1860),Wi' touslet hair and drowsy een?,y,Ballad,1750-1800,"Lassie, whare were you yestreen,\nWi' touslet ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
faber/fa0401/Z300557474,999109,1900-1999 Twentieth-Century,The Faber Poetry Library,"Boyle, Charles, 1951-",1951.0,(i) Underground,1981,18,,A woman sleeping on the underground:,,,1950-2000,"A woman sleeping on the underground:\nneat, As..."
c20-african-american/da20076/Z300330273,999377,1900-1999 Twentieth-Century,African-American Poetry,"Weaver, Michael S., 1951-",1951.0,Duke Ellington and His Mistress Make Love,1981,34,,I draw the sheets written with life,,,1950-2000,I draw the sheets written with life\naround me...
african-american/hortonge/Z200399812,999379,1835-1869 Mid Nineteenth-Century,African-American Poetry,"Horton, George Moses, 1798?-ca.1880",1798.0,THE POWERS OF LOVE.,1828,35,Naked Genius (1865),It lifts the poor man from his cell,y,,1750-1800,It lifts the poor man from his cell\n To fo...
c20-african-american/da22011/Z300262800,999421,1900-1999 Twentieth-Century,African-American Poetry,"Jackson, Angela, 1951-",1951.0,"george, after all, means farmer",1981,39,,he carried a tomato plant &,,,1950-2000,he carried a tomato plant &\nwatermelon\nacros...


In [16]:
# Write the period × subcorpus sample to disk; the rhyme and period saves below are disabled
# save_sample(df_smpl_by_rhyme, '../data/corpus_sample_by_rhyme.csv.gz')
# save_sample(df_smpl_by_period, '../data/corpus_sample_by_period.csv.gz')
save_sample(df_smpl_by_period_subcorpus, '../data/corpus_sample_by_period_subcorpus.csv.gz')

Saved sample to ../data/corpus_sample_by_period_subcorpus.csv.gz
