# Rhythm

In [2]:
# Optional one-time setup: uncomment the next line to install the dependencies
# listed in ../requirements.txt.
# !pip install -r ../requirements.txt
import sys
# Add the parent directory to the module search path so the import below can be
# resolved (presumably the `generative_formalism` package lives there — confirm).
sys.path.append('../')
# NOTE(review): the star import is presumably where the helpers used by later
# cells come from (e.g. `documentation`, `get_rhythm_for_shakespeare_sonnets`);
# none of them are defined in this notebook.
from generative_formalism import *

In [3]:
# Render the docstring of the loader (the Markdown block at the top of this cell's output).
documentation(get_rhythm_for_shakespeare_sonnets)
# Compute rhythm measurements for Shakespeare's sonnets, one row per sonnet.
# Per the docstring, cached data is reused unless force=True is passed.
df_shak_rhythm = get_rhythm_for_shakespeare_sonnets()
# Bare last expression so the notebook renders the resulting DataFrame.
df_shak_rhythm

##### `get_rhythm_for_shakespeare_sonnets`

```md
Load and analyze rhythm in Shakespeare's sonnets.

    Reads the complete text of Shakespeare's sonnets, splits into individual poems,
    and computes rhythm measurements for each sonnet.

    Parameters
    ----------
    force : bool, default=False
        If True, re-parse sonnets even if cached data exists.

    Returns
    -------
    pd.DataFrame
        DataFrame with rhythm measurements for each of the 154 sonnets,
        indexed by sonnet ID (e.g., 'shakespeare_sonnet_001').

    Calls
    -----
    - get_id_hash(s) [for each sonnet text]
    - get_rhythm_for_txt(txt, force=force) [for each sonnet]
    
```
----


* Getting rhythm for shakespeare sonnets: 100%|██████████| 154/154 [00:00<00:00, 219.92it/s]


Unnamed: 0_level_0,id_hash,txt,is_iambic_pentameter,is_unambigously_iambic_pentameter,syll01_stress,syll02_stress,syll03_stress,syll04_stress,syll05_stress,syll06_stress,syll07_stress,syll08_stress,syll09_stress,syll10_stress,forth_syllable_stressed,perc_ww_in_meter
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
shakespeare_sonnet_001,229170,"FROM fairest creatures we desire increase,\nTh...",0.357143,0.142857,0.214286,0.642857,0.214286,0.928571,0.214286,0.500000,0.285714,0.785714,0.285714,0.692308,0.928571,0.061538
shakespeare_sonnet_002,499179,"When forty winters shall beseige thy brow,\nAn...",0.714286,0.428571,0.071429,0.714286,0.214286,0.928571,0.214286,0.714286,0.285714,0.928571,0.071429,0.846154,0.928571,0.037879
shakespeare_sonnet_003,189136,"Look in thy glass, and tell the face thou view...",0.357143,0.071429,0.285714,0.642857,0.071429,0.785714,0.000000,0.785714,0.071429,0.642857,0.214286,0.692308,0.785714,0.060150
shakespeare_sonnet_004,645721,"Unthrifty loveliness, why dost thou spend\nUpo...",0.500000,0.214286,0.285714,0.571429,0.214286,0.857143,0.071429,0.785714,0.071429,0.500000,0.142857,0.833333,0.857143,0.070866
shakespeare_sonnet_005,845739,"Those hours, that with gentle work did frame\n...",0.500000,0.285714,0.357143,0.642857,0.071429,0.785714,0.142857,0.785714,0.071429,0.714286,0.285714,0.714286,0.785714,0.059701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shakespeare_sonnet_150,117333,"O, from what power hast thou this powerful mig...",0.642857,0.142857,0.000000,0.642857,0.000000,0.714286,0.071429,0.785714,0.142857,0.714286,0.214286,0.785714,0.714286,0.036765
shakespeare_sonnet_151,796534,Love is too young to know what conscience is;\...,0.428571,0.142857,0.214286,0.571429,0.214286,0.714286,0.214286,0.785714,0.357143,0.714286,0.142857,0.714286,0.714286,0.062500
shakespeare_sonnet_152,284024,"In loving thee thou know'st I am forsworn,\nBu...",0.357143,0.214286,0.071429,0.571429,0.071429,0.928571,0.357143,0.857143,0.071429,0.571429,0.357143,0.928571,0.928571,0.036232
shakespeare_sonnet_153,852282,"Cupid laid by his brand, and fell asleep:\nA m...",0.571429,0.285714,0.071429,0.785714,0.428571,0.785714,0.142857,0.785714,0.214286,0.785714,0.230769,1.000000,0.785714,0.030303


In [7]:
# Render the docstring of the sample-level rhythm function.
documentation(get_rhythm_for_sample)

# Draw the corpus sample; presumably 'sonnet_period' names the sampling scheme
# or column to sample by — TODO confirm against get_chadwyck_corpus_sampled_by.
df_smpl = get_chadwyck_corpus_sampled_by('sonnet_period')
# Per the docstring: one row of rhythm measurements per poem, indexed by poem ID.
# The result is only stored here, not displayed.
df_smpl_rhythm = get_rhythm_for_sample(df_smpl)

##### `get_rhythm_for_sample`

```md
Extract rhythm measurements for a sample of poems.

    Computes rhythm measurements (meter, stress patterns, etc.) for each poem
    in the sample, returning a DataFrame with one row per poem.

    Parameters
    ----------
    df_smpl : pd.DataFrame
        DataFrame containing poem texts in a 'txt' column, indexed by poem IDs.
    stash : HashStash, default=STASH_RHYTHM
        Cache storage for parsed data.
    force : bool, default=False
        If True, re-parse even if cached data exists.
    gen : bool, default=True
        If True, generate new parses; if False, only use cached data.
    verbose : bool, default=DEFAULT_VERBOSE
        If True, show progress information.
    **kwargs
        Additional keyword arguments (unused).

    Returns
    -------
    pd.DataFrame
        DataFrame with rhythm measurements, indexed by poem ID, or empty
        DataFrame if no valid measurements found.

    Calls
    -----
    - _clean_df(df_smpl)
    - get_rhythm_for_txt(txt, stash=stash, force=force, postprocess=True) [if gen=True]
    - postprocess_parses_data(stash.get(txt)) [if gen=False]
    
```
----


* Getting rhythm for sample: 100%|██████████| 999/999 [00:05<00:00, 175.67it/s]
