# Rhyme

In [1]:
# Optional one-time setup: uncomment to install the project's dependencies.
# !pip install -r ../requirements.txt
import sys
# Put the parent directory on the import path before the project import below.
sys.path.append('../')
# NOTE(review): star import. `pd` and `os` are used later in this notebook but
# never imported explicitly here, so they presumably arrive through this line,
# as do the project helpers and constants (e.g. PATH_TEX) - confirm.
from generative_formalism import *
import plotnine as p9
# Notebook-wide display settings: cap DataFrame previews at 25 rows and set the
# plotnine resolution and default figure size.
pd.options.display.max_rows = 25
p9.options.dpi=300
p9.options.figure_size=(10,5)

In [2]:
# Sample input for a quick check of the rhyme detector: a 14-line sonnet.
txt = """
From fairest creatures we desire increase,
That thereby beauty’s rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory;
But thou, contracted to thine own bright eyes,
Feed’st thy light’s flame with self-substantial fuel,
Making a famine where abundance lies,
Thyself thy foe, to thy sweet self too cruel.
Thou that art now the world’s fresh ornament
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And, tender churl, mak’st waste in niggarding.
   Pity the world, or else this glutton be,
   To eat the world’s due, by the grave and thee.
"""

# Left as the bare last expression so the notebook displays the result: a dict
# of line counts plus the detected rhyming line pairs, each with a numeric score.
get_rhyme_for_txt(txt)

{'num_rhyming_lines': 11,
 'num_perfectly_rhyming_lines': 5,
 'num_lines': 14,
 'rhyming_line_pairs': [('From fairest creatures we desire increase,',
   'But as the riper should by time decease,',
   0.7637626158259733),
  ('But thou, contracted to thine own bright eyes,',
   'Making a famine where abundance lies,',
   0),
  ("Feed'st thy light's flame with self-substantial fuel,",
   'Thyself thy foe, to thy sweet self too cruel.',
   0.7637626158259733),
  ("Thou that art now the world's fresh ornament",
   'Within thine own bud buriest thy content,',
   0.8539125638299666),
  ('His tender heir might bear his memory;',
   'Pity the world, or else this glutton be,',
   0),
  ('Pity the world, or else this glutton be,',
   "To eat the world's due, by the grave and thee.",
   0)]}

In [3]:
# Fetch the corpus sample (going by the helper's name, the rhyme-based Chadwyck
# sample used in the paper - confirm) and compute rhyme measurements for it.
# One of these calls reports progress over 2000 items and took roughly 80 s in
# the recorded run.
df_smpl = get_chadwyck_corpus_sampled_by_rhyme_as_in_paper()
df_rhyme_data_in_paper = get_rhyme_for_sample(df_smpl)

100%|██████████| 2000/2000 [01:21<00:00, 24.40it/s]


## Testing rhyme measurement

In [4]:
def get_rhyming_accuracy_by_rhyme_threshold(df, pred_by=RHYME_PRED_FEATURE):
    """Score every observed value of ``pred_by`` as a rhyme / no-rhyme cutoff.

    Rows whose ``rhyme`` label is not ``'y'`` or ``'n'`` are ignored. Each
    distinct value found in ``df[pred_by]`` is then tried as a threshold: a row
    is predicted to rhyme when its ``pred_by`` value is ``>=`` the threshold,
    and those predictions are compared with ``df.rhyme_bool`` through
    ``get_pred_stats``.

    Args:
        df: DataFrame with ``rhyme``, ``rhyme_bool`` and ``pred_by`` columns.
            It is never modified.
        pred_by: Name of the column whose values are thresholded.

    Returns:
        DataFrame indexed by ``(pred_by, opt)`` with one row per threshold,
        holding whatever ``get_pred_stats`` returns plus ``support`` (the
        number of labelled rows that were scored).
    """
    labelled = df[df.rhyme.isin({'y', 'n'})]
    scores = labelled[pred_by]
    truth = labelled.rhyme_bool
    n_labelled = len(labelled)

    rows = []
    for opt in scores.unique():
        # Vectorised comparison kept in a standalone Series: nothing is written
        # back into the filtered frame, which pandas can otherwise flag with
        # SettingWithCopyWarning.
        predicted = (scores >= opt).rename('rhyme_pred')
        stats = get_pred_stats(predicted, truth)
        rows.append({'pred_by': pred_by, 'opt': opt, **stats, 'support': n_labelled})

    # Each (pred_by, opt) pair occurs once, so the median does not change any
    # value; it is kept so the result has the same index and float columns.
    return pd.DataFrame(rows).groupby(['pred_by', 'opt']).median()

In [5]:
# Evaluate every candidate threshold and list the best F1 score first.
df_preds_in_paper = (
    get_rhyming_accuracy_by_rhyme_threshold(df_rhyme_data_in_paper)
    .sort_values('f1_score', ascending=False)
)
df_preds_in_paper

Unnamed: 0_level_0,Unnamed: 1_level_0,f1_score,precision,recall,accuracy,true_positives,false_positives,true_negatives,false_negatives,support
pred_by,opt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
num_perfectly_rhyming_lines_per10l,4,0.889109,0.880392,0.898,0.888,898.0,122.0,878.0,102.0,2000.0
num_perfectly_rhyming_lines_per10l,3,0.881293,0.818884,0.954,0.8715,954.0,211.0,789.0,46.0,2000.0
num_perfectly_rhyming_lines_per10l,5,0.840471,0.904378,0.785,0.851,785.0,83.0,917.0,215.0,2000.0
num_perfectly_rhyming_lines_per10l,2,0.82501,0.710774,0.983,0.7915,983.0,400.0,600.0,17.0,2000.0
num_perfectly_rhyming_lines_per10l,6,0.760282,0.921652,0.647,0.796,647.0,55.0,945.0,353.0,2000.0
num_perfectly_rhyming_lines_per10l,1,0.746336,0.597833,0.993,0.6625,993.0,668.0,332.0,7.0,2000.0
num_perfectly_rhyming_lines_per10l,0,0.666667,0.5,1.0,0.5,1000.0,1000.0,0.0,0.0,2000.0
num_perfectly_rhyming_lines_per10l,7,0.566064,0.929224,0.407,0.688,407.0,31.0,969.0,593.0,2000.0
num_perfectly_rhyming_lines_per10l,8,0.34931,0.930736,0.215,0.5995,215.0,16.0,984.0,785.0,2000.0
num_perfectly_rhyming_lines_per10l,9,0.128492,0.932432,0.069,0.532,69.0,5.0,995.0,931.0,2000.0


In [6]:
def get_rhyming_preds_table(df_preds, save_latex_to=None):
    """Format threshold statistics as a percentage table, optionally saving LaTeX.

    Args:
        df_preds: DataFrame that exposes ``opt``, ``precision``, ``recall`` and
            ``f1_score`` once its index is reset (for example the output of
            ``get_rhyming_accuracy_by_rhyme_threshold``).
        save_latex_to: Optional path of a ``.tex`` file. When given, a copy of
            the table with ``#`` backslash-escaped in its labels is written
            there; the parent directory is created if it does not exist.

    Returns:
        DataFrame indexed by threshold (index name ``'# Rhymes per 10 lines'``,
        ascending) with string columns ``Precision``, ``Recall`` and
        ``F1 score``, each value rounded to a whole percentage such as ``'88%'``.
    """
    metric_labels = {'precision': 'Precision', 'recall': 'Recall', 'f1_score': 'F1 score'}
    df_preds_tbl = (
        df_preds.reset_index()
        .set_index('opt')[list(metric_labels)]
        .sort_index()
        .rename(columns=metric_labels)
        .rename_axis('# Rhymes per 10 lines')
        .round(2)
    )
    # Column-wise Series.map works on every pandas version, unlike
    # DataFrame.applymap, which is deprecated since pandas 2.1.
    df_preds_tbl = df_preds_tbl.apply(lambda col: col.map(lambda x: f'{x*100:.0f}%'))

    if save_latex_to:
        # '\\#' spells backslash + '#' explicitly; the bare '\#' form is an
        # invalid escape sequence that newer Python versions warn about.
        df_preds_tbl_latex = df_preds_tbl.rename_axis(df_preds_tbl.index.name.replace('#', '\\#'))
        df_preds_tbl_latex.columns = [x.replace('#', '\\#') for x in df_preds_tbl_latex.columns]

        # dirname is '' for a bare filename, and os.makedirs('') raises.
        out_dir = os.path.dirname(save_latex_to)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # NOTE(review): cell values contain '%', which LaTeX treats as a comment
        # character unless to_latex escapes it - confirm the generated file.
        df_preds_tbl_latex.to_latex(save_latex_to)

    return df_preds_tbl

# Build the display table and write its LaTeX version under PATH_TEX.
df_preds_tbl_in_paper = get_rhyming_preds_table(
    df_preds_in_paper,
    save_latex_to=os.path.join(PATH_TEX, 'table_5.rhyme_accuracy.tex'),
)
df_preds_tbl_in_paper

Unnamed: 0_level_0,Precision,Recall,F1 score
# Rhymes per 10 lines,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,50%,100%,67%
1,60%,99%,75%
2,71%,98%,83%
3,82%,95%,88%
4,88%,90%,89%
5,90%,78%,84%
6,92%,65%,76%
7,93%,41%,57%
8,93%,22%,35%
9,93%,7%,13%
