In [2]:
import csv

def get_poem_dataset_lookup():
    """
    Return a dictionary with poems as key and their respective dataset association as value
    """
    poem_dataset_lookup = {}
    with open("data_poems/consolidated_batches.csv", "r") as f:
        csv_reader = csv.reader(f)
        for row in csv_reader:
            poem1 = row[1]
            poem2 = row[2]
            dataset1 = row[3]
            dataset2 = row[4]
            
            if poem1.endswith(" <br>"):
                poem1 = poem1[:-5]
            if poem2.endswith(" <br>"):
                poem2 = poem2[:-5]
            
            poem_dataset_lookup[poem1] = dataset1
            poem_dataset_lookup[poem2] = dataset2
    return poem_dataset_lookup
        

def get_scores(method):
    """
    Retrieves the score generated from a specific method
    """
    poem_scores = {}
    poem_dataset_lookup = get_poem_dataset_lookup()
    with open("scores/" + method + "_subset.csv", "r") as f:
        csv_reader = csv.reader(f)
        header = next(csv_reader)
        for row in csv_reader:
            poem = row[0]
            if poem.endswith(" <br>"):
                poem = poem[:-5]
            dataset = poem_dataset_lookup[poem]
            scores = {"poem": poem, "dataset": dataset}
            for idx, score in enumerate(row[1:], 1):
                cat = method + "_" + header[idx]
                scores[cat] = score
            poem_scores[poem] = scores 
    return poem_scores, header
                
method_scores = []
for method in ['bertranker', 'crowdgppl', 'bws', 'gppl']:
    method_scores.append(get_scores(method)[0])
    

In [3]:
# Merge all dictionaries into one on poem basis
cons_dicts = {}
for poem in method_scores[0]:
    cons_dicts[poem] = method_scores[0][poem]
    for d in method_scores[1:]:
        current_scores = cons_dicts[poem]
        other_scores = d[poem]
        current_scores = {**current_scores, **other_scores}
        cons_dicts[poem] = current_scores        
        


In [4]:
import csv 
import math

fieldnames = list(cons_dicts[list(cons_dicts.keys())[0]].keys())
# Write all score into one large file
with open("scores/consolidated_single_poems.csv", "w+") as f:
    writer = csv.writer(f)
    writer.writerow(fieldnames)
    for p in cons_dicts:
        line = []
        for col in fieldnames:
            try:
                value = cons_dicts[p][col]
                line.append(value)
            except:
                print(col)
                print(p)
        writer.writerow(line)


['poem', 'dataset', 'bertranker_all', 'bertranker_coherent', 'bertranker_grammatical', 'bertranker_melodious', 'bertranker_moved', 'bertranker_real', 'bertranker_rhyming', 'bertranker_readable', 'bertranker_comprehensible', 'bertranker_intense', 'bertranker_liking', 'crowdgppl_all', 'crowdgppl_coherent', 'crowdgppl_grammatical', 'crowdgppl_melodious', 'crowdgppl_moved', 'crowdgppl_real', 'crowdgppl_rhyming', 'crowdgppl_readable', 'crowdgppl_comprehensible', 'crowdgppl_intense', 'crowdgppl_liking', 'bws_all', 'bws_coherent', 'bws_grammatical', 'bws_melodious', 'bws_moved', 'bws_real', 'bws_rhyming', 'bws_readable', 'bws_comprehensible', 'bws_intense', 'bws_liking', 'gppl_all', 'gppl_coherent', 'gppl_grammatical', 'gppl_melodious', 'gppl_moved', 'gppl_real', 'gppl_rhyming', 'gppl_readable', 'gppl_comprehensible', 'gppl_intense', 'gppl_liking']
46


In [5]:
import csv
import math 
mins = [math.inf]*44
maxs = [-math.inf]*44

lines = []
with open("scores/consolidated_single_poems.csv") as f:
    csv_reader = csv.reader(f)
    header = next(csv_reader)
    for row in csv_reader:
        lines.append(row)
        for i, num in enumerate(row[2:]):
            mins[i] = min(mins[i], float(num))
            maxs[i] = max(maxs[i], float(num))
print(mins)
print(maxs)


normalized_lines = []
for line in lines:
    norm_line = line[:2]
    for i, num in enumerate(line[2:]):
        norm_val = (float(num) - mins[i])/(maxs[i] - mins[i])
        norm_line.append(norm_val)
    normalized_lines.append(norm_line)

with open("scores/normalized_scores.csv", "w+") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(header)
    for line in normalized_lines:
        csv_writer.writerow(line)
    
    
        
    
    

[-0.2329796403646469, -0.25375258922576904, -0.2615964710712433, -0.2625853419303894, -0.25882112979888916, -0.26394417881965637, -0.25420114398002625, -0.2670304775238037, -0.260204017162323, -0.25886502861976624, -0.25799575448036194, -4.87951561733533, -4.87951561733533, -3.0437596331222236, -4.467384979067327, -5.503429999828406, -3.8889251577550006, -4.920130068629585, -5.725828063649544, -4.214829459834426, -3.484094375177864, -3.954453944379249, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492, -1.0623027755847492]
[-0.23271417617797852, -0.23600099980831146, -0.24809706211090088, -0.24998334050178528, -0.24576006829738617, -0.25107160210609436, -0.2386636883020401, -0.25433170795440674, -0.24786609411239624, -0.24846740067005157, -0.24420931935310364, 4.06

In [6]:
dataset_lookup = get_poem_dataset_lookup()
datasets = set(list(dataset_lookup.values()))

dataset_samples = {}
for ds in datasets:
    print(ds)
    # Get 10 samples for each ds
    samples = []
    for poem, dataset in dataset_lookup.items():
        if dataset == ds:
            if ds == "jhamtani":
                if max([len(line) for line in poem.split("<br>")]) > 42:
                    continue
            p = poem.replace("<br>", "\n")
            samples.append(p)
        if len(samples) >= 10:
            break
    dataset_samples[ds] = samples

for key in dataset_samples:
    print(f"~~~~~~~~~~~ {key} ~~~~~~~~~~~")
    for sample in dataset_samples[key]:
        print("=========")
        print(sample)
        print("=========")
    print("\n\n")
            

jhamtani
gpt2
gutenberg
lstm
ngram
hafez
true_poetry
deepspeare
~~~~~~~~~~~ jhamtani ~~~~~~~~~~~
to look a farther than a foemen be 
she with me , and in countenance i fall 
me for her , sin tis ever worse than all 
but kind of life to grace and genius came
followed my boy , to whom the beggar lies 
o ye of england ! for ten times lord 
the same with sweetest and troubled eyes 
is to believe all the world in stress
and call him there , he 's never ending 
the world of life may make and grate unto 
from the mound of pure or orbed rain 
seeing , the bard , and waving that intent
but care with me the burthen of my life 
mary with fit hand rather than to pain 
i seem for other is content , though rife 
oh ! i , at life thereof my days outworn
a loyal anger to the lips of sea 
and glorious just , and mad with me 
and make the worst to take it out , to me 
in chase themselves upon the other throng
and pall their singing wail the door 
the furious kine climb inland he 
the heads bread in the 