In [None]:
import pandas as pd
from rouge import Rouge

In [None]:
# Load the poem collection. The JSON is transposed after reading so that each
# original top-level key becomes one row; that key is kept in a `name` column
# and the index is replaced by a plain 0..n-1 range.
# NOTE(review): absolute, machine-specific path - consider a configurable DATA_DIR.
df = (
    pd.read_json('/Users/karpovapolina/Downloads/data.json')
    .T
    .assign(name=lambda frame: frame.index)
    .reset_index(drop=True)
)

In [None]:
class NoveltyCalculator:
    '''Scores how novel a poem is relative to a corpus, using ROUGE-L.

    Novelty is ``1 - mean(ROUGE-L F-score)`` of the poem against the other
    corpus entries: the less a poem overlaps with the rest of the corpus, the
    closer its score is to 1.
    '''

    def __init__(self, corpus):
        '''Keep the reference corpus and create the ROUGE scorer.

        Args:
            corpus: collection of poem texts (strings) to compare against.
                It is iterated once per `compute_novelty` call.
        '''
        self.corpus = corpus
        self.rouge = Rouge()

    @staticmethod
    def _has_tokens(text):
        '''Return True if `text` is a string with at least one token besides periods.

        Rouge.get_scores raises on an empty hypothesis/reference, and a text
        made only of periods/whitespace appears to end up empty as well once
        rouge splits it into sentences on '.', so such texts are never scored.
        '''
        return isinstance(text, str) and bool(text.replace('.', ' ').split())

    def compute_novelty(self, poem):
        '''Return the novelty of `poem` with respect to the stored corpus.

        Args:
            poem: text of the poem to score.

        Returns:
            ``1 - average ROUGE-L F-score`` against the other corpus entries,
            or 0.0 when `poem` has no scorable content or there is nothing to
            compare it with.
        '''
        if not self._has_tokens(poem):
            return 0.0

        similarities = []
        self_skipped = False

        for other_poem in self.corpus:
            # Skip the poem's own corpus entry exactly once. Comparing by value
            # for every entry would also drop verbatim duplicates and make a
            # copied poem look more novel than it is.
            if not self_skipped and other_poem == poem:
                self_skipped = True
                continue
            # Blank / non-string corpus entries cannot be scored by rouge.
            if not self._has_tokens(other_poem):
                continue
            scores = self.rouge.get_scores(poem, other_poem)
            similarities.append(scores[0]['rouge-l']['f'])

        if not similarities:
            return 0.0

        return 1 - sum(similarities) / len(similarities)


# Score every poem text in the data frame against the whole corpus.
corpus = df['text'].to_list()
novelty_calculator = NoveltyCalculator(corpus)

novelty_scores = []
for i, poem in enumerate(corpus):
    # One progress line per poem (1-based position out of the corpus size).
    print(f"Computing novelty for poem {i+1} out of {len(corpus)}")
    poem_novelty = novelty_calculator.compute_novelty(poem)
    novelty_scores.append(poem_novelty)
    

In [None]:
from rouge import Rouge
import itertools

class InterNoveltyCalculator:
    '''Calculates an inter-novelty score for a poem with ROUGE-L.

    Inter-novelty measures repetition inside each separate poem: the ROUGE-L
    F-score is computed for every pair of lines in the poem, and the result is
    one minus the average of those scores.
    '''

    def __init__(self):
        '''Create the ROUGE scorer used for all line comparisons.'''
        self.rouge = Rouge()

    @staticmethod
    def _scorable_lines(poem):
        '''Return the lines of `poem` that can be passed to the ROUGE scorer.

        A poem given as a single string is split into lines first; iterating
        over it directly would yield individual characters. Lines that are not
        strings, or that hold nothing but whitespace and periods, are dropped:
        Rouge.get_scores raises on empty input, and a period-only line such as
        "..." appears to end up empty once rouge splits sentences on '.'.
        '''
        if isinstance(poem, str):
            poem = poem.splitlines()
        return [
            line for line in poem
            if isinstance(line, str) and line.replace('.', ' ').split()
        ]

    def compute_novelty(self, poem):
        '''Return the inter-novelty of a single poem.

        Args:
            poem: the poem as an iterable of line strings, or as one string
                with newline-separated lines.

        Returns:
            ``1 - average ROUGE-L F-score`` over all pairs of scorable lines,
            or 0.0 when the poem has fewer than two scorable lines.
        '''
        lines = self._scorable_lines(poem)

        rouge_scores = [
            self.rouge.get_scores(first, second)[0]['rouge-l']['f']
            for first, second in itertools.combinations(lines, 2)
        ]

        if not rouge_scores:
            return 0.0

        return 1 - sum(rouge_scores) / len(rouge_scores)

# Inter-novelty run over the pre-split poems.
# NOTE(review): only the first five poems are scored here, and this cell rebinds
# `corpus`, `novelty_calculator` and `novelty_scores` from the previous cell.
corpus = df['split_text'].to_list()[:5]
novelty_calculator = InterNoveltyCalculator()

novelty_scores = []
for i, poem in enumerate(corpus):
    # One progress line per poem (1-based position out of the slice size).
    print(f"Computing novelty for poem {i+1} out of {len(corpus)}")
    poem_novelty = novelty_calculator.compute_novelty(poem)
    novelty_scores.append(poem_novelty)

#print("InterNovelty scores for each poem:", novelty_scores)