In [2]:
from rusyll import rusyll
import pandas as pd
from rouge import Rouge
import itertools

In [3]:
from transformers import AutoTokenizer,AutoModelForSequenceClassification

model_name_or_path = 'numblilbug/rubert-cased-poem-evalutation' 

model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/711M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.57M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

# Оценка стихотворения по разным параметрам 

В этом файле представлены все скрипты, которые мы использовали для оценки стихотворений.

In [45]:
class PoemEvaluator:
    '''Evaluates a poem on several parameters'''

    def __init__(self, corpus):
        self.corpus = corpus
        self.rouge = Rouge()

    def compute_novelty(self, poem):
        '''Calculates novelty score for poems with Rouge. 
        Calculates Rouge score for each poem and then returns the mean'''
        novelty_scores = []

        for other_poem in self.corpus:
            if other_poem != poem:
                scores = self.rouge.get_scores(poem, other_poem)
                novelty_scores.append(scores[0]['rouge-l']['f'])

        if novelty_scores:
            novelty_score = 1 - sum(novelty_scores) / len(novelty_scores)
        else:
            novelty_score = 0.0

        return novelty_score

    def compute_repetition(self, poem):
        '''Calculates repetition score for poems with Rouge. 
        Calculates Rouge score for each line and then returns the mean'''
        rouge_scores = []

        poem_lines = [line for line in poem.split("\r\n") if line.strip()]

        line_pairs = itertools.combinations(poem_lines, 2)

        for pair in line_pairs:
            scores = self.rouge.get_scores(pair[0], pair[1])
            rouge_scores.append(scores[0]['rouge-l']['f'])

        repetition_score = 1 - sum(rouge_scores) / len(rouge_scores) if rouge_scores else 0.0

        return repetition_score

    def compute_rythm(self, poem):
        '''Computes rythm'''
        
        lines = poem.split("\n")
        true_lines = [line for line in lines if line.strip()]
        syllable_counts = [len(rusyll.token_to_syllables(line)) for line in true_lines]

        len_diff_len_lines = 1 - len(set(syllable_counts)) / len(true_lines)
        len_diff = len(set(syllable_counts))
        len_lines = len(true_lines)

        return len_diff_len_lines
    
    def compute_metrics(self,poem):
        '''Computes meaningfulness, grammar and emotionality with rubert-base-cased model'''
        
        inputs = tokenizer(poem, return_tensors="pt",padding=True, truncation=True,max_length=50, add_special_tokens = True)
        outputs = model(**inputs)
        predicted_logits = outputs.logits
        predicted_classes = (predicted_logits > 0.6).int()
        
        return predicted_classes.tolist()[0]
    
    def assess_poem(self, corpus):
        'Computes metrics for the entire corpus'
        novelty_scores = []
        repetition_scores = []
        rythm_scores = [] 
        metrics_scores = []
        for poem in corpus:
            novelty = PoemEvaluator.compute_novelty(self, poem)
            novelty_scores.append(novelty)
            repetition = PoemEvaluator.compute_repetition(self, poem)
            repetition_scores.append(repetition)
            rythm = PoemEvaluator.compute_rythm(self, poem)
            rythm_scores.append(rythm)
            metrics = PoemEvaluator.compute_metrics(self, poem)
            metrics_scores.append(metrics)
        print(f'Novelty scores: {novelty_scores} \nRepetition scores: {repetition_scores} \nRythm scores: {rythm_scores} \nMetrics scores (meaningfulness, grammar and emotionality respectively): {metrics_scores}')
        
        return novelty_scores, repetition_scores, rythm_scores, metrics_scores
        


In [47]:

corpus = 'Put here either a list of poems or one poem (in a list like ["poem"]). \
        If you put only one poem, novelty will not be computed'


In [48]:
poem_evaluator = PoemEvaluator(corpus)
novelty_scores, repetition_scores, rythm_scores, metrics_scores = poem_evaluator.assess_poem(corpus)

Novelty scores: [0.9585847190397998, 0.8719751948915344, 0.9639987165844446, 0.8921216856785629, 0.8841374304001891] 
Repetition scores: [0.0, 0.0, 0.0, 0.0, 0.0] 
Rythm scores: [0.75, 0.625, 0.65, 0.625, 0.5] 
Metrics scores (meaningfulness, grammar and emotionality respectively): [[1, 1, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0]]
