In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# 1.1. Imports básicos
import pandas as pd
from pyAutoSummarizer.base.evaluation.base import get_summary_evaluation
from pyAutoSummarizer.base.evaluation.lexical import RougeEvaluator, BLEUEvaluator
from pyAutoSummarizer.base.evaluation.semantic import BERTScoreEvaluator
from research.src.functions import get_metrics_annotations, get_metrics_evaluator, get_corr


### Constantes

from research.src.constants import (
    PATH_SUMMEVAL_JSONL,
    LEXICAL_EVAL,
    LEXICAL_PREFIX,
    SEMANTIC_PREFIX,
    SEMANTIC_EVAL,
    JOIN_COLS,
    LEXICAL_COL,
    SEMANTIC_COL,
    NEW_METRIC_COL,
    EVAL_COLS,
    METHODS,
    N,
    HUMAN_COLS,
    FINAL_METRIC
)

  from .autonotebook import tqdm as notebook_tqdm


# SummEval

* id: Identificador único da instância (string), ex. dm-test-…, usado para rastreamento.

* decoded: Sumário gerado pelo modelo (string), é a saída a ser avaliada.

* expert_annotations: Lista de dicionários com notas de especialistas (coherence, consistency, fluency, relevance).

* turker_annotations: Lista de dicionários com notas crowdsourced nos mesmos critérios dos experts.

* references: Lista de sumários humanos de referência (strings) para comparação automática.

* model_id: Código do modelo que produziu decoded (string), ex. M11.

* filepath: Caminho do arquivo de origem do documento completo (string).

## Carregando os Dados

In [3]:
# Load the SummEval dump; the file is JSON Lines, so each line is parsed as
# one record and becomes one row of the frame.
df = pd.read_json(
    path_or_buf=PATH_SUMMEVAL_JSONL,
    lines=True,
)


In [4]:
# Peek at the last rows to confirm the file was parsed into the expected columns.
df.tail()

Unnamed: 0,id,decoded,expert_annotations,turker_annotations,references,model_id,filepath
1595,dm-test-e880fda4c25289f8325574246f0f8ed4ff5eb26b,a timewarp home which has remained unchanged s...,"[{'coherence': 5, 'consistency': 5, 'fluency':...","[{'coherence': 4, 'consistency': 3, 'fluency':...",[Nondescript semi-detached home for sale in Ho...,M0,cnndm/dailymail/stories/e880fda4c25289f8325574...
1596,dm-test-e880fda4c25289f8325574246f0f8ed4ff5eb26b,"The collector 's paradise in Horfield , Bristo...","[{'coherence': 3, 'consistency': 5, 'fluency':...","[{'coherence': 4, 'consistency': 4, 'fluency':...",[Nondescript semi-detached home for sale in Ho...,M22,cnndm/dailymail/stories/e880fda4c25289f8325574...
1597,dm-test-e880fda4c25289f8325574246f0f8ed4ff5eb26b,"the collector’s paradise in horfield , bristol...","[{'coherence': 5, 'consistency': 5, 'fluency':...","[{'coherence': 4, 'consistency': 4, 'fluency':...",[Nondescript semi-detached home for sale in Ho...,M8,cnndm/dailymail/stories/e880fda4c25289f8325574...
1598,dm-test-e880fda4c25289f8325574246f0f8ed4ff5eb26b,"the collector 's paradise in horfield , bristo...","[{'coherence': 3, 'consistency': 5, 'fluency':...","[{'coherence': 4, 'consistency': 3, 'fluency':...",[Nondescript semi-detached home for sale in Ho...,M10,cnndm/dailymail/stories/e880fda4c25289f8325574...
1599,dm-test-e880fda4c25289f8325574246f0f8ed4ff5eb26b,the timewarp home has remained unchanged since...,"[{'coherence': 3, 'consistency': 5, 'fluency':...","[{'coherence': 4, 'consistency': 3, 'fluency':...",[Nondescript semi-detached home for sale in Ho...,M9,cnndm/dailymail/stories/e880fda4c25289f8325574...


### Análise Individual

In [5]:
# Inspect a single record (index label 0) field by field.
df.loc[0]

id                     dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2
decoded               paul merson was brought on with only seven min...
expert_annotations    [{'coherence': 2, 'consistency': 1, 'fluency':...
turker_annotations    [{'coherence': 3, 'consistency': 3, 'fluency':...
references            [Andros Townsend an 83rd minute sub in Tottenh...
model_id                                                            M11
filepath              cnndm/dailymail/stories/8764fb95bfad8ee8492748...
Name: 0, dtype: object

### Inspeção de tipos e não-nulos

In [6]:
# Column dtypes and non-null counts for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600 entries, 0 to 1599
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  1600 non-null   object
 1   decoded             1600 non-null   object
 2   expert_annotations  1600 non-null   object
 3   turker_annotations  1600 non-null   object
 4   references          1600 non-null   object
 5   model_id            1600 non-null   object
 6   filepath            1600 non-null   object
dtypes: object(7)
memory usage: 87.6+ KB


In [7]:
# Count how many rows share each document id and show the ten most frequent.
df["id"].value_counts(ascending=False).head(10)

id
dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2     16
dm-test-f26d8400ae49b90d109c165d0f44b8f6ca253c08     16
dm-test-207df192edc1836250b69d1bc5b9e6a38206eb78     16
dm-test-0f0789390d67698283cc87b2e046b5c5cd77edb7     16
cnn-test-fbbafa743a8c2ecd2cedf65c6c61956b2db8ec5c    16
dm-test-f37fd6e9b6cc18a7132568e307ef3b130931e809     16
dm-test-14c813567696f4e63a39993c09d4edb454036179     16
dm-test-e470f0a87d7513bf880412524332047020422c3f     16
dm-test-2c37d44d03ce2e91310339d884d33ee5aabf9abc     16
dm-test-e428e25bbf8e06643154ce31b7c6fab64c81e857     16
Name: count, dtype: int64

# Gerando Avg summeval metrics 

In [8]:
# Build the human-score table from the raw annotation lists in `df`.
# NOTE(review): get_metrics_annotations is defined in research.src.functions;
# presumably it averages each criterion over the expert and turker annotators
# (the manual check further down agrees for one row) — confirm against the helper.
avg_summeval_metrics = get_metrics_annotations(df)
avg_summeval_metrics.tail()

Unnamed: 0,id,model_id,exp_coherence,exp_consistency,exp_fluency,exp_relevance,exp_overall_mean,turk_coherence,turk_consistency,turk_fluency,turk_relevance,turk_overall_mean
1595,dm-test-fadabe346fe95d33eee71299e6596754768f5246,M22,3.333333,5.0,5.0,4.666667,4.5,3.6,4.6,3.4,4.0,3.9
1596,dm-test-fadabe346fe95d33eee71299e6596754768f5246,M23,5.0,5.0,5.0,5.0,5.0,3.8,3.6,3.6,4.0,3.75
1597,dm-test-fadabe346fe95d33eee71299e6596754768f5246,M5,5.0,5.0,5.0,4.666667,4.916667,4.2,4.0,3.8,4.4,4.1
1598,dm-test-fadabe346fe95d33eee71299e6596754768f5246,M8,4.0,5.0,4.0,3.666667,4.166667,4.0,5.0,4.0,5.0,4.5
1599,dm-test-fadabe346fe95d33eee71299e6596754768f5246,M9,3.333333,5.0,5.0,4.666667,4.5,3.4,4.0,3.4,4.0,3.7


### Prova Real

* id : cnn-test-404f859482d47c127868964a9a39d1a7645dd2e9	
* model_id : M0

In [9]:
# Select the aggregated row for one (document id, model) pair so it can be
# compared against the raw annotations by hand.
avg_summeval_metrics.loc[
    (avg_summeval_metrics["id"] == "cnn-test-404f859482d47c127868964a9a39d1a7645dd2e9")
    & (avg_summeval_metrics["model_id"] == "M0")
]

Unnamed: 0,id,model_id,exp_coherence,exp_consistency,exp_fluency,exp_relevance,exp_overall_mean,turk_coherence,turk_consistency,turk_fluency,turk_relevance,turk_overall_mean
0,cnn-test-404f859482d47c127868964a9a39d1a7645dd2e9,M0,4.666667,4.666667,4.666667,3.0,4.25,4.6,4.2,4.2,4.6,4.4


In [10]:
# Raw expert annotations for the row with index label 1499.
# NOTE(review): 1499 is presumably the row matching the id/model pair checked
# above — confirm, or look the row up by id and model_id instead.
df.at[1499, "expert_annotations"]

[{'coherence': 5, 'consistency': 5, 'fluency': 5, 'relevance': 2},
 {'coherence': 5, 'consistency': 5, 'fluency': 5, 'relevance': 4},
 {'coherence': 4, 'consistency': 4, 'fluency': 4, 'relevance': 3}]

* Coherence = (5 + 5 + 4) / 3 ≈ 4.67
* Consistency = (5 + 5 + 4) / 3 ≈ 4.67
* Fluency = (5 + 5 + 4) / 3 ≈ 4.67
* Relevance = (2 + 4 + 3) / 3 = 3

In [11]:
# Raw crowd-worker (turker) annotations for the row with index label 1499.
df.at[1499, "turker_annotations"]

[{'coherence': 5, 'consistency': 5, 'fluency': 5, 'relevance': 5},
 {'coherence': 4, 'consistency': 5, 'fluency': 4, 'relevance': 5},
 {'coherence': 5, 'consistency': 5, 'fluency': 5, 'relevance': 4},
 {'coherence': 4, 'consistency': 3, 'fluency': 2, 'relevance': 4},
 {'coherence': 5, 'consistency': 3, 'fluency': 5, 'relevance': 5}]

* Coherence = 4.6
* Consistency = 4.2
* Fluency = 4.2
* Relevance = 4.6

# pyAutoSummarizer

## Get Summary Evaluation

### Exemplo Pontual

In [12]:
# Hand-written reference / generated summary pair used for a quick smoke test
# of the evaluators. Adjacent literals are concatenated by the parser, so the
# resulting strings are unchanged.
referencia = (
    "A energia solar usa a luz do sol para produzir eletricidade ou calor, "
    "e seu uso crescente ajuda a reduzir custos e emissões, "
    "promovendo uma matriz energética mais diversificada."
)
gerado = (
    "A energia solar, que transforma luz solar em eletricidade, "
    "tem se tornado mais popular por ser limpa, barata e ajudar no meio ambiente."
)


In [13]:
# Score the hand-written pair with the lexical evaluator set.
# Uses LEXICAL_EVAL as imported from research.src.constants: the lower-case
# `lexical_eval` used here before is not defined anywhere in the notebook and
# raises NameError on a fresh kernel.
# NOTE(review): assumes LEXICAL_EVAL holds the same evaluators the old
# variable did — confirm.
resultados = get_summary_evaluation(
    reference_summary=referencia,
    generated_summary=gerado,
    evaluators=LEXICAL_EVAL,
)

In [14]:
# Display the scores returned by get_summary_evaluation for the hand-written pair.
resultados

{'rouge1_f1': 0.25925925925925924,
 'rougeL_f1': 0.2222222222222222,
 'bleu': 0.02644168646195725}

### Exemplo Pontual - usando dados da Base SummEval

In [15]:
# Generated summary of the first SummEval row (index label 0).
gerado_summeval_0 = df.at[0, "decoded"]
print(gerado_summeval_0)

paul merson was brought on with only seven minutes remaining in his team 's 0-0 draw with burnley . andros townsend scored the tottenham midfielder in the 89th minute . paul merson had another dig at andros townsend after his appearance . the midfielder had been brought on to the england squad last week . click here for all the latest arsenal news news .


In [16]:
# First entry of the reference-summary list for the same row.
reference_summeval_0 = df.at[0, "references"][0]
print(reference_summeval_0)

Andros Townsend an 83rd minute sub in Tottenham's draw with Burnley. He was unable to find a winner as the game ended without a goal. Townsend had clashed with Paul Merson last week over England call-up.


In [17]:
# Score the SummEval example against its first reference with the lexical
# evaluator set.
# Uses LEXICAL_EVAL as imported from research.src.constants: the lower-case
# `lexical_eval` used here before is not defined anywhere in the notebook and
# raises NameError on a fresh kernel.
# NOTE(review): assumes LEXICAL_EVAL holds the same evaluators the old
# variable did — confirm.
resultados = get_summary_evaluation(
    reference_summary=reference_summeval_0,
    generated_summary=gerado_summeval_0,
    evaluators=LEXICAL_EVAL,
)

In [18]:
# Display the scores returned by get_summary_evaluation for the SummEval example.
resultados

{'rouge1_f1': 0.404040404040404,
 'rougeL_f1': 0.22222222222222224,
 'bleu': 0.01853267661331109}

# Gerando Avg Evaluator Metrics

In [19]:
# Reminder of the input layout before running the evaluators over it.
df.head(2)

Unnamed: 0,id,decoded,expert_annotations,turker_annotations,references,model_id,filepath
0,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,paul merson was brought on with only seven min...,"[{'coherence': 2, 'consistency': 1, 'fluency':...","[{'coherence': 3, 'consistency': 3, 'fluency':...",[Andros Townsend an 83rd minute sub in Tottenh...,M11,cnndm/dailymail/stories/8764fb95bfad8ee8492748...
1,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,paul merson has restarted his row with andros ...,"[{'coherence': 3, 'consistency': 5, 'fluency':...","[{'coherence': 2, 'consistency': 3, 'fluency':...",[Andros Townsend an 83rd minute sub in Tottenh...,M13,cnndm/dailymail/stories/8764fb95bfad8ee8492748...


In [20]:
# Run the lexical and the semantic evaluator sets over the sample and collect
# one prefixed metrics table per set.
# Uses LEXICAL_EVAL / SEMANTIC_EVAL as imported from research.src.constants:
# the lower-case `lexical_eval` / `semantic_eval` used here before are not
# defined anywhere in the notebook and raise NameError on a fresh kernel.
# NOTE(review): only the first 3 rows of `df` are scored, so the tables built
# from these frames further down cover just 3 summaries — confirm this is an
# intentional smoke run before interpreting the correlations.
df_agg_lexical = get_metrics_evaluator(df.head(3), LEXICAL_EVAL, LEXICAL_PREFIX)
df_agg_semantic = get_metrics_evaluator(df.head(3), SEMANTIC_EVAL, SEMANTIC_PREFIX)

100%|██████████| 33/33 [00:00<00:00, 321.41it/s]
100%|██████████| 33/33 [01:13<00:00,  2.24s/it]


In [21]:
# Combine the lexical and semantic metric tables on the shared key columns
# (default inner join).
df_agg = df_agg_lexical.merge(df_agg_semantic, on=JOIN_COLS)

In [22]:
# Check that the merged table carries both the lexical_* and semantic_* columns.
df_agg.head(2)

Unnamed: 0,id,model_id,lexical_rouge1_f1,lexical_rougeL_f1,lexical_bleu,lexical_overall_mean,semantic_bert_score_precision,semantic_bert_score_recall,semantic_bert_score_f1,semantic_overall_mean
0,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M1,0.292654,0.200857,0.010909,0.16814,0.12494,-0.114202,0.003692,0.00481
1,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M11,0.303723,0.191623,0.01155,0.168965,0.125775,-0.147368,-0.013298,-0.01163


# Gerando Metrics Frame

In [None]:
from research.src.functions import get_combinated_metric


In [35]:
# Build the frame used for the correlation analysis from the human-score table
# and the automatic-metric table, keyed by JOIN_COLS.
# NOTE(review): get_combinated_metric is opaque from here; how the inputs are
# joined and how the combined metric column is computed must be confirmed in
# research.src.functions.
metrics_frame = get_combinated_metric(avg_summeval_metrics, df_agg, JOIN_COLS)

In [45]:
# Show only the key columns next to the combined metric column.
metrics_frame.loc[:, [*JOIN_COLS, NEW_METRIC_COL]]

Unnamed: 0,id,model_id,new_metric_col
0,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M1,0.086475
1,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M11,0.078667
2,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M13,0.070931


# Gerando Correlation Frame

In [26]:



# Correlate the automatic-metric columns with the human-annotation columns.
# Uses EVAL_COLS / HUMAN_COLS / METHODS as imported from research.src.constants:
# the lower-case `eval_cols`, `human_cols` and `methods` used here before are
# not defined anywhere in the notebook and raise NameError on a fresh kernel.
# NOTE(review): assumes the constants hold the same values the old variables
# did — confirm.
correlation_table = get_corr(metrics_frame, EVAL_COLS, HUMAN_COLS, METHODS)

In [27]:
# Display the table returned by get_corr.
correlation_table

Unnamed: 0,exp_coherence,exp_consistency,exp_fluency,exp_relevance,exp_overall_mean
lexical_rouge1_f1,-0.866025,-0.866025,-0.866025,-0.866025,-0.866025
lexical_rougeL_f1,0.866025,0.866025,0.866025,0.866025,0.866025
lexical_bleu,0.0,0.0,0.0,0.0,0.0
lexical_overall_mean,-0.866025,-0.866025,-0.866025,-0.866025,-0.866025
semantic_bert_score_precision,-0.866025,-0.866025,-0.866025,-0.866025,-0.866025
semantic_bert_score_recall,0.0,0.0,0.0,0.0,0.0
semantic_bert_score_f1,0.0,0.0,0.0,0.0,0.0
semantic_overall_mean,0.0,0.0,0.0,0.0,0.0
new_metric_col,0.0,0.0,0.0,0.0,0.0


# Gerando Final_Corr

In [31]:
# Pull the single cell of the correlation table at row NEW_METRIC_COL and
# column FINAL_METRIC.
FINAL_CORR = correlation_table.at[NEW_METRIC_COL, FINAL_METRIC]
print(FINAL_CORR)

0.0
