# Single Agent on SummEval 

## Imports

In [1]:
import pandas as pd
from datasets import load_dataset
import os
from dotenv import load_dotenv
from autogen import ConversableAgent
import re
from tqdm import tqdm
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


## Data

In [None]:
# Load and prepare a subset of the SummEval dataset
SummEval_Test = load_dataset("mteb/summeval", split="test")
df = pd.DataFrame(SummEval_Test)

# Index labels of the raw test split that are excluded from the experiment.
# NOTE(review): why these rows are "problematic" is not recorded here — TODO document the criterion.
problematic_indices = [5, 7, 8, 9, 10, 11, 18, 20, 26, 27, 33, 34, 39, 46, 61, 64, 68, 73, 75, 79, 85, 86, 88, 92, 96, 99]
df_filtered = df.drop(index=problematic_indices).reset_index(drop=True)

# Keep only the article, the machine summaries and the four rating columns.
df_filtered = df_filtered[["text", "machine_summaries", "relevance", "coherence", "fluency", "consistency"]]

# Explode the summary and rating columns together so that every row holds
# one summary with its own four ratings (the article text is repeated).
df_exploded = df_filtered.explode(["machine_summaries", "relevance", "coherence", "fluency", "consistency"]).reset_index(drop=True)

# Fixed random_state keeps the 100-row sample identical between runs.
df_sampled = df_exploded.sample(n=100, random_state=42).reset_index(drop=True)

# Round the ratings to whole numbers so they are comparable with the integer
# scores the Evaluator-Agent is asked to produce.
columns_to_round = ["relevance", "coherence", "fluency", "consistency"]
df_sampled[columns_to_round] = df_sampled[columns_to_round].astype(float).round().astype(int)
df_final = df_sampled

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only adds a stray "None" line to the cell output.
df_final.info()
print(df_final.head(1))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   text               100 non-null    object
 1   machine_summaries  100 non-null    object
 2   relevance          100 non-null    int64 
 3   coherence          100 non-null    int64 
 4   fluency            100 non-null    int64 
 5   consistency        100 non-null    int64 
dtypes: int64(4), object(2)
memory usage: 4.8+ KB
None
                                                text  \
0  Boss Nigel Pearson has urged Leicester to keep...   

                                   machine_summaries  relevance  coherence  \
0  jamie vardy scored an injury-time winner again...          2          2   

   fluency  consistency  
0        3            2  


## Config

In [3]:
# Load Azure OpenAI configuration from environment variables
# (load_dotenv() is called first so values can also come from a local .env file).
load_dotenv()

api_key = os.getenv("AZURE_OPENAI_API_KEY")
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
deployment_name = os.getenv("AZURE_DEPLOYMENT_NAME")
api_version = os.getenv("AZURE_API_VERSION", "2023-12-01-preview")

# Fail fast on incomplete configuration: os.getenv returns None for an unset
# variable, and None would otherwise be formatted into base_url below as the
# literal text "None" and only surface as an obscure error at request time.
_missing_settings = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", api_key),
        ("AZURE_OPENAI_ENDPOINT", endpoint),
        ("AZURE_DEPLOYMENT_NAME", deployment_name),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Define the model configuration for Azure OpenAI API access
# NOTE(review): base_url is the full chat-completions URL rather than the bare
# resource endpoint — confirm this is what the installed autogen version expects.
config_list = [
    {
        "model": deployment_name,
        "api_key": api_key,
        "base_url": f"{endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version={api_version}",
        "api_type": "azure",
        "api_version": api_version,  
        # Presumably chosen for repeatable runs (deterministic sampling, fixed cache seed).
        "temperature": 0,
        "cache_seed": 42,
    }
]

## System Design

In [4]:
# Define the system prompt for the Evaluator-Agent
# A plain (non-f) string is used because nothing is interpolated; this lets the
# JSON example be written with ordinary single braces. The resulting text is
# identical to the previous f-string version.
# NOTE(review): the Coherence definition reads "to the fit together" (probably
# meant "do they fit together"). It is left untouched on purpose: editing the
# prompt changes the model input and may change the reported scores.
agent_system_message = """
You are an objective Evaluator-Agent.
In this task you will evaluate the quality of a summary written for a news article.
To correctly solve this task, follow these steps:

    1. Carefully read the news article, be aware of the information it contains.
    2. Read the proposed summary.
    3. Rate the summary with integer values on a scale from 1 (worst) to 5 (best) by its relevance, consistency, fluency, and coherence.

Definitions:
    Relevance:
        - The rating measures how well the summary captures the key points of the article.
        - Consider whether all and only the important aspects are contained in the summary.
    Consistency:
        - The rating measures whether the facts in the summary are consistent with the facts in the original article.
        - Consider whether the summary does reproduce all facts accurately and does not make up untrue information.
    Fluency:
        - This rating measures the quality of individual sentences, are they well-written and grammatically correct.
        - Consider the quality of individual sentences.
    Coherence:
        - The rating measures the quality of all sentences collectively, to the fit together and sound naturally.
        - Consider the quality of the summary as a whole.

Give an explanation on your evaluation using about 200 words.
Always begin your output with: "As an objective Evaluator-Agent I think ..."
Always end your output with a JSON object with the following format:{"relevance": score, "coherence": score, "fluency": score, "consistency": score} 
"""

In [5]:
# Initialize the ConversableAgents for system setup
# `initializer` is the participant that opens each chat (it sends the
# article/summary message); it is created without a custom system message.
initializer = ConversableAgent(
    name="initializer",
    human_input_mode="NEVER",
    llm_config=dict(config_list=config_list),
)

# `agent` is the Evaluator-Agent: same model configuration, but driven by the
# evaluation system prompt defined above. Neither agent asks for human input.
agent = ConversableAgent(
    name="Evaluator-Agent",
    system_message=agent_system_message,
    human_input_mode="NEVER",
    llm_config=dict(config_list=config_list),
)

## Evaluation

In [6]:
# Define the evaluation function
# Metric names in the order they appear in the returned dictionary.
_METRICS = ("relevance", "coherence", "fluency", "consistency")


def _extract_score(transcript, metric):
    """Return the last integer reported for ``metric`` in ``transcript``.

    The system prompt asks the agent to END its output with the JSON scores,
    so the last ``"<metric>": <int>`` occurrence is treated as the verdict;
    earlier occurrences (for example text quoted from the article or numbers
    mentioned in the explanation) are ignored.

    Raises:
        ValueError: if ``transcript`` contains no score for ``metric``.
    """
    matches = re.findall(rf'"{metric}"\s*:\s*(\d+)', transcript)
    if not matches:
        raise ValueError(f'No "{metric}" score found in the agent output')
    return int(matches[-1])


def evaluate(text, machine_summaries, relevance, coherence, fluency, consistency):
    """Ask the Evaluator-Agent to rate one summary and compare with the labels.

    Sends a single-turn chat from ``initializer`` to ``agent`` containing the
    article and the summary, parses the four integer scores from the
    stringified chat result and computes their signed deviation from the
    ground-truth ratings.

    Args:
        text: The source news article.
        machine_summaries: The summary to be rated.
        relevance: Ground-truth relevance rating.
        coherence: Ground-truth coherence rating.
        fluency: Ground-truth fluency rating.
        consistency: Ground-truth consistency rating.

    Returns:
        A dict keyed by metric name ("relevance", "coherence", "fluency",
        "consistency", in that order). Each value is a dict with
        "ground_truth", "system_decision" (the agent's score) and
        "deviation" (``system_decision - ground_truth``).

    Raises:
        ValueError: if the agent output does not contain a score for one of
            the four metrics.
    """
    message = f""" 
    Article: {text}

    Summary: {machine_summaries}
    """

    # max_turns=1: one message out, one reply back.
    result = initializer.initiate_chat(agent, message=message, max_turns=1)
    # The scores are parsed from the string form of the whole chat result.
    result_str = str(result)

    ground_truth = {
        "relevance": relevance,
        "coherence": coherence,
        "fluency": fluency,
        "consistency": consistency,
    }

    evaluation = {}
    for metric in _METRICS:
        score = _extract_score(result_str, metric)
        evaluation[metric] = {
            "ground_truth": ground_truth[metric],
            "system_decision": score,
            "deviation": score - ground_truth[metric],
        }
    return evaluation

In [7]:
# Prepare evaluation data
num_rows = 100
df_subset = df_final.head(num_rows)

# Create the output folder BEFORE the (slow, billable) evaluation loop:
# DataFrame.to_csv does not create missing directories, so a missing
# "Results" folder would otherwise only fail after every row was evaluated.
output_path = 'Results/single.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Evaluate responses
results = []
# total uses the real subset length so the progress bar stays correct even
# when df_final holds fewer than num_rows rows.
for _, row in tqdm(df_subset.iterrows(), total=len(df_subset), desc="Progress"):
    result = evaluate(
        text=row["text"],
        machine_summaries=row["machine_summaries"],
        relevance=row["relevance"],
        coherence=row["coherence"],
        fluency=row["fluency"],
        consistency=row["consistency"]
    )
    results.append(result)

# One row per evaluated summary, one column per metric; each cell holds the
# per-metric dict returned by evaluate().
results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

Progress:   0%|          | 0/100 [00:00<?, ?it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: Boss Nigel Pearson has urged Leicester to keep their cool and ignore their relegation rivals. The Foxes host Swansea on Saturday just three points from safety in the Barclays Premier League after back-to-back wins. Last week's 3-2 win at West Brom handed them a survival lifeline, although they remain bottom of the table. Jamie Vardy scored an injury-time winner against West Bromwich Albion on Saturday to improve his side's slim chance of Premier League survival Vardy celebrates in front of the travelling away fans after hitting the winner against West Brom But after their mini-revival, Pearson wants his side to remain focused on their own jobs. 'I'm very wary of people flipping the emphasis,' he said. 'Our future is in our own hands and if we go into the last game with that we have given ourselves a realistic chance. 'We need to make sure our own run-in is what we want it to be. Leicester manager Nigel Pearson has urged his pla

Progress:   8%|▊         | 8/100 [00:00<00:01, 74.15it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: MLS side Orlando City are the latest club to have expressed interest in Manchester United misfit Javier Hernandez. The Mexico international would be a huge commercial draw for the Florida-based franchise who are coached by former Everton and Manchester City striker Adrian Heath. Orlando have a huge Latin-American fanbase and made enquiries last week about the prospect of a deal. Javier Hernandez is linked with a move to Orlando City after enduring a tough time on loan at Real Madrid Orlando have a big Latin-American fanbase and Kaka is the captain of the MLS side Hernandez would be a popular arrival with Orlando supporters but eight European sides are also interested Hernandez has cut a frustrated figure during his loan spell at Real Madrid this season but still has plenty of interest from other Premier League and European sides. Southampton, Stoke, West Ham and Everton are all interested with United willing to sell for around 

Progress:  16%|█▌        | 16/100 [00:00<00:01, 73.82it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: Jamie Carragher believes Brendan Rodgers is still the right man to lead Liverpool forward after a season with no trophies and a likely finish outside the top four. Rodgers' future at Anfield has been questioned after a third straight season without a trophy, but the former Liverpool defender backed the Reds manager. Speaking after collecting a Beacon Award for his community work on Merseyside on Tuesday, Carragher said: 'I don't think your whole future should be determined by one game. Jamie Carragher shared this image on Instagram after collecting a Beacon Award on Tuesday 'It was difficult with the players that came in and I still don't think many of them have bedded in yet.' Liverpool are seven points behind fourth placed Manchester City with a game in hand and look set to miss out on Champions League qualification for next season. Carragher added: 'Brendan, the players and the staff will be looking to next season and thinki

Progress:  24%|██▍       | 24/100 [00:00<00:01, 70.32it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: Nathan Hughes on Friday night had his ban for accidentally knocking out George North sensationally over-turned on appeal, following an outcry on his behalf. The Wasps No 8 was initially suspended for three matches, after a disciplinary panel ruled he had been ‘reckless’ for failing to prevent his knee colliding with the head of the Welsh Lion, as he scored a try for Northampton on March 27. Hughes was shown a red card for the offence, but that dismissal has now been officially rescinded. Although it was accepted that there had been no intent, the Fiji-born player with England ambitions received a further sanction at the original hearing. Yet, in a stunning twist – and in light of fierce criticism – the ban was thrown out, as the appeal lodged by Wasps was up-held. Nathan Hughes's knee collided with George North's head as he crossed the line to score for Northampton North was left out cold on the pitch at Franklin's Gardens and 

Progress:  32%|███▏      | 32/100 [00:00<00:00, 71.25it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: He may not be playing much at Manchester United but Anders Lindegaard certainly has a lot to smile about off the pitch as he enjoyed a spot of sunshine with stunning wife Misse Beqiri. Lindegaard tied the knot with the Swedish model last year in a romantic beach wedding in Mauritius. And the United goalkeeper, who has struggled for minutes throughout his career in Manchester, was keen to remind fans via his Instagram page of his marriage to stunning Beqiri. Manchester United goalkeeper Anders Lindegaard made the most of the English weather with Misse Beqiri Lindegaard tied the knot with the stunning model last year and the couple have a son together Danish international Lindegaard posted a link to Beqiri's Instagram account with the message: 'Day off with my sunshine @missebeqiri #cheshire #manchester' The Swedish beauty, who has Albanian roots, likes to flaunt her good looks on Instagram and regularly posts pictures while she'

Progress:  40%|████      | 40/100 [00:00<00:00, 72.96it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: River Plate are keen to sign Manchester United striker Radamel Falcao but admit a deal is complicated. The Colombia forward spent eight years with the Argentine side before leaving for Porto in 2009 and River Plate are open to Falcao returning. During an interview with Esto es River program, vice president Matias Patanian said: 'We dream of Falcao Garcia. The doors are open.' River Plate are keen to sign former forward Radamel Falcao who has struggled on loan at Manchester United River Plate vice president Matias Patanian admits the club 'dream of Falcao' and that 'the doors are open' The 29-year-old has struggled during a season-long loan spell at Old Trafford this term - scoring just four Premier League goals - and it remains to be seen whether United will exercise the option to keep the frontman or whether he will return to parent club Monaco. However, Falcao has been in good goalscoring form for his countrythis week, findin

Progress:  48%|████▊     | 48/100 [00:00<00:00, 74.13it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: (CNN)A fiery sunset greeted people in Washington Sunday. The deep reddish color caught Seattle native Tim Durkan's eye. He photographed a handful of aerial shots of the sunset warming the city's skyline and shared them on CNN iReport. The stunning sunsets were the result of raging wildfires in parts of Siberia. "The dramatic sunsets began showing up over the weekend and had Seattle locals wondering where the amber-colored haze was originating from," Durken said. The fires were started in southeastern Siberia, by farmers burning grass in their fields. But on April 14, it is believed that the flames quickly grew out of control because of strong winds and spread throughout the region, according to CNN affiliate KOMO-TV. As a result, the fires have destroyed dozens of villages in the region. Rescue crews were able to put out the flames. However, the lingering smoke from the widespread fires were picked up by atmospheric winds. The 

Progress:  56%|█████▌    | 56/100 [00:00<00:00, 69.25it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: The owners of this house better not leave too quickly, after a speed camera was angled directly on their front door. The bright yellow gatso had previously enforced the 30mph speed limit for motorists along the residential road in Handsworth, Birmingham. However, it has not been working for two years after every single fixed device was switched off in the West Midlands. Big Brother is watching: A speed camera has been turned round and is pointing at this house in Birmingham, West Midlands The speed camera has not been working for more than two years Around 300 speed and traffic camera, using old technology, were turned off across the region in March 2013 In there place, speed enforcement operations have been carried out by a small number of mobile camera units, fixed cameras on motorways and traffic officers on patrol. Mystery surrounds who had re-pointed the camera, but a spokesman for Birmingham City Council said they were aw

Progress:  64%|██████▍   | 64/100 [00:00<00:00, 69.94it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: It's crunch time this weekend at the top of the Premier League as second-placed Arsenal host table toppers Chelsea - and it appears one star of the former's team cannot wait for the ever-nearing kick-off. Arsenal playmaker Mesut Ozil can now countdown the minutes until the 4pm encounter on Sunday with his brand new Apple Watch. The 26-year-old was the beneficiary of the timepiece - which ranges in price from £299 to £13,500 - as he visited an Apple store on Thursday afternoon. Arsenal midfielder Mesut Ozil (right) took to Instagram to show off his new Apple Watch on Thursday Ozil compared his watch to TV character Michael Knight in the hit action show Knight Rider Ozil took to Instagram to share his joy at his latest gift, comparing his gadget to former TV character Michael Knight from the hit action show Knight Rider. Accompanied with the caption: 'Excited about my new cool gadget! It's just like Michael Knight's watch, but a 

Progress:  72%|███████▏  | 72/100 [00:01<00:00, 69.35it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: (CNN)One of the biggest TV events of all time is being reimagined for new audiences. "Roots," the epic miniseries about an African-American slave and his descendants, had a staggering audience of over 100 million viewers back in 1977. Now A&E networks are remaking the miniseries, to air in 2016. A&E, Lifetime and History (formerly the History Channel) announced Thursday that the three networks would simulcast a remake of the saga of Kunta Kinte, an African who was captured, shipped to America and sold into slavery to work on a Virginia plantation. LeVar Burton, who portrayed Kinte in the original, will co-executive produce the new miniseries. A press release describes the new version as "original" and "contemporary" and will draw more from Alex Haley's classic novel, "Roots: The Saga of an American Family." Producers will consult scholars in African and African-American history for added authenticity. "We are proud to bring thi

Progress:  80%|████████  | 80/100 [00:01<00:00, 70.53it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: A teenager from Illinois is tackling her disability head-on and attempting to positively influence thousands of others by dancing. Dayna Dobias, 19, from Downers Grove was born with cerebral palsy, but she hopes to inspire others with her enthusiastic videos. 'I've gotten bullied because of it and I get people all the time, staring and so it's definitely made things difficult in my life,' Dayna told Daily Mail Online. Scroll down for video Challenging: Dayna Dobias, 18, is hoping change the way people with disabilities are represented by the television, film and the fashion industry Her hope is that the video not only entertains, but inspires others to think before judging. The teenager says her motivation for creating the video was to counteract stereotypes held by people over certain disabilities. 'People tend to think that because I have cerebral palsy I cannot do anything and that I am so different from everyone else, when 

Progress:  88%|████████▊ | 88/100 [00:01<00:00, 68.80it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: A teenager from Illinois is tackling her disability head-on and attempting to positively influence thousands of others by dancing. Dayna Dobias, 19, from Downers Grove was born with cerebral palsy, but she hopes to inspire others with her enthusiastic videos. 'I've gotten bullied because of it and I get people all the time, staring and so it's definitely made things difficult in my life,' Dayna told Daily Mail Online. Scroll down for video Challenging: Dayna Dobias, 18, is hoping change the way people with disabilities are represented by the television, film and the fashion industry Her hope is that the video not only entertains, but inspires others to think before judging. The teenager says her motivation for creating the video was to counteract stereotypes held by people over certain disabilities. 'People tend to think that because I have cerebral palsy I cannot do anything and that I am so different from everyone else, when 

Progress:  95%|█████████▌| 95/100 [00:01<00:00, 68.38it/s]

[33minitializer[0m (to Evaluator-Agent):

 
    Article: Jordan Henderson has provided Liverpool with a lift after their FA Cup heartache by agreeing a new long-term contract. The club's vice-captain had 14 months remaining on his current contract and his advisors had been in talks with Liverpool since the beginning of this season. They have now reached a resolution and Henderson is expected to put pen-to-paper on improved terms that are likely be worth in the region of £100,000. His new deal will run to 2020. Liverpool midfielder Jordan Henderson is set to sign a new long-term contract at Anfield Henderson chases down Aston Villa's Jack Grealish during Liverpool's FA Cup semi-final defeat at Wembley Henderson's new deal is worth around £100,000-a-week and will run until the summer of 2020 Henderson, 24, is the third big player in Brendan Rodgers' squad to agree a contract extension, following on from Daniel Sturridge and Philippe Coutinho. The England international, who was signed b

Progress: 100%|██████████| 100/100 [00:01<00:00, 70.04it/s]
