In [None]:
import requests
import xml.etree.ElementTree as ET

def search_arxiv(query, max_results=10, timeout=30):
    """Query the arXiv export API and return the raw response body.

    Results are requested starting at offset 0, sorted by relevance in
    descending order.

    Args:
        query: Search expression sent as the ``search_query`` parameter.
        max_results: Maximum number of entries to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever on a stalled connection.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (an ``Error: ...`` line is printed in that case).
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection failures and timeouts follow the same "print and return
        # None" contract as a non-200 status instead of raising.
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None
    return response.text

def parse_arxiv_response(response, normalize_whitespace=False):
    """Extract titles and abstracts from an arXiv Atom feed.

    Args:
        response: The Atom XML document as a string.
        normalize_whitespace: When true, collapse every run of whitespace
            (including line wraps inside a title) into a single space and
            strip the ends. Defaults to ``False`` so the text is returned
            exactly as it appears in the feed.

    Returns:
        A ``(titles, abstracts)`` tuple of two lists with one item per
        ``<entry>`` element, in document order. An entry that has no
        ``<title>`` or ``<summary>`` (or an empty one) contributes ``""`` so
        the two lists always stay aligned.

    Raises:
        xml.etree.ElementTree.ParseError: If ``response`` is not well-formed XML.
    """
    atom_ns = "{http://www.w3.org/2005/Atom}"
    titles = []
    abstracts = []

    # NOTE(review): xml.etree is not hardened against maliciously constructed
    # XML; acceptable for a trusted feed, reconsider for arbitrary input.
    root = ET.fromstring(response)

    for entry in root.findall(f"{atom_ns}entry"):
        # findtext returns the default instead of failing on a missing child.
        title = entry.findtext(f"{atom_ns}title", default="")
        abstract = entry.findtext(f"{atom_ns}summary", default="")
        if normalize_whitespace:
            title = " ".join(title.split())
            abstract = " ".join(abstract.split())
        titles.append(title)
        abstracts.append(abstract)

    return titles, abstracts

# Example usage
query = "LGBT"
response = search_arxiv(query)
if not response:
    # Stop here instead of leaving `titles`/`abstracts` unbound (or stale from
    # an earlier run of this cell) for the cells that consume them later.
    raise RuntimeError(f"arXiv search for {query!r} returned no data")
titles, abstracts = parse_arxiv_response(response)



Titles: ['Multilingual Contextual Affective Analysis of LGBT People Portrayals in\n  Wikipedia', 'Adapting Coreference Resolution for Processing Violent Death Narratives', 'Locating Information Gaps and Narrative Inconsistencies Across\n  Languages: A Case Study of LGBT People Portrayals on Wikipedia', 'Eliciting Information from Sensitive Survey Questions', 'Analyzing Right-wing YouTube Channels: Hate, Violence and Discrimination', 'The FRENK Datasets of Socially Unacceptable Discourse in Slovene and\n  English', "Can't say cant? Measuring and Reasoning of Dark Jargons in Large\n  Language Models", 'OneLove beyond the field -- A few-shot pipeline for topic and sentiment\n  analysis during the FIFA World Cup in Qatar', 'The Sensitivity of Respondent-driven Sampling Method', 'Enriching gender in PER: A binary past and a complex future']
Abstracts: ["  Specific lexical choices in narrative text reflect both the writer's\nattitudes towards people in the narrative and influence the audienc

In [2]:
# Reference titles used as the ROUGE targets for the retrieved arXiv titles.
# Runs of spaces inside a title are kept exactly as written.
reference_titles = [
    "LGBT+ Inclusivity in Physics and Astronomy: A Best Practices Guide",
    "Multilingual Contextual Affective Analysis of LGBT People Portrayals in   Wikipedia",
    "LGBTQ-AI? Exploring Expressions of Gender and Sexual Orientation in   Chatbots",
    "The Ethical Implications of Digital Contact Tracing for LGBTQIA+   Communities",
    "LGBTQ Privacy Concerns on Social Media",
    "Discrete Event Simulation to Evaluate Shelter Capacity Expansion Options   for LGBTQ+ Homeless Youth",
    "Minority Stress Experienced by LGBTQ Online Communities during the   COVID-19 Pandemic",
    "Detecting Harmful Online Conversational Content towards LGBTQIA+   Individuals",
    "Benefits and Limitations of Remote Work to LGBTQIA+ Software   Professionals",
    "LGBTQIA+ (In)Visibility in Computer Science and Software Engineering   Education",
]

In [4]:
from rouge_score import rouge_scorer

def calculate_rouge(predictions, references):
    """Score each prediction by its best ROUGE F-measure over all references.

    For every prediction, ROUGE-1, ROUGE-2 and ROUGE-L (with stemming) are
    computed against each reference, and the highest F-measure per metric is
    kept.

    Args:
        predictions: Iterable of predicted strings.
        references: Iterable of reference strings; it is traversed once per
            prediction, so pass a list rather than a one-shot iterator.

    Returns:
        A tuple ``(avg_scores, results)``. ``results`` maps each metric name
        to the list of best F-measures, one per prediction. ``avg_scores``
        maps each metric name to the mean of that list, or ``0.0`` when there
        are no predictions. A prediction scored against no references gets
        ``0.0``.
    """
    metrics = ("rouge1", "rouge2", "rougeL")
    scorer = rouge_scorer.RougeScorer(list(metrics), use_stemmer=True)
    results = {metric: [] for metric in metrics}

    for pred in predictions:
        # Start from a float so every stored score has the same type.
        best = {metric: 0.0 for metric in metrics}
        for ref in references:
            scores = scorer.score(ref, pred)
            for metric in metrics:
                best[metric] = max(best[metric], scores[metric].fmeasure)

        for metric in metrics:
            results[metric].append(best[metric])

    # Guard the mean against an empty prediction list (would divide by zero).
    avg_scores = {
        metric: (sum(vals) / len(vals) if vals else 0.0)
        for metric, vals in results.items()
    }
    return avg_scores, results

# Example usage: score the retrieved arXiv titles against the reference titles.
# calculate_rouge takes any two lists of strings, for instance:
#   predictions = ["A novel approach to deep learning", "Deep learning for AI applications"]
#   references = ["A new method for deep learning", "Applications of AI in deep learning"]
avg_scores, detailed_scores = calculate_rouge(
    predictions=titles,
    references=reference_titles,
)

print(f"Average ROUGE Scores: {avg_scores}")
print(f"Detailed Scores for Each Prediction: {detailed_scores}")

Average ROUGE Scores: {'rouge1': 0.28204803154648356, 'rouge2': 0.13066666666666665, 'rougeL': 0.2598229233213753}
Detailed Scores for Each Prediction: {'rouge1': [1.0, 0.11111111111111112, 0.37037037037037035, 0, 0.10526315789473685, 0.2727272727272727, 0.23999999999999996, 0.2, 0.23529411764705882, 0.28571428571428564], 'rouge2': [1.0, 0, 0.24000000000000005, 0, 0, 0, 0, 0.06666666666666667, 0, 0], 'rougeL': [1.0, 0.11111111111111112, 0.37037037037037035, 0, 0.10526315789473685, 0.1904761904761905, 0.16666666666666666, 0.13333333333333333, 0.23529411764705882, 0.28571428571428564]}
