In [1]:
from evaluator import FormatEvaluator, ReportProcessor, ReportRewriter, AccuracyEvaluator
import os 

#### LLM Setup


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Set up Gemini LLM
#
# The API key is taken exclusively from the GEMINI_API_KEY environment
# variable. An earlier revision of this cell replaced the environment value
# with a key hard-coded in the notebook; any key that was ever committed here
# must be treated as compromised and revoked/rotated in the Google console.
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    raise ValueError("GEMINI_API_KEY not set in environment.")


def _make_gemini_llm(temperature):
    """Return a Gemini chat model configured with the given sampling temperature.

    All other settings (model name, token limit, retry count, API key) are
    shared, so the two models below differ only in `temperature`.
    """
    return ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=temperature,
        max_tokens=2048,
        max_retries=2,
        google_api_key=api_key,
    )


# llm_T0: temperature 0; llm_T1: temperature 1. The sections below compare
# claim extraction and question generation across these two settings.
llm_T0 = _make_gemini_llm(0)
llm_T1 = _make_gemini_llm(1)

Assigned API key manually.


#### Process sample report


In [3]:
# Resolve the sample report against the current working directory and hand
# the resulting path to ReportProcessor.
report_relpath = "reports/report_sudan_20250607.md"
example_path = os.path.join(os.getcwd(), report_relpath)
example = ReportProcessor(example_path)

In [4]:
# Fetch the report's sections and list their headings (the keys of the mapping).
example_sections = example.get_sections()
print('SECTION NAMES:', example_sections.keys(), sep='\n')

SECTION NAMES:
dict_keys(['Summary of Recent Developments', 'Key Alliance Shifts', 'Security Implications for UN Operations', 'Forward Outlook', 'Key Trends', 'Hotspots', 'Broader Conflict Context'])


# Accuracy evaluation

## Claims at temperature 0, questions at temperature 0

In [5]:
# Section used as the running example for every accuracy experiment below.
# `.get` yields None (rather than raising) if the heading is absent.
alliances_title = 'Key Alliance Shifts'
alliances_section = example_sections.get(alliances_title)
print(alliances_section)

- **Darfur's Joint Force Coalition**:  
  Previously neutral former rebel groups (SLM-MM and JEM-Jibril) have officially joined the conflict on SAF's side, abandoning their role as mediators of a year-long ceasefire in El Fasher.

- **Fragmentation of Rebel Groups**:  
  Nearly all Darfuri former rebel organizations have split internally over whether to support SAF, with most factions now fighting alongside government forces while a minority has formed a separate *"Neutral Joint Force."*

- **SRAC's Strategic Alignment**:  
  In April 2025, Musa Hilal’s **Sudanese Revolutionary Awakening Council (SRAC)** publicly declared support for SAF, despite earlier indications of warming relations with RSF.  
  *Source: Polgeonow*

---


In [6]:
# Baseline run: claims are extracted from the alliances section with the
# temperature-0 model and stored on the evaluator as `claims_list`.
acc_eval = AccuracyEvaluator()
acc_eval.extract_verifiable_claims_one_section(
    alliances_section,
    llm_T0,
)
print(acc_eval.claims_list)

```json
[
  "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
  "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
  "The former rebel group SLM-MM abandoned their role as mediators of a year-long ceasefire in El Fasher.",
  "The former rebel group JEM-Jibril abandoned their role as mediators of a year-long ceasefire in El Fasher.",
  "Nearly all Darfuri former rebel organizations have split internally over whether to support SAF (Sudanese Armed Forces).",
  "Most Darfuri former rebel organizations' factions are now fighting alongside government forces.",
  "A minority of Darfuri former rebel organizations' factions has formed a separate Neutral Joint Force.",
  "In April 2025, Musa Hilal’s Sudanese Revolutionary Awakening Council (SRAC) publicly declared support for SAF (Sudanese Armed Forces)."
]
```


In [7]:
# Generate questions with the temperature-0 model; the result is read back
# from `acc_eval.questions_dict` in the next cell.
acc_eval.generate_questions_one_section(llm_T0)

In [8]:
# NOTE(review): the captured output below starts with free text and a fenced
# JSON block, so `questions_dict` looks like the raw model reply rather than
# a parsed dict — confirm against AccuracyEvaluator.
print(acc_eval.questions_dict)

Here are the questions designed to verify the claims, focusing on clarity, conciseness, and closed-ended answers:

```json
[
  {
    "claim": "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
    "questions": [
      "Has SLM-MM officially announced its support for SAF in the current conflict?",
      "Is there documented evidence (e.g., statements, videos) of SLM-MM fighters engaging in combat alongside SAF forces?"
    ]
  },
  {
    "claim": "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
    "questions": [
      "Has JEM-Jibril officially announced its support for SAF in the current conflict?",
      "Is there documented evidence (e.g., statements, videos) of JEM-Jibril fighters engaging in combat alongside SAF forces?"
    ]
  },
  {
    "claim": "The former rebel group SLM-MM abandoned their role as mediators of a year-long ceasefire in El Fasher.",
    "questions": [
      "Was SLM-MM a designated mediator o

## Claims at temperature 0, questions at temperature 1

In [9]:
# C0/Q1 run, step 1: a fresh evaluator extracts claims with the
# temperature-0 model.
acc_eval_C0_Q1 = AccuracyEvaluator()
acc_eval_C0_Q1.extract_verifiable_claims_one_section(
    alliances_section,
    llm_T0,
)
print(acc_eval_C0_Q1.claims_list)

```json
[
  "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
  "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
  "The former rebel group SLM-MM abandoned their role as mediators of a year-long ceasefire in El Fasher.",
  "The former rebel group JEM-Jibril abandoned their role as mediators of a year-long ceasefire in El Fasher.",
  "Nearly all Darfuri former rebel organizations have split internally over whether to support SAF (Sudanese Armed Forces).",
  "Most Darfuri former rebel organizations' factions are now fighting alongside government forces.",
  "A minority of Darfuri former rebel organizations' factions has formed a separate Neutral Joint Force.",
  "In April 2025, Musa Hilal’s Sudanese Revolutionary Awakening Council (SRAC) publicly declared support for SAF (Sudanese Armed Forces)."
]
```


In [10]:
# C0/Q1 run, step 2: questions are generated with the temperature-1 model.
acc_eval_C0_Q1.generate_questions_one_section(
    llm_T1,
)
print(acc_eval_C0_Q1.questions_dict)

Here are the questions I would ask to verify the claims, designed to elicit factual, objective answers:

```json
[
  {
    "claim": "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
    "questions": [
      "Has SLM-MM officially declared its support for SAF in the current conflict?",
      "Are there documented instances of SLM-MM fighters engaging in combat alongside SAF forces?"
    ]
  },
  {
    "claim": "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
    "questions": [
      "Has JEM-Jibril officially declared its support for SAF in the current conflict?",
      "Are there documented instances of JEM-Jibril fighters engaging in combat alongside SAF forces?"
    ]
  },
  {
    "claim": "The former rebel group SLM-MM abandoned their role as mediators of a year-long ceasefire in El Fasher.",
    "questions": [
      "Was SLM-MM a formally designated mediator of a year-long ceasefire in El Fasher?",
      "Is the

## Claims at temperature 1, questions at temperature 0

In [11]:
# C1/Q0 run, step 1: a fresh evaluator extracts claims with the
# temperature-1 model.
acc_eval_C1_Q0 = AccuracyEvaluator()
acc_eval_C1_Q0.extract_verifiable_claims_one_section(
    alliances_section,
    llm_T1,
)
print(acc_eval_C1_Q0.claims_list)

```json
[
  "The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibril (Justice and Equality Movement-Jibril) were previously neutral former rebel groups.",
  "The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibril (Justice and Equality Movement-Jibril) have officially joined the conflict on the side of SAF (Sudanese Armed Forces).",
  "The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibril (Justice and Equality Movement-Jibril) have abandoned their role as mediators of a year-long ceasefire in El Fasher.",
  "Nearly all Darfuri former rebel organizations have split internally over whether to support SAF (Sudanese Armed Forces).",
  "Most Darfuri former rebel organizations' factions are now fighting alongside government forces.",
  "A minority of Darfuri former rebel organizations' factions has formed a separate Neutral Joint Force.",
  "In April 2025, Musa Hilal’s Sudanese Revolutionary Awakening Council (SRAC) publicly declared

In [12]:
# C1/Q0 run, step 2: questions are generated with the temperature-0 model.
acc_eval_C1_Q0.generate_questions_one_section(
    llm_T0,
)
print(acc_eval_C1_Q0.questions_dict)

Here are the questions designed to verify each claim:

**Claim 1: The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibril (Justice and Equality Movement-Jibril) were previously neutral former rebel groups.**

1.  Prior to the current conflict, were SLM-MM and JEM-Jibril officially designated as neutral parties in any peace agreements or ceasefire arrangements?
2.  Prior to the current conflict, were SLM-MM and JEM-Jibril considered former rebel groups?

**Claim 2: The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibril (Justice and Equality Movement-Jibril) have officially joined the conflict on the side of SAF (Sudanese Armed Forces).**

1.  Have SLM-MM and JEM-Jibril issued official statements declaring their alliance with SAF in the current conflict?
2.  Have SLM-MM and JEM-Jibril engaged in joint military operations with SAF against RSF in the current conflict?

**Claim 3: The groups SLM-MM (Sudan Liberation Movement-Minni Minnawi) and JEM-Jibr

## Claims at temperature 1, questions at temperature 1

In [13]:
# C1/Q1 run, step 1: a fresh evaluator extracts claims with the
# temperature-1 model.
acc_eval_C1_Q1 = AccuracyEvaluator()
acc_eval_C1_Q1.extract_verifiable_claims_one_section(
    alliances_section,
    llm_T1,
)
print(acc_eval_C1_Q1.claims_list)

```json
[
  "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
  "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
  "The former rebel group SLM-MM has abandoned its role as mediator of a year-long ceasefire in El Fasher.",
  "The former rebel group JEM-Jibril has abandoned its role as mediator of a year-long ceasefire in El Fasher.",
  "Nearly all Darfuri former rebel organizations have split internally over whether to support SAF (Sudanese Armed Forces).",
  "Most Darfuri former rebel organizations' factions are fighting alongside government forces.",
  "A minority of Darfuri former rebel organizations' factions have formed a separate Neutral Joint Force.",
  "In April 2025, Musa Hilal’s Sudanese Revolutionary Awakening Council (SRAC) publicly declared support for SAF (Sudanese Armed Forces).",
  "Prior to April 2025, there were indications of warming relations between Musa Hilal’s Sudanese Revolutionary Awakening C

In [14]:
# C1/Q1 run, step 2: questions are generated with the temperature-1 model.
acc_eval_C1_Q1.generate_questions_one_section(
    llm_T1,
)
print(acc_eval_C1_Q1.questions_dict)

Here are the questions designed to verify each claim:

```json
[
  {
    "claim": "The former rebel group SLM-MM has officially joined the conflict on SAF's side.",
    "questions": [
      "Has SLM-MM publicly announced its support for SAF in the current conflict?",
      "Are SLM-MM forces currently engaged in joint military operations with SAF?",
      "Has SAF officially confirmed SLM-MM's alliance in the current conflict?"
    ]
  },
  {
    "claim": "The former rebel group JEM-Jibril has officially joined the conflict on SAF's side.",
    "questions": [
      "Has JEM-Jibril publicly announced its support for SAF in the current conflict?",
      "Are JEM-Jibril forces currently engaged in joint military operations with SAF?",
      "Has SAF officially confirmed JEM-Jibril's alliance in the current conflict?"
    ]
  },
  {
    "claim": "The former rebel group SLM-MM has abandoned its role as mediator of a year-long ceasefire in El Fasher.",
    "questions": [
      "Was SLM-MM pr

#### Evaluate all sections


In [None]:
# evaluator = FormatEvaluator(model=llm)
# response = evaluator.evaluate_all_sections(example_sections)

In [6]:
# import json

# with open("example_feedback.md", "w", encoding="utf-8") as f:
#     f.write("# Evaluation Feedback\n\n")
#     for section, feedback in response.items():
#         f.write(f"## {section}\n")
#         if isinstance(feedback, dict):
#             for key, value in feedback.items():
#                 f.write(f"- **{key}**: {json.dumps(value, ensure_ascii=False)}\n")
#         else:
#             f.write(f"{feedback}\n")
#         # f.write("\n")

In [8]:
# Load the stored example feedback shipped with the evaluator package
# (the FormatEvaluator run in the cells above is commented out).
from evaluator.prompts.example_feedback import example_feedback

# `feedback` is the name the rewriting cell further down refers to.
feedback = example_feedback
# Bare expression so the notebook displays the feedback structure.
feedback

{'Summary of Recent Developments': {'accuracy_1': {'score': 3,
   'comment': 'The report states that violence dropped to its lowest levels in April 2025. This needs to be verified against available data. If this is not accurate, it needs to be corrected. If it is accurate, the source of this information should be cited.'},
  'accuracy_2': {'score': 4,
   'comment': "The claims about territorial control (SAF recapturing Khartoum in March, RSF's capture of Mellit, SAF's advances in Omdurman) need to be verified against known data. If these claims are not precise, they need to be corrected. If they are accurate, the source of this information should be cited."},
  'accuracy_3': {'score': 4,
   'comment': 'While no data points are explicitly missing or misstated, the report should include casualty figures or other relevant data to support the claim about the drop in violence. This would improve the accuracy and impact of the report.'},
  'relevance_1': {'score': 4,
   'comment': 'The summa

In [None]:
# Build the rewriter for the sample report.
# The notebook only defines `llm_T0` and `llm_T1`; the bare name `llm` used
# here previously does not exist on a fresh kernel and raised NameError.
# The temperature-0 model is used here.
rewriter = ReportRewriter(llm_T0, example_path)
# rewriter.context_content

In [10]:
# rewriter.report_content

In [None]:
# Rewrite every section using the feedback loaded earlier. The assignment must
# run in this cell: with it commented out, `corrected_report` only existed as
# leftover kernel state and the cell failed after Restart & Run All.
corrected_report = rewriter.rewrite_all_sections(feedback)
corrected_report

Rewriting section: Summary of Recent Developments
Rewriting section: Key Alliance Shifts
Rewriting section: Security Implications for UN Operations


{'Summary of Recent Developments': "Political violence across Sudan saw a notable decrease in April 2025, reaching the lowest levels since the conflict's onset two years prior [Source: ACLED, April 2025 Data]. This decline is correlated with the Sudanese Armed Forces (SAF) regaining control of key areas in Khartoum in March 2025, which resulted in a nearly 50% reduction in violent incidents within Khartoum state compared to February [Source: UN OCHA Situation Report, March 2025]. While overall casualty figures remain high, the decrease in reported incidents suggests a localized improvement in security.\n\nBeyond territorial gains, primarily SAF advances in Omdurman and the Rapid Support Forces (RSF) capture of Mellit [Source: Crisis Group Sudan Conflict Update, May 2025], the Sudanese security landscape is characterized by a significant realignment of forces, particularly impacting Darfur. This shift in alliances, driven by factors including resource competition and ethnic tensions, ha