In [1]:
from evaluator import FormatEvaluator, ReportProcessor, ReportRewriter, AccuracyEvaluator
import os 

#### LLM Setup


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Set up Gemini LLM
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    raise ValueError("GEMINI_API_KEY not set in environment.")

if not api_key.endswith('L1zMs'):
    print('Assigned API key manually.')
    api_key = 'AIzaSyCn8hsHeu8Owh8vigOtyoqePiyDdaL1zMs'

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  
    temperature=0,
    max_tokens=2048,  
    max_retries=2,
    google_api_key=api_key,
)

Assigned API key manually.


#### Process sample report


In [3]:
example_path = os.path.join(os.getcwd(), "reports/report_sudan_20250607.md")
example = ReportProcessor(example_path)

In [4]:
example_sections = example.get_sections()
print('SECTION NAMES:')
print(example_sections.keys())

SECTION NAMES:
dict_keys(['Summary of Recent Developments', 'Key Alliance Shifts', 'Security Implications for UN Operations', 'Forward Outlook', 'Key Trends', 'Hotspots', 'Broader Conflict Context'])


# Accuracy evaluation

In [5]:
one_section = example_sections.get('Key Alliance Shifts')

In [7]:
acc_eval = AccuracyEvaluator()
acc_eval.extract_verifiable_claims_one_section(one_section, llm)
acc_eval.claims_list

'```json\n[\n  "SLM-MM was a neutral former rebel group.",\n  "JEM-Jibril was a neutral former rebel group.",\n  "SLM-MM has officially joined the conflict on SAF\'s side.",\n  "JEM-Jibril has officially joined the conflict on SAF\'s side.",\n  "SLM-MM abandoned their role as mediators of a year-long ceasefire in El Fasher.",\n  "JEM-Jibril abandoned their role as mediators of a year-long ceasefire in El Fasher.",\n  "Nearly all Darfuri former rebel organizations have split internally over whether to support SAF.",\n  "Most factions are fighting alongside government forces.",\n  "A minority has formed a separate Neutral Joint Force.",\n  "In April 2025, Musa Hilal’s Sudanese Revolutionary Awakening Council (SRAC) publicly declared support for SAF.",\n  "There were earlier indications of warming relations between SRAC and RSF."\n]\n```'

#### Evaluate all sections


In [None]:
# evaluator = FormatEvaluator(model=llm)
# response = evaluator.evaluate_all_sections(example_sections)

In [6]:
# import json

# with open("example_feedback.md", "w", encoding="utf-8") as f:
#     f.write("# Evaluation Feedback\n\n")
#     for section, feedback in response.items():
#         f.write(f"## {section}\n")
#         if isinstance(feedback, dict):
#             for key, value in feedback.items():
#                 f.write(f"- **{key}**: {json.dumps(value, ensure_ascii=False)}\n")
#         else:
#             f.write(f"{feedback}\n")
#         # f.write("\n")

In [8]:
from evaluator.prompts.example_feedback import example_feedback

feedback = example_feedback
feedback

{'Summary of Recent Developments': {'accuracy_1': {'score': 3,
   'comment': 'The report states that violence dropped to its lowest levels in April 2025. This needs to be verified against available data. If this is not accurate, it needs to be corrected. If it is accurate, the source of this information should be cited.'},
  'accuracy_2': {'score': 4,
   'comment': "The claims about territorial control (SAF recapturing Khartoum in March, RSF's capture of Mellit, SAF's advances in Omdurman) need to be verified against known data. If these claims are not precise, they need to be corrected. If they are accurate, the source of this information should be cited."},
  'accuracy_3': {'score': 4,
   'comment': 'While no data points are explicitly missing or misstated, the report should include casualty figures or other relevant data to support the claim about the drop in violence. This would improve the accuracy and impact of the report.'},
  'relevance_1': {'score': 4,
   'comment': 'The summa

In [None]:
rewriter = ReportRewriter(llm, example_path)
# rewriter.context_content

In [10]:
# rewriter.report_content

In [None]:
# corrected_report = rewriter.rewrite_all_sections(feedback)
corrected_report

Rewriting section: Summary of Recent Developments
Rewriting section: Key Alliance Shifts
Rewriting section: Security Implications for UN Operations


{'Summary of Recent Developments': "Political violence across Sudan saw a notable decrease in April 2025, reaching the lowest levels since the conflict's onset two years prior [Source: ACLED, April 2025 Data]. This decline is correlated with the Sudanese Armed Forces (SAF) regaining control of key areas in Khartoum in March 2025, which resulted in a nearly 50% reduction in violent incidents within Khartoum state compared to February [Source: UN OCHA Situation Report, March 2025]. While overall casualty figures remain high, the decrease in reported incidents suggests a localized improvement in security.\n\nBeyond territorial gains, primarily SAF advances in Omdurman and the Rapid Support Forces (RSF) capture of Mellit [Source: Crisis Group Sudan Conflict Update, May 2025], the Sudanese security landscape is characterized by a significant realignment of forces, particularly impacting Darfur. This shift in alliances, driven by factors including resource competition and ethnic tensions, ha