# Research Assistant Agent

This notebook illustrates how the very first version of this agent was developed

In [6]:
import sys
from pathlib import Path
from typing import Literal
from dotenv import load_dotenv

from langchain_core.messages import SystemMessage, AIMessage
from langchain.chat_models import init_chat_model
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.graph import START, END, StateGraph

# Add src to path for imports
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root / "src"))

# Load environment variables
env_path = project_root / '.env'
load_dotenv(dotenv_path=env_path)

print("✓ Imports successful")
print(f"✓ Loading .env from: {env_path}")

✓ Imports successful
✓ Loading .env from: /Users/juha/development/semantic-byte/ai-engineering/agentic-newsroom/.env


In [7]:
slug = "socotra_inside_the_alien_island"

## Prompts

In [8]:
# ============= FROZEN PROMPTS =============

research_question_instructions = """You are a Research Assistant working for a reporter on an article based on the following story brief:

<StoryBrief>
{story_brief}
</StoryBrief>

Your task is to generate relevant and interesting questions or topics to research.
These questions will be used to search the web and Wikipedia to gather context and facts.

<Context>
You will see the history of queries and the summaries of materials collected so far.
Use this context to avoid repeating searches and to drill deeper into interesting areas.
</Context>

<Instructions>
1. Analyze the current state of research. What is missing?
2. Generate 1-2 targeted questions or topics to fill those gaps.
3. Focus on specific details (names, dates, figures) rather than broad generalities if the basics are already covered.
</Instructions>
"""

wikipedia_search_instructions = """You are an expert at creating Wikipedia search queries.

<StoryBrief>
{story_brief}
</StoryBrief>

<Search Target>
This query will be used for **Wikipedia search** to find encyclopedic articles with factual information.
</Search Target>

<Instructions>
1. Look at the latest research question or topic in the context.
2. Generate a **simple, direct query** targeting a Wikipedia article title or main topic.
3. Wikipedia search works best with:
   - Single entities or concepts: "Socotra", "Dracaena cinnabari", "Dragon blood tree"
   - Proper nouns: place names, species names, historical events
   - 1-4 words maximum
4. Avoid:
   - Questions or conversational language
   - Long phrases or multiple concepts
   - Adjectives or qualifiers unless essential
   
Examples of good Wikipedia queries:
✓ "Socotra"
✓ "Dragon blood tree"

Examples of bad Wikipedia queries:
✗ "Socotra dragon blood tree ecology and conservation"
✗ "How does climate change affect Socotra?"
</Instructions>
"""

web_search_instructions = """You are an expert Search Query Optimizer.
Your goal is to convert a research question into the most effective search query for a search engine (like Google or Tavily) or Wikipedia.

<StoryBrief>
{story_brief}
</StoryBrief>

<Instructions>
1. Look at the latest research question or topic provided in the context.
2. Formulate a specific, keyword-rich query that is likely to yield high-quality results.
3. Avoid conversational language (e.g., "Tell me about...") and focus on entities and keywords.
</Instructions>
"""


collect_material_instructions = """You are a Research Assistant working for a reporter on an article based on the following story brief:

<StoryBrief>
{story_brief}
</StoryBrief>

Your primary task is to process the raw search results and extract relevant information.
However, you must also critically evaluate if we have enough information to write the article.

<Instructions>
1. **Extract Material**: Review the raw search results. Extract key facts, quotes, and figures into structured items.
2. **Evaluate Sufficiency**: Compare the TOTAL information gathered so far (including previous turns) against the Story Brief.
   - Have we answered the key questions in the brief?
   - Do we have specific details (dates, names, numbers), not just generalities?
   - Are there obvious gaps?
3. **Decision**:
   - If the material is sufficient to write a high-quality article, mark as COMPLETE.
   - If important questions remain unanswered or the material is too thin, mark as INCOMPLETE.
</Instructions>

<Quality Control>
- Do not stop just because you found *something*. Stop only when you have *enough*.
- If you are unsure, err on the side of continuing research.
</Quality Control>
"""

## State and Structured Classes

In [9]:
# ============= FROZEN SCHEMAS =============

from typing import List, Optional, TypedDict, Annotated
from pydantic import BaseModel, Field
from langchain_core.messages import BaseMessage
import operator

class StoryBrief(BaseModel):
    """Story brief created by the Assignment Editor."""
    topic: str = Field(..., description="Clear statement of what the story is about")
    angle: str = Field(..., description="The specific approach or perspective to take")
    length: int = Field(..., description="The length of the article in words")
    key_questions: List[str] = Field(..., description="3-5 questions the article should answer")

class SearchResult(BaseModel):
    search_result: str = Field(..., description="The search result content")
    source: str = Field(..., description="The source of the search result")

class ResearchPackage(BaseModel):
    """Research package created by the Research Assistant."""
    results: List[SearchResult] = Field(..., description="List of collected search results")

class SearchQuery(BaseModel):
    search_query: str = Field(..., description="The search query")

class ResearchState(TypedDict):
    """State for the Research Assistant subgraph."""
    story_brief: StoryBrief
    context: Annotated[List[BaseMessage], operator.add]
    search_results: Annotated[List[SearchResult], operator.add]
    max_num_turns: int
    current_turn: int
    is_research_complete: bool

## Tools and Configuration

In [10]:
from langchain_tavily import TavilySearch
from langchain_community.document_loaders import WikipediaLoader

tavily_search = TavilySearchResults(max_results=3)

model = init_chat_model(model="openai:gpt-5-mini", reasoning_effort="minimal")

## Nodes

In [11]:
# ============= FROZEN TOOL FUNCTIONS =============

import json
from typing import List, Any

def format_wikipedia_results(search_docs: List) -> str:
    """Format Wikipedia search results into a structured string."""
    formatted_docs = []
    
    for doc in search_docs:
        source = doc.metadata.get("source", "unknown")
        page = doc.metadata.get("page", "")
        content = doc.page_content
        
        formatted_docs.append(
            f'<Document source="{source}" page="{page}"/>\n{content}\n</Document>'
        )
    
    return "\n\n---\n\n".join(formatted_docs)

def wikipedia_search(query: str, max_docs: int = 2) -> str:
    """Search Wikipedia and return formatted results."""
    try:
        loader = WikipediaLoader(query=query, load_max_docs=max_docs)
        search_docs = loader.load()
        return format_wikipedia_results(search_docs)
    except Exception as e:
        raise Exception(f"Error performing Wikipedia search: {str(e)}")

def format_tavily_results(search_docs: Any) -> str:
    """Format Tavily search results into a structured string."""
    formatted_docs = []
    
    # Case 1: It's a string (maybe JSON?)
    if isinstance(search_docs, str):
        try:
            search_docs = json.loads(search_docs)
        except:
            formatted_docs.append(f"<Document>\n{search_docs}\n</Document>")
            search_docs = []
    
    # Case 2: It's a list
    if isinstance(search_docs, list):
        for doc in search_docs:
            if isinstance(doc, dict):
                url = doc.get("url", "unknown")
                content = doc.get("content", str(doc))
                formatted_docs.append(f'<Document href="{url}"/>\n{content}\n</Document>')
            elif isinstance(doc, str):
                formatted_docs.append(f"<Document>\n{doc}\n</Document>")
            else:
                formatted_docs.append(f"<Document>\n{str(doc)}\n</Document>")
    
    return "\n\n---\n\n".join(formatted_docs)

def tavily_search_tool(query: str, max_results: int = 3) -> str:
    """Perform a web search using Tavily and return formatted results."""
    tavily_tool = TavilySearchResults(max_results=max_results)
    
    try:
        search_docs = tavily_tool.invoke({"query": query})
        return format_tavily_results(search_docs)
    except Exception as e:
        raise Exception(f"Error performing web search: {str(e)}")

# ============= NODES =============

def generate_queries(state: ResearchState):
    """Generate search queries based on the story brief."""
    
    story_brief = state["story_brief"]
    context = state.get("context", [])
    current_turn = state.get("current_turn", 0)

    # Generate search queries
    system_msg = research_question_instructions.format(
        story_brief=story_brief.model_dump_json()
    )
    
    response = model.invoke([SystemMessage(content=system_msg)] + context)

    return {
        "context": [response],
        "current_turn": current_turn + 1
    }

def search_web(state: ResearchState):
    """Search the web using Tavily based on the conversation context."""
    
    story_brief = state["story_brief"]
    context = state.get("context", [])
    
    # Generate query using general search instructions
    structured_model = model.with_structured_output(SearchQuery)
    system_msg = SystemMessage(
        content=web_search_instructions.format(story_brief=story_brief.model_dump_json())
    )
    search_query = structured_model.invoke([system_msg] + context)
    
    # Execute search using the tool (handles both search and formatting)
    try:
        formatted_docs = tavily_search_tool(search_query.search_query, max_results=3)
    except Exception as e:
        return {"context": [f"Error performing web search: {str(e)}"]}
    
    return {"context": [formatted_docs]}

def search_wikipedia(state: ResearchState):
    """Retrieve docs from Wikipedia based on the conversation context."""
    
    story_brief = state["story_brief"]
    context = state.get("context", [])
    
    # Generate a structured search query from the context
    structured_model = model.with_structured_output(SearchQuery)
    system_msg = SystemMessage(
        content=wikipedia_search_instructions.format(story_brief=story_brief.model_dump_json())
    )
    search_query = structured_model.invoke([system_msg] + context)
    
    # Execute search using the tool
    try:
        formatted_search_docs = wikipedia_search(search_query.search_query, max_docs=2)
    except Exception as e:
        return {"context": [f"Error performing Wikipedia search: {str(e)}"]}
    
    return {"context": [formatted_search_docs]}
    
    
def collect_material(state: ResearchState):
    """Process search results, extract structured data, and decide if research is complete."""
    
    story_brief = state["story_brief"]
    context = state.get("context", [])
    
    # Define the structure for the LLM's analysis
    class ResearchEvaluation(BaseModel):
        new_material: list[SearchResult] = Field(
            description="Relevant facts and information extracted from the latest search results"
        )
        is_complete: bool = Field(
            description="True if the gathered information is sufficient to answer the research questions, False if more research is needed"
        )
        reasoning: str = Field(
            description="Brief explanation of why research is complete or what is still missing"
        )

    system_msg = SystemMessage(
        content=collect_material_instructions.format(
            story_brief=story_brief.model_dump_json()
        )
    )
    
    structured_model = model.with_structured_output(ResearchEvaluation)
    evaluation = structured_model.invoke([system_msg] + context)
    
    # Create a summary message for the context
    summary_msg = (
        f"Collected {len(evaluation.new_material)} new items.\n"
        f"Status: {'Complete' if evaluation.is_complete else 'Continuing'}\n"
        f"Reasoning: {evaluation.reasoning}"
    )
    
    return {
        "search_results": evaluation.new_material,
        "context": [AIMessage(content=summary_msg)],
        "is_research_complete": evaluation.is_complete
    }

print("✓ Nodes defined")

✓ Nodes defined


## Routing

In [12]:
def should_continue(state: ResearchState) -> Literal["generate_queries", END]:
    """Decide whether to continue research or end."""
    
    # Check loop limits
    current_turn = state.get("current_turn", 0)
    max_turns = state.get("max_num_turns", 3)
    
    if current_turn >= max_turns:
        print(f"DEBUG: Max turns ({max_turns}) reached. Ending research.")
        return END
    
    # Check LLM decision
    if state.get("is_research_complete", False):
        print("DEBUG: LLM decided research is complete. Ending research.")
        return END
        
    print(f"DEBUG: Continuing research (Turn {current_turn + 1}/{max_turns})")
    return "generate_queries"

## Graphs

In [13]:
# Initialize Graph
builder = StateGraph(ResearchState)

# Add Nodes
builder.add_node("generate_queries", generate_queries)
builder.add_node("search_web", search_web)
builder.add_node("search_wikipedia", search_wikipedia)
builder.add_node("collect_material", collect_material)

# Add Edges
builder.add_edge(START, "generate_queries")

# Parallel execution: generate_queries -> both search nodes
builder.add_edge("generate_queries", "search_web")
builder.add_edge("generate_queries", "search_wikipedia")

# Fan-in: both search nodes -> collect_material
builder.add_edge("search_web", "collect_material")
builder.add_edge("search_wikipedia", "collect_material")

# Conditional Loop
builder.add_conditional_edges(
    "collect_material", 
    should_continue, 
    ["generate_queries", END]
)

# Compile
research_assistant = builder.compile()

print(research_assistant.get_graph().draw_ascii())


                              +-----------+                         
                              | __start__ |                         
                              +-----------+                         
                                    *                               
                                    *                               
                                    *                               
                          +------------------+                      
                          | generate_queries |                      
                         *+------------------+...                   
                    *****           *            .....              
               *****               *                  .....         
            ***                    *                       .....    
+------------+           +------------------+                   ... 
| search_web |*          | search_wikipedia |              .....    
+------------+ *****     +--------

## Test the Graph

In [14]:
# Helper to load StoryBrief
def load_story_brief(slug: str) -> StoryBrief:
    """Load a StoryBrief from the tmp directory."""
    path = project_root / "tmp" / slug / "story_brief.json"
    if not path.exists():
        raise FileNotFoundError(f"StoryBrief not found at {path}")
    
    with open(path, "r") as f:
        data = json.load(f)
    return StoryBrief(**data)

# 1. Load the brief
# Using the same slug as in your reporter notebook example
story_brief = load_story_brief(slug)

print(f"Loaded Brief: {story_brief.topic}")

# 2. Run the Research Assistant
print("\nStarting Research Assistant...")
initial_state = {
    "story_brief": story_brief,
    "max_num_turns": 3,
    "current_turn": 0,
    "context": [],
    "search_results": []
}

result = research_assistant.invoke(initial_state)

Loaded Brief: Socotra: Inside the ‘Alien’ Island Where Evolution Went Off-Script

Starting Research Assistant...


            id = uuid7()
Future versions will require UUID v7.
  input_data = validator(cls_, input_data)


DEBUG: Continuing research (Turn 2/3)
DEBUG: Continuing research (Turn 3/3)
DEBUG: Max turns (3) reached. Ending research.


## Display Results

In [15]:

# 3. Display Results
print("\n=== FINAL RESEARCH RESULTS ===")
print(f"Total Items Collected: {len(result['search_results'])}")
print(f"Turns Used: {result['current_turn']}")
print("-" * 40)

for i, item in enumerate(result['search_results'], 1):
    print(f"\n[{i}] Source: {item.source}")
    # Truncate content for readability
    content = item.search_result.replace("\n", " ")
    print(f"Content: {content}")


=== FINAL RESEARCH RESULTS ===
Total Items Collected: 27
Turns Used: 3
----------------------------------------

[1] Source: Wikipedia: Socotra
Content: Socotra (Saqatri) is a Yemeni island in the Indian Ocean, largest of six islands in the Socotra archipelago, comprising ~95% of archipelago landmass; lies ~380 km south of the Arabian Peninsula and ~232 km east of the Horn of Africa; island dimensions ~132 km long and 42 km wide.

[2] Source: Wikipedia: Socotra
Content: Inhabitants called Socotrans; languages: Arabic and Soqotri.

[3] Source: Wikipedia: Socotra
Content: High endemism: up to a third of plant life is unique to Socotra; described as 'the most alien-looking place on Earth'; UNESCO World Heritage Site (inscribed 2008).

[4] Source: Wikipedia: Socotra Governorate
Content: Socotra Governorate: administrative unit of Yemen including Socotra and nearby islands (Abd al Kuri, Samhah, Darsah); Socotra contains most population and land area.

[5] Source: Wikipedia: Socotra Governo

In [16]:
def format_research_package_as_markdown(pkg: ResearchPackage) -> str:
    """Format the ResearchPackage into a readable Markdown report."""
    
    md = []
    
    # Header
    md.append(f"**Total Items:** {len(pkg.results)}")
    md.append("\n---\n")
    
    # Results Loop
    if not pkg.results:
        md.append("*No research results collected.*")
    else:
        for i, item in enumerate(pkg.results, 1):
            # Source Header
            source_url = item.source
            # If source is a URL, make it clickable
            if source_url.startswith("http"):
                source_display = f"[{source_url}]({source_url})"
            else:
                source_display = source_url
                
            md.append(f"### {i}. Source: {source_display}")
            
            # Content Block
            # Use blockquote for content to distinguish it
            content = item.search_result.strip()
            md.append(f"> {content}")
            
            md.append("\n---\n")
            
    return "\n".join(md)

In [17]:
from IPython.display import display, Markdown

# Create the ResearchPackage from the graph result
pkg = ResearchPackage(
    results=result["search_results"]
)
# Generate the markdown content
md_content = format_research_package_as_markdown(pkg)

# Display it in the notebook
display(Markdown(md_content))

**Total Items:** 27

---

### 1. Source: Wikipedia: Socotra
> Socotra (Saqatri) is a Yemeni island in the Indian Ocean, largest of six islands in the Socotra archipelago, comprising ~95% of archipelago landmass; lies ~380 km south of the Arabian Peninsula and ~232 km east of the Horn of Africa; island dimensions ~132 km long and 42 km wide.

---

### 2. Source: Wikipedia: Socotra
> Inhabitants called Socotrans; languages: Arabic and Soqotri.

---

### 3. Source: Wikipedia: Socotra
> High endemism: up to a third of plant life is unique to Socotra; described as 'the most alien-looking place on Earth'; UNESCO World Heritage Site (inscribed 2008).

---

### 4. Source: Wikipedia: Socotra Governorate
> Socotra Governorate: administrative unit of Yemen including Socotra and nearby islands (Abd al Kuri, Samhah, Darsah); Socotra contains most population and land area.

---

### 5. Source: Wikipedia: Socotra Governorate
> Geography & climate: islands largely limestone; Hajhir Mountains with exposed granite; lowland arid climate averaging ~150 mm annual precipitation; rainfall mostly in NE monsoon (Nov–Mar); SW monsoon brings dry winds (Apr–Oct); sea fogs/orographic precipitation increase moisture at higher elevations up to ~1000 mm/year; year-round streams only in mountains.

---

### 6. Source: Wikipedia: Socotra Governorate
> Geology & evolutionary history: Socotra is an isolated continental fragment (not volcanic), once part of Gondwana; detached during Miocene rifting that opened Gulf of Aden; geologically linked to Africa.

---

### 7. Source: Wikipedia: Socotra
> History highlights: prehistoric Oldowan lithic culture evidence (stone tools near Hadibo found 2008); role in ancient Indian Ocean trade (Periplus of the Erythraean Sea); Hoq Cave inscriptions in multiple ancient scripts (1st century BC–6th century AD).

---

### 8. Source: Wikipedia: Socotra Governorate
> Historical political control: part of Mahra Sultanate historically; brief Portuguese occupation 1507–1511; later British control (part of Bombay Presidency, then Aden Protectorate) until mid-20th century; attached to Aden Governorate after 1967; moved to Hadhramaut Governorate in 2004; established as its own governorate in Dec 2013.

---

### 9. Source: Wikipedia: Socotra; Socotra Governorate
> Recent governance and conflict: UAE deployed troops and took administrative control of Socotra airport and seaport on 30 April 2018; Saudi troops deployed 14 May 2018 with a deal for joint exercises and return of administrative control to Yemen; Southern Transitional Council (STC), UAE-backed, seized control of the archipelago in June 2020 and currently controls the island according to the summary.

---

### 10. Source: Wikipedia: Socotra
> Socotra (Saqatri) is a Yemeni island in the Indian Ocean, largest of six islands in the Socotra Archipelago, comprising ~95% of archipelago landmass; lies ~380 km south of Arabian Peninsula and ~232 km east of Horn of Africa; island dimensions: 132 km long, 42 km wide.

---

### 11. Source: Wikipedia: Socotra
> Inhabitants called Socotrans; languages: Arabic and Soqotri.

---

### 12. Source: Wikipedia: Socotra
> High level of endemism: up to one-third of plant life is unique to Socotra; island described as 'the most alien-looking place on Earth.'

---

### 13. Source: Wikipedia: Socotra
> Socotra was inscribed as a UNESCO World Heritage Site in 2008.

---

### 14. Source: Wikipedia: Socotra Governorate
> Socotra Governorate: administrative history—part of Mahra Sultanate, British administration (Bombay Presidency until 1937, then Aden Protectorate), attached to Aden Governorate after 1967 independence, moved to Hadhramaut Governorate in 2004, became its own governorate in December 2013.

---

### 15. Source: Wikipedia: Socotra Governorate
> Geography of archipelago: four larger islands (Socotra, Abd al Kuri, Samhah, Darsah) plus 3 islets; coordinates and positions relative to Horn of Africa and Arabian Peninsula; islands largely limestone with Hajhir Mountains (exposed granite).

---

### 16. Source: Wikipedia: Socotra Governorate
> Climate: lowlands arid with ~150 mm annual precipitation; most rain during northeast monsoon (Nov–Mar); southwest monsoon brings dry winds Apr–Oct; mountain/fog/orographic precipitation up to ~1000 mm annually in Hajhir Mountains; year-round rivers only in mountains.

---

### 17. Source: Wikipedia: Socotra Governorate
> Geology/evolutionary background: Socotra is a continental fragment (not volcanic), once part of Gondwana, detached during the Miocene rifting that opened the Gulf of Aden—explains long isolation and link to African continental geology.

---

### 18. Source: Wikipedia: Socotra; Socotra Governorate
> Recent governance incidents: UAE deployed troops and took administrative control of Socotra airport and seaport on 30 April 2018; Saudi troops deployed 14 May 2018 and an agreement returned administrative control to Yemen; Southern Transitional Council (STC) seized control of the archipelago in June 2020; STC described as UAE-backed secessionist faction.

---

### 19. Source: [https://pmc.ncbi.nlm.nih.gov/articles/PMC6169433/](https://pmc.ncbi.nlm.nih.gov/articles/PMC6169433/)
> From PMC article (2018?): Dracaena cinnabari (dragon's blood tree) is endemic and the island's most iconic plant; occurs between ~300 and 1480 m elevation; Socotra separated from the continent during the Tertiary, producing high floral endemism.

---

### 20. Source: [https://www.mdpi.com/1999-4907/13/8/1276](https://www.mdpi.com/1999-4907/13/8/1276)
> MDPI Forests (2022?) — 'Identifying Suitable Restoration and Conservation Areas for Dracaena cinnabari': Notes dragon trees are arborescent Dracaena; most dragon tree species endangered and often insular with low abundances. Gives an estimated population of D. cinnabari at ca. 80,000 individuals and refers to red resin 'dragon's blood'. Authors listed (Marcelo Rezende, Petr Maděra, Petr Vahalík, Kay Van Damme, Hana Habrová).

---

### 21. Source: [https://www.mdpi.com/1999-4907/14/4/840](https://www.mdpi.com/1999-4907/14/4/840)
> MDPI Forests (2023?) — notes D. cinnabari is endemic; Socotra has about 37% endemic vascular plants. Socotra is a UNESCO Biosphere Reserve (2003). Cites Adolt et al. inventory of D. cinnabari in Firmihin (2010–2011) using 107 circular plots (~25 m radius) across ~700 ha, measuring trees >1.3 m and recording DBH and branch orders (monitoring methodology). Research funded by Mendel University IGA grant.

---

### 22. Source: [https://en.wikipedia.org/wiki/Socotra](https://en.wikipedia.org/wiki/Socotra)
> Wikipedia — Socotra overview: Yemeni island in Indian Ocean; largest of six islands in archipelago (~95% landmass); lies ~380 km south of Arabian Peninsula and ~232 km east of Horn of Africa; inhabitants Socotrans speak Arabic and Soqotri. Up to a third (≈33%) of plant life unique (other sources state 37%); island dimensions 132 km by 42 km. UNESCO World Heritage Site inscribed in 2008. Described as 'the most alien-looking place on Earth.'

---

### 23. Source: [https://en.wikipedia.org/wiki/Socotra_Governorate](https://en.wikipedia.org/wiki/Socotra_Governorate)
> Wikipedia — Socotra Governorate: Archipelago includes Socotra, Abd al Kuri, Samhah, Darsah and islets. Climate: lowlands ~150 mm annual precipitation; high elevations up to 1000 mm due to fog/orographic precipitation; northeast monsoon Nov–Mar brings most rain; southwest monsoon Apr–Oct brings dry winds. Geology: continental fragment from Gondwana, detached during Miocene rifting that opened Gulf of Aden.

---

### 24. Source: [https://en.wikipedia.org/wiki/Socotra_Governorate](https://en.wikipedia.org/wiki/Socotra_Governorate)
> Wikipedia — governance timeline highlights: UAE deployed troops and took administrative control of airport and seaport on 30 April 2018; Saudi deployed troops 14 May 2018 and brokered a deal returning control to Yemen; Southern Transitional Council seized archipelago in June 2020.

---

### 25. Source: MDPI articles (links above)
> MDPI and PMC sources reference prior inventories and monitoring efforts (Adolt et al. 2010–2011) and the existence of restoration/conservation planning work for D. cinnabari (authors and modeling approaches indicated).

---

### 26. Source: [https://www.mdpi.com/1999-4907/13/8/1276](https://www.mdpi.com/1999-4907/13/8/1276)
> From MDPI: Dragon tree species globally are often endangered, with insular populations and low abundances — providing context for conservation urgency for D. cinnabari on Socotra.

---

### 27. Source: Combined PMC/MDPI sources
> PMC article and MDPI pieces confirm Socotra's high endemism, geological isolation, and that D. cinnabari occupies mid-to-high elevations where fog/orographic moisture occurs — linking species distribution to island climate/geography.

---


In [18]:
import json

def save_research_package(pkg: ResearchPackage, slug: str):
    """Save the research package to disk as both JSON and Markdown."""
    
    # Define output directory
    output_dir = project_root / "tmp" / slug
    output_dir.mkdir(parents=True, exist_ok=True)
    
    # 1. Save JSON
    json_path = output_dir / "research_package.json"
    with open(json_path, "w") as f:
        f.write(pkg.model_dump_json(indent=2))
    print(f"✓ Saved JSON to: {json_path}")
        
    # 2. Save Markdown
    md_path = output_dir / "research_package.md"
    md_content = format_research_package_as_markdown(pkg)
    with open(md_path, "w") as f:
        f.write(md_content)
    print(f"✓ Saved Markdown to: {md_path}")

# Execute the save
save_research_package(pkg, slug)

✓ Saved JSON to: /Users/juha/development/semantic-byte/ai-engineering/agentic-newsroom/tmp/socotra_inside_the_alien_island/research_package.json
✓ Saved Markdown to: /Users/juha/development/semantic-byte/ai-engineering/agentic-newsroom/tmp/socotra_inside_the_alien_island/research_package.md
