### 📦 Installing Required Packages

To ensure all dependencies for the project are installed, we use the following command:

In [None]:
%pip install -r requirements.txt

### 🛠️ Import Required Modules and Classes

This block loads all essential modules and classes from the `sokegraph` package and its dependencies. 

In [None]:
from sokegraph.sources.base_paper_source import BasePaperSource
from sokegraph.sources.semantic_scholar_source import SemanticScholarPaperSource
from sokegraph.sources.pdf_paper_source import PDFPaperSource
from sokegraph.ranking.paper_ranker import PaperRanker
from sokegraph.graph.knowledge_graph import KnowledgeGraph
from sokegraph.util.logger import LOG
from sokegraph.agents.ai_agent import AIAgent
from sokegraph.agents.openai_agent import OpenAIAgent
from sokegraph.agents.gemini_agent import GeminiAgent
from sokegraph.ontology.ontology_updater import OntologyUpdater
from sokegraph.graph.neo4j_knowledge_graph import Neo4jKnowledgeGraph
from sokegraph.agents.llama_agent import LlamaAgent
from sokegraph.agents.ollama_agent import OllamaAgent
from sokegraph.agents.claude_agent import ClaudeAgent
from sokegraph.sources.journal_api_source import JournalApiPaperSource
from sokegraph.graph.networkx_knowledge_graph import NetworkXKnowledgeGraph
import json

# ✨ User Input Form

This step initializes the **interactive interface** for entering parameters.  

➡️ First, you will be prompted to **select the paper source**.  
➡️ After that, the form will expand to show the **remaining input fields** needed for the pipeline.  

> 💡 Use this form to configure all inputs before running the pipeline.


In [None]:
import importlib
import sokegraph.ui.ui_inputs

# Re-execute the UI module so that edits made to it are picked up without
# restarting the kernel, and take the form class from the reloaded module.
SOKEGraphUI = importlib.reload(sokegraph.ui.ui_inputs).SOKEGraphUI

# Build the parameter form and show it in the notebook.
ui = SOKEGraphUI()
ui.display_ui()


## 🧠 Step 0: Select and Initialize AI Agent

- The pipeline selects the appropriate AI agent based on your UI choice (`ui.params.AI`).
- Supported providers: `openAI`, `gemini`, `llama`, `ollama`, `claude`.
- For all except `ollama`, an API key file is required.

**How it works:**
- The code checks your selection and creates the corresponding agent instance.
- If an unsupported provider is chosen, it raises an error.

> **Tip:**  
> Make sure your API key file exists and is valid for the selected provider (except `ollama`).


In [None]:
LOG.info("üöÄ Starting Full Pipeline")

# 0. Setup AI agent
# Every provider name offered by the UI maps to a zero-argument factory, so
# only the selected agent is ever constructed. `ollama` is the one provider
# that is built without an API key file.
ai_tool: AIAgent
agent_factories = {
    "openAI": lambda: OpenAIAgent(
        ui.params.api_keys_file, ui.params.field_of_interest
    ),
    "gemini": lambda: GeminiAgent(
        ui.params.api_keys_file, ui.params.field_of_interest
    ),
    "llama": lambda: LlamaAgent(
        ui.params.api_keys_file, ui.params.field_of_interest
    ),
    "ollama": lambda: OllamaAgent(ui.params.field_of_interest),
    "claude": lambda: ClaudeAgent(
        ui.params.api_keys_file, ui.params.field_of_interest
    ),
}

build_agent = agent_factories.get(ui.params.AI)
if build_agent is None:
    # Unknown provider: fail loudly instead of continuing without an agent.
    raise ValueError(f"Unsupported AI provider: {ui.params.AI}")
ai_tool = build_agent()

## 📄 Step 1: Select and Fetch Papers

- Chooses the paper source based on your UI selection:
  - **Semantic Scholar:** Needs `number_papers` and `paper_query_file`
  - **PDF Zip:** Needs `pdfs_file`
  - **Journal API:** Needs `paper_query_file` and `api_key_file`
- Logs an error if required inputs are missing or the source is invalid.
- Calls `fetch_papers()` to get the papers for the next steps.

> ⚠️ Make sure you provide the correct files and parameters for your chosen source.

In [None]:
# 1. Select paper source
# Start from None so that EVERY failure path below (unknown source, or a known
# source whose required inputs are missing) leaves `paper_source` defined.
# Previously the "missing inputs" branches only logged an error, so the check
# in step 2 either raised NameError on a fresh kernel or silently reused the
# source object left over from an earlier run of this cell.
paper_source: "BasePaperSource | None" = None

if ui.params.paper_source == "Semantic Scholar":
    # Needs both the number of papers to retrieve and the query file.
    if not ui.params.number_papers or not ui.params.paper_query_file:
        LOG.error("‚ùå 'number_papers' and 'paper_query_file' are required for Semantic Scholar source.")
    else:
        paper_source = SemanticScholarPaperSource(
            num_papers=int(ui.params.number_papers),
            query_file=ui.params.paper_query_file,
            output_dir=ui.params.output_dir,
        )

elif ui.params.paper_source == "PDF Zip":
    # Needs only the ZIP archive of PDFs.
    if not ui.params.pdfs_file:
        LOG.error("‚ùå 'pdfs_file' (ZIP file) is required for PDF source.")
    else:
        paper_source = PDFPaperSource(
            zip_path=ui.params.pdfs_file,
            output_dir=ui.params.output_dir,
        )

elif ui.params.paper_source == "Journal API":
    # Needs the query file and the journal API key file.
    if not ui.params.paper_query_file or not ui.params.api_key_file:
        LOG.error("‚ùå 'paper_query_file' and 'api_key_file' are required for Journal API source.")
    else:
        paper_source = JournalApiPaperSource(
            query_file=ui.params.paper_query_file,
            api_key_file=ui.params.api_key_file,
            output_dir=ui.params.output_dir,
        )

else:
    LOG.error("‚ùå Invalid or unsupported paper source selected.")

# 2. Fetch papers
# `papers_path` is always assigned; it is the empty string when no source
# could be created, so later cells can detect that nothing was fetched.
if paper_source is not None:
    papers_path = paper_source.fetch_papers()
else:
    papers_path = ""


## 🧠 Step 2: Update Ontology

- Uses the retrieved papers and selected AI agent to enrich the ontology file.
- The updated ontology is saved for the next steps.

In [None]:
# Reload the agent and ontology modules (in this exact order) so that edits
# made to them are picked up without restarting the kernel, then re-bind the
# class names used below to the reloaded definitions.
import importlib
import sokegraph.agents.openai_agent
importlib.reload(sokegraph.agents.openai_agent)
from sokegraph.agents.openai_agent import OpenAIAgent
import sokegraph.agents.ai_agent
importlib.reload(sokegraph.agents.ai_agent)
from sokegraph.agents.ai_agent import AIAgent
import sokegraph.ontology.ontology_updater
importlib.reload(sokegraph.ontology.ontology_updater)
from sokegraph.ontology.ontology_updater import OntologyUpdater

# 2. Update ontology
ontology_updater = OntologyUpdater(
    ui.params.ontology_file,
    papers_path,
    ai_tool,
    ui.params.output_dir,
)

# Enrich the base ontology with the user's keywords (one per non-blank line of
# the paper query file). The query file is not a required input for every
# paper source (the PDF Zip source only needs `pdfs_file`), so this step is
# skipped when no file was provided instead of crashing inside `open()`.
custom_prompt = None
if ui.params.paper_query_file:
    with open(ui.params.paper_query_file, "r", encoding="utf-8") as f:
        user_keywords = [ln.strip() for ln in f if ln.strip()]

    OntologyUpdater.enrich_base_with_keywords(
        base_ontology_path=str(ui.params.ontology_file),
        keywords=user_keywords,
        ai_tool=ai_tool,
        prompt_overrides=custom_prompt,
    )
else:
    LOG.info("No paper query file provided; skipping keyword enrichment of the base ontology.")

# Enrich the ontology from the fetched papers; the returned path is kept for
# later use.
updated_ontology_path = ontology_updater.enrich_with_papers()

## 🕸 Step 3: Build the Knowledge Graph

- Builds a knowledge graph from the enriched ontology.
- Supports two backends:
  - **Neo4j:** Uses credentials from a JSON file.
  - **NetworkX:** Builds an in-memory graph.
- The graph is created and ready for exploration or analysis.

In [None]:
# Reload the graph modules so that edits made to them are picked up without
# restarting the kernel, then re-bind the NetworkX backend class.
import importlib
import sokegraph.graph.knowledge_graph
import sokegraph.graph.networkx_knowledge_graph
importlib.reload(sokegraph.graph.knowledge_graph)
importlib.reload(sokegraph.graph.networkx_knowledge_graph)
from sokegraph.graph.networkx_knowledge_graph import NetworkXKnowledgeGraph

# 3. Build knowledge graph
LOG.info(" Building knowledge graph ....")

# Pick the backend selected in the UI. Both backends are built from the
# ontology file written by the ontology updater.
graph_builder: KnowledgeGraph
if ui.params.kg_type == "neo4j":
    # Neo4j connection details come from a JSON file with the keys
    # "neo4j_uri", "neo4j_user" and "neo4j_pass".
    with open(ui.params.kg_credentials_file, "r", encoding="utf-8") as f:
        credentials = json.load(f)
    graph_builder = Neo4jKnowledgeGraph(
        ontology_updater.output_path,
        credentials["neo4j_uri"],
        credentials["neo4j_user"],
        credentials["neo4j_pass"],
    )
elif ui.params.kg_type == "networkx":
    graph_builder = NetworkXKnowledgeGraph(ontology_updater.output_path)
else:
    # Without this branch an unknown backend left `graph_builder` unbound (or
    # pointing at a graph from an earlier run), and the failure only surfaced
    # as a confusing NameError / wrong graph on the next line.
    raise ValueError(f"Unsupported knowledge graph type: {ui.params.kg_type}")

graph_builder.build_graph()


LOG.info("üéâ Pipeline Completed Successfully")

In [None]:
from sokegraph.graph.knowledge_graph import KnowledgeGraph
from sokegraph.graph.networkx_knowledge_graph import NetworkXKnowledgeGraph

# Standalone shortcut: build a NetworkX graph directly from an ontology file
# at a fixed path, without going through the UI parameters or earlier steps.
onotology_file = "external/output/updated_ontology.json"
graph_builder: KnowledgeGraph = NetworkXKnowledgeGraph(onotology_file)
graph_builder.build_graph()



In [None]:

# Explore the graph held by the current `graph_builder` interactively.
graph_builder.show_subgraph_interactive()  # opens the dropdown UI in Jupyter

## 📊 Step 4: Rank Papers Based on Ontology and Keywords

- Instantiates `PaperRanker` with the AI tool, fetched papers, enriched ontology, keywords file, and output directory.
- Calls `rank_papers()` to score and rank papers by relevance to your research interests.

In [None]:
# 4. Rank papers
import importlib
import sokegraph.ranking.paper_ranker

# Reload the ranking module so that edits made to it are picked up without
# restarting the kernel, then re-bind the ranker class.
importlib.reload(sokegraph.ranking.paper_ranker)
from sokegraph.ranking.paper_ranker import PaperRanker

# The ranker is given the AI agent, the fetched papers, the ontology file
# written by the ontology updater, the keywords file and the output directory.
ranker = PaperRanker(
    ai_tool,
    papers_path,
    ontology_updater.output_path,
    ui.params.keywords_file,
    ui.params.output_dir,
)

# rank_papers() returns a pair, unpacked here into two result objects.
results_csv, results_all = ranker.rank_papers()