In [28]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from tqdm import tqdm
from pathlib import Path
import json
import os
from src.agents.dimension_extractor import DimensionExtractor
import pandas as pd

In [29]:
# Read variables from a local .env file into the process environment before
# the chat model is created (presumably this supplies the provider API key --
# confirm against the deployment setup).
load_dotenv()

# Chat model handle reused by the cells below.
anthropic = init_chat_model(
    "anthropic:claude-haiku-4-5",
    max_tokens=5000,   # forwarded to the model as its max_tokens setting
    timeout=30,        # forwarded as the client timeout (presumably seconds -- confirm)
    temperature=0.5,   # non-zero, so repeated runs may produce different outputs
)

In [30]:
# Directory layout, relative to the notebook's working directory.
# Sub-folders are derived from DATA_DIR so the root is spelled only once.
DATA_DIR = "data"
PARSED_DIR = f"{DATA_DIR}/papers"    # parsed "<doc_id>.pdf.json" inputs
OUTPUT_DIR = f"{DATA_DIR}/output"    # per-paper extraction results
GOLD_DIR = f"{DATA_DIR}/evaluation"  # not referenced by the cells shown here

In [31]:
# Single switch for the model handed to the agents; points at the chat model
# created above.
LLM_MODEL = anthropic

In [32]:
# Agent instance used by analyze_paper; constructed once with the selected model.
extractor_agent = DimensionExtractor(LLM_MODEL)

In [33]:
# Load JSON data
def parse_json(doc_id: str):
    """Load the parsed JSON document for one paper.

    Args:
        doc_id: Paper file name without its ``.pdf.json`` suffix.

    Returns:
        The object decoded by ``json.load`` from
        ``PARSED_DIR/<doc_id>.pdf.json``.

    Raises:
        FileNotFoundError: If that file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    parsed_json_path = os.path.join(PARSED_DIR, doc_id + ".pdf.json")

    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform locale codec, which breaks on non-ASCII text on some systems.
    with open(parsed_json_path, "r", encoding="utf-8") as f:
        return json.load(f)

In [34]:
def analyze_paper(doc_id: str):
    """Run the dimension extractor on one parsed paper and save its output.

    Loads ``<doc_id>.pdf.json`` through ``parse_json``, passes it to
    ``extractor_agent.go_to_work`` and writes whatever that call returns to
    ``OUTPUT_DIR/<doc_id>.pdf.json`` as indented UTF-8 JSON.

    Args:
        doc_id: Paper file name without its ``.pdf.json`` suffix.

    Returns:
        None. The result is only written to disk.

    Raises:
        FileNotFoundError: If the parsed input file is missing.
        TypeError: If the extractor output is not JSON-serializable.
    """
    tqdm.write(f"Analyzing paper ID: {doc_id}")
    output_path = os.path.join(OUTPUT_DIR, doc_id + ".pdf.json")

    parsed_json = parse_json(doc_id)

    tqdm.write("Analysing dimensions...")
    content_output = extractor_agent.go_to_work(
        user_instructions="Please analyse and extract the following input:",
        input_data=parsed_json,
    )

    # Serialize before touching the file so a non-serializable result cannot
    # leave a truncated output file behind.
    serialized = json.dumps(content_output, indent=4, ensure_ascii=False)

    # A fresh checkout may not have the output folder yet.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be pinned instead of depending on the platform locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(serialized)

In [35]:
def assembly_csv():
    doc_ids = sorted([f.stem for f in Path(OUTPUT_DIR).glob("*.json")])

    rows = []

    for doc_id in tqdm(doc_ids, desc="Assembling CSV"):
        pass

    # Create DataFrame
    df = pd.DataFrame(rows)

    # Save CSV
    df.to_csv("data/output.csv", index=False, sep=";", encoding="utf-8-sig")

In [36]:
# Build the work list from the parsed-paper folder. Only entries that actually
# end in ".pdf.json" are kept: os.listdir also returns stray files and
# sub-folders, which would otherwise become bogus ids and make parse_json
# fail with FileNotFoundError.
DOC_IDS = sorted(
    file_name.removesuffix(".pdf.json")
    for file_name in os.listdir(PARSED_DIR)
    if file_name.endswith(".pdf.json")
)

# One extractor call per paper; analyze_paper writes each result to disk.
for doc_id in tqdm(DOC_IDS):
    analyze_paper(doc_id)

  0%|          | 0/2 [00:00<?, ?it/s]

Analyzing paper ID: 1.Many hands make light work
Analysing dimensions...


 50%|█████     | 1/2 [00:05<00:05,  5.92s/it]

Analyzing paper ID: 100.Agent-Based Trust and Reputation Model in Smart IoT Environments
Analysing dimensions...


100%|██████████| 2/2 [00:13<00:00,  6.60s/it]
