In [None]:
# ENVIRONMENT

from aisurveywriter.tasks import ReferenceExtractor
from aisurveywriter.core.llm_handler import LLMHandler
import aisurveywriter.core.file_handler as fh
from aisurveywriter.utils import get_all_files_from_paths
from aisurveywriter.core.pipeline import PaperPipeline
from aisurveywriter.core.paper import PaperData
import aisurveywriter.tasks as tks

import os
# Publish the Google API key through the process environment. The value is the
# "google_key" entry of what fh.read_credentials returns for the credentials file.
os.environ.update(
    {"GOOGLE_API_KEY": fh.read_credentials("../credentials.yaml")["google_key"]}
)

# Alternative local backend, currently disabled:
# llm = LLMHandler(model="qwen2.5:14b", model_type="ollama", temperature=0.5)

# Read the prompt and review YAML configurations, in that order.
prompts, review = (
    fh.read_yaml(config_path)
    for config_path in (
        "../templates/prompt_config.yaml",
        "../templates/review_config.yaml",
    )
)

In [None]:
# EXTRACT PDF IMAGES
from aisurveywriter.core.pdf_processor import PDFProcessor
from aisurveywriter.utils import get_all_files_from_paths

# Build a PDF processor over whatever get_all_files_from_paths returns for the
# reference directory, then have it extract images into ../bib/imgs.
reference_files = get_all_files_from_paths("../refexamples")
pdfs = PDFProcessor(reference_files)
pdfs.extract_images(
    save_dir="../bib/imgs",
)

In [None]:
from aisurveywriter.tasks.paper_faiss_ref import PaperFAISSReferencer, PaperData
from aisurveywriter.core.text_embedding import load_embeddings
from aisurveywriter.core.latex_handler import write_latex

# Load the generated paper from LaTeX and drop its first section before
# running the referencer over it.
paper = PaperData.from_tex(
    "../out/generated-rev.tex",
    subject="Langmuir and Langmuir-Blodgett films",
)
paper.sections = paper.sections[1:]

embed = load_embeddings("dunzhang/stella_en_1.5B_v5", "huggingface")

# Keyword options for the FAISS-based referencer. The FAISS database is both
# loaded from and saved to the same temp/ location.
referencer_options = {
    "local_faissdb_path": "temp/stella_en_1.5B_v5",
    "save_usedbib_path": "temp/test.bib",
    "save_faiss_path": "temp/stella_en_1.5B_v5",
    "max_per_section": 60,
    "max_per_sentence": 1,
    "confidence": 0.7,
}
ref = PaperFAISSReferencer(
    embed,
    "../bib/refextract-21papers.bib",
    **referencer_options,
)

# Replace the paper with the referencer's result and render it with the
# template, pointing at the bibliography path passed as save_usedbib_path.
paper = ref.reference(paper)
write_latex(
    "../templates/paper_template.tex",
    paper,
    "temp/test.tex",
    bib_path="temp/test.bib",
)

In [None]:
from aisurveywriter.tasks.figure_extractor import FigureExtractor, PaperData
from aisurveywriter.core.llm_handler import LLMHandler
from aisurveywriter.core.file_handler import read_yaml
from aisurveywriter.core.text_embedding import load_embeddings
from aisurveywriter.utils import get_all_files_from_paths

# The same FAISS location is passed to the extractor constructor and to the
# index-building call below, so keep it in one place.
faiss_index_path = "../bib/stella_en_1.5B_v5-allimgfaiss"

llm = LLMHandler(model="gemini-2.0-flash", model_type="google")
embed = load_embeddings("dunzhang/stella_en_1.5B_v5", "huggingface")

fig = FigureExtractor(
    llm,
    embed,
    "Langmuir and Langmuir-Blodgett films",
    pdf_paths=get_all_files_from_paths("../refexamples", stem_sort=True),
    save_dir="../bib/allimgs",
    faiss_save_path=faiss_index_path,
    local_faiss_path=None,
    paper=None,
    request_cooldown_sec=0,
)

# Feed the "data" entry of the image-data YAML to the extractor's
# (private) FAISS helper, saving to the shared index path.
image_records = read_yaml("../bib/refextract-allimgdata.yaml")["data"]
faiss = fig._imgdata_faiss(image_records, faiss_save_path=faiss_index_path)

In [None]:
from aisurveywriter.core.file_handler import read_yaml
import os
import shutil
import yaml

# Filter the image metadata: keep every entry whose "path" does not mention
# "OliveiraO2022", then write the survivors to nochu-imgdata.yaml.
img_data = read_yaml("../bib/filtered-imgdata.yaml")

# NOTE(review): `indir` is not used in this cell, and nothing is written into
# the `nochu` directory here — presumably reserved for copying images; confirm.
indir = "../bib/filteredimgs"
nochu = "./nochuimgs"
os.makedirs(nochu, exist_ok=True)

no_chu_data = [
    entry
    for entry in img_data["data"]
    if "OliveiraO2022" not in entry["path"]
]

# NOTE(review): the input nests its entries under a "data" key, but the output
# is dumped as a bare list — confirm that downstream readers expect this shape.
with open("nochu-imgdata.yaml", mode="w", encoding="utf-8") as out_file:
    yaml.safe_dump(no_chu_data, out_file)