In [1]:
from aisurveywriter.tasks import ReferenceExtractor
from aisurveywriter.core.llm_handler import LLMHandler
import aisurveywriter.core.file_handler as fh
from aisurveywriter.utils import get_all_files_from_paths
from aisurveywriter.core.pipeline import PaperPipeline
from aisurveywriter.core.paper import PaperData
import aisurveywriter.tasks as tks

import os
# Copy the "google_key" entry of ../credentials.yaml into the GOOGLE_API_KEY
# environment variable, so the key itself is never written in the notebook.
# NOTE(review): presumably consumed by the Google embedding client built in a
# later cell — confirm.
os.environ["GOOGLE_API_KEY"]=fh.read_credentials("../credentials.yaml")["google_key"]

# llm = LLMHandler(model="qwen2.5:14b", model_type="ollama", temperature=0.5)
# Prompt and review configuration, loaded from the YAML templates.
prompts = fh.read_yaml("../templates/prompt_config.yaml")
review = fh.read_yaml("../templates/review_config.yaml")


# ref = tks.ReferenceExtractor(llm, ref_paths=["../refexamples/ArigaK2023_ChemOfMat.pdf"],
#                              prompt=prompts["reference_extract_prompt"],
#                              raw_save_path="raw.ref", rawbib_save_path="raw.bib",
#                              bib_save_path="final.bib", cooldown_sec=0)
# ref()

In [2]:
import bibtexparser
from langchain_community.docstore.document import Document


# Load the .bib file
def load_bibtex_entries(bib_file):
    """Parse a BibTeX file and wrap every entry in a ``Document`` for embedding.

    Args:
        bib_file: Path to the ``.bib`` file; opened as UTF-8 text.

    Returns:
        A list with one ``Document`` per parsed entry, in file order. The
        ``page_content`` joins the entry's title, abstract, keywords and
        authors into labelled lines, and ``metadata["citation_key"]`` holds
        the entry's ``ID`` (``"Unknown"`` when the entry has none).
    """

    def to_document(record):
        # Missing fields fall back to a placeholder title / empty strings so
        # every entry produces the same four-line layout.
        title = record.get("title", "No Title")
        abstract = record.get("abstract", "")
        keywords = record.get("keywords", "")
        authors = record.get("author", "")

        # Combine relevant fields for embedding
        content = f"Title: {title}\nAbstract: {abstract}\nKeywords: {keywords}\nAuthors: {authors}"
        citation = {"citation_key": record.get("ID", "Unknown")}
        return Document(page_content=content, metadata=citation)

    with open(bib_file, "r", encoding="utf-8") as handle:
        parsed = bibtexparser.load(handle)

    return [to_document(record) for record in parsed.entries]

# Path to the generated .bib file, relative to this notebook's directory.
bib_file_path = "../out/generated-bibdb.bib"
# One Document per BibTeX entry; `documents` is the input to the embedding cell.
documents = load_bibtex_entries(bib_file_path)


In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter

# Google embedding client using the "models/text-embedding-004" model.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
# Splitter configured with chunk_size=1000 and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# Requires `documents` from the BibTeX-loading cell to exist in the kernel.
docs = text_splitter.split_documents(documents)

# Build the FAISS index from the split documents and persist it to the
# "googleembed_faiss_bib_db" path (relative to the working directory).
vector_store = FAISS.from_documents(docs, embedding_model)
vector_store.save_local("googleembed_faiss_bib_db")

In [None]:
# Installs faiss-cpu into the running kernel's environment.
# NOTE(review): this cell comes after the cell that imports FAISS, so on a
# fresh kernel it has to be run first; the version is also unpinned.
%pip install faiss-cpu

In [7]:
from aisurveywriter.core.paper import PaperData

# Load the generated LaTeX paper into a PaperData object, tagging it with its subject.
paper = PaperData.from_tex("../out/generated.tex", subject="Langmuir and Langmuir-Blodgett films")

In [None]:
# Inspect the text form of whatever `paper` currently refers to in the kernel.
print(paper)

In [None]:
# Query the FAISS index with the content of the fifth section (index 4),
# asking for the 30 nearest bibliography chunks.
# Requires `vector_store` from the index-building cell and a loaded `paper`.
results = vector_store.similarity_search(paper.sections[4].content, k=30)
# NOTE(review): -22 is a hard-coded offset from the end of `results`; assuming
# `results` is a list, this raises IndexError when fewer than 22 hits come
# back. The reason for this particular index is not evident here — confirm.
print(results[-22])

In [None]:
from aisurveywriter.tasks.paper_faiss_ref import PaperFAISSReferencer, PaperData
from aisurveywriter.core.text_embedding import load_embeddings

# Reload the generated paper, then drop its first section before referencing.
# NOTE(review): the slice replaces `paper.sections` in place, so only a fresh
# from_tex() call restores the dropped section.
paper = PaperData.from_tex("../out/generated.tex", subject="Langmuir and Langmuir-Blodgett films")
paper.sections = paper.sections[1:]

# Embedding model requested by name through the "huggingface" backend.
embed = load_embeddings("Alibaba-NLP/gte-Qwen2-1.5B-instruct", "huggingface")
# Referencer over ../bib/refextract-21papers.bib, pointed at the local FAISS DB
# in ../bib/qwen2-1.5b; the used-bib and FAISS save paths are both under temp/.
# NOTE(review): exact meaning of max_per_section / max_per_sentence / confidence
# is defined by PaperFAISSReferencer and not visible here — confirm.
ref = PaperFAISSReferencer(embed, "../bib/refextract-21papers.bib", local_faissdb_path="../bib/qwen2-1.5b", save_usedbib_path="temp/test.bib", 
                           save_faiss_path="temp/qwen2-1.5b", max_per_section=40, max_per_sentence=1,confidence=0.9)

# Rebinds `paper` to whatever reference() returns.
paper = ref.reference(paper)

In [None]:
# NOTE(review): this rebinds `paper` to a fresh load (no subject given), so any
# `paper` produced by an earlier cell — including the one returned by
# ref.reference() — is discarded once this cell runs.
paper = PaperData.from_tex("../out/generated.tex")
# List the section titles of the reloaded paper.
print([s.title for s in paper.sections])

In [None]:
# Dump the text returned by full_content() for the current `paper`; the output may be long.
print(paper.full_content())

In [4]:
from aisurveywriter.core.latex_handler import write_latex

# Write the current `paper` to temp/test.tex using the project's paper template,
# passing temp/test.bib as the bibliography path.
# NOTE(review): which `paper` gets written depends on the last cell that
# assigned it — confirm it is the referenced paper and not a plain reload.
write_latex("../templates/paper_template.tex", paper, "temp/test.tex", "temp/test.bib")

In [None]:
# Final check: print the text form of the `paper` object currently in the kernel.
print(paper)