In [1]:
# NOTE(review): this list of PDF paths is not read by any later cell shown here;
# `documents` is rebound to the loader's output in the text-loading cell below.
documents = ["bmad_doc/bmad-manual.pdf"]

In [7]:
%load_ext autoreload
%autoreload 1
import glob
import os
%aimport clean

# Run every bmad_doc/*.tex file through clean.clean_latex_file and write the
# result to clean_bmad_doc/<same base name>.txt.
output_dir = "clean_bmad_doc"

# Make sure the destination exists before the first write; harmless if it is
# already there (or if clean_latex_file creates it itself — not visible here).
os.makedirs(output_dir, exist_ok=True)

tex_files = glob.glob("bmad_doc/*.tex")
for file in tex_files:
    # "bmad_doc/foo.tex" -> "foo"
    filename_base = os.path.splitext(os.path.basename(file))[0]

    # Create the output file path
    output_file = os.path.join(output_dir, f"{filename_base}.txt")

    # Process the file
    clean.clean_latex_file(file, output_file)

In [8]:
# Loaders are imported from langchain_community, matching the embeddings and
# vector-store imports used elsewhere in this notebook (the old
# langchain.document_loaders path is deprecated).
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Load every .txt file under clean_bmad_doc/ (recursive glob), reading each
# one as UTF-8 text via TextLoader.
loader = DirectoryLoader(
    "clean_bmad_doc",
    glob="**/*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"}
)
documents = loader.load()

In [17]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks. Separators are listed from coarsest
# (blank line) to finest (empty string).
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=2048,  # Adjust based on your needs
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

In [18]:
# Spot-check a single chunk (index 3) to see what the splitter produced.
sample_chunk = texts[3]
print(sample_chunk.page_content)

The general syntax for an expression based "group" element is
  name: GROUP = {ele1[attrib1]:exp1, ele2[attrib2]:exp2, ...}, 
          VAR = {var1, var2, ...}, var1 = init_val1, 
          old_var1 = old_init_val1, ..., GANG = logical, ...
For an expression based "overlay" element, the syntax is identical except OVERLAY is
substituted for GROUP and there are no old values to set:
  name: OVERLAY = {ele1[attrib1]:exp1, ele2[attrib2]:exp2, ...}, 
          VAR = {var1, var2, ...}, var1 = init_val1, GANG = logical, ...
For an expression based "ramper" element, the syntax is identical to the "overlay" element
except "RAMPER" is used in place of "OVERLAY" and there is no "GANG" attribute.

In the above, "Name" is the name of the controller element, "ele1", "ele2", ... are the
elements whose attributes are to be controlled, "attrib1", "attrib2", etc. are the controlled
attributes (called "slave" attributes), "var1", "var2", etc. are the control variables, and
"exp1", "exp2", etc. are the ar

In [ ]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise the chunks (lightweight local model).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [ ]:
from langchain_community.vectorstores import FAISS

# Build a FAISS index from the chunks and persist it to the faiss_tex/ folder.
vector_store = FAISS.from_documents(documents=texts, embedding=embeddings)
vector_store.save_local(folder_path="faiss_tex")

In [ ]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# Recreate the embedding model with the same model name used when the index
# was built, then reload the index saved under faiss_tex/.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# stored index data; only point this at an index folder you created yourself.
vector_store = FAISS.load_local(
    "faiss_tex",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [2]:
# Retriever over the vector store; fetch the top k=5 results per query.
retriever = vector_store.as_retriever(search_kwargs=dict(k=5))

In [5]:
# Test query
query = "quadrupole tracking methods"
results = retriever.invoke(query)

# Check retrieved text: a separator line followed by the chunk text, per hit.
for doc in results:
    print('\n!--------------------\n', doc.page_content, sep='\n')



!--------------------

%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
\section{Quadrupole Tracking}
\label{s:quadrupole.std}
\index{quadrupole}

The \vn{bmad_standard} calculates the transfer map through an upright
quadrupole and then transforms that map to the laboratory frame.

!--------------------

\begin{example}
  tracking_method      = <Switch>   ! phase space tracking method.
  mat6_calc_method     = <Switch>   ! 6x6 transfer matrix calculation.
  spin_tracking_method = <Switch>   ! Spin tracking method.
\end{example}
Example:
\begin{example}
  q2: quadrupole, tracking_method = symp_lie_ptc
  q2[tracking_method] = symp_lie_ptc
  quadrupole::*[tracking_method] = symp_lie_ptc
\end{example}
The first two lines of this example have exactly the same effect in terms of setting the
\vn{tracking_method}. The third line shows how to set the \vn{tracking_method} for an 