In [4]:
# import getpass
import os
import bs4
from os import path
os.environ['USER_AGENT'] = 'myagent'
from langchain import hub
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyMuPDFLoader
# from langchain_unstructured import UnstructuredLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# os.environ["LANGSMITH_TRACING"] = "false"

In [2]:
embeddings = OllamaEmbeddings(model="deepseek-r1:70b")
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db/earthquake/",
)

In [14]:
# Load and chunk contents of the blog
#loader = WebBaseLoader(
#    web_paths=("http://www.dengzhou.gov.cn/dzszj/zjxw/zwyw/webinfo/2024/11/1723956574402713.htm",),
#    bs_kwargs=dict(
#        parse_only=bs4.SoupStrainer(
#            class_=("post-content", "post-title", "post-header")
#        )
#    ),
#)
libdir = "../library/Earthquake"
papers = []
for f in os.listdir(libdir):
    if f.endswith(".pdf"):
        papers.append(path.normpath(path.abspath(path.join(libdir, f))))
docs = []
for f in papers:
    loader = PyMuPDFLoader(
        f,
        mode = 'single'
)
    docs += loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
all_splits = text_splitter.split_documents(docs)

In [None]:
# Index chunks
_ = vector_store.add_documents(documents=all_splits)

In [4]:
# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")

llm = ChatOllama(
    model="deepseek-r1:70b",
    temperature=0.0,
    num_predict=-1
)

# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [7]:
response = graph.invoke({"question": "compare structurally connected interferometer and separated interferometer"})
print(response["answer"])

<think>
Okay, so I need to compare structurally connected interferometers and separated interferometers based on the provided context. Let me go through the context step by step.

First, looking at section 5.6.4 about formation flying spacecraft, it mentions that free-flying systems are inherently uncoupled. This suggests that separated interferometers don't have a physical connection between the components, which makes sense because they're on different spacecraft. The synchronization strategy here involves maintaining a desired baseline length, which is crucial for interferometry as it affects resolution.

In contrast, structurally connected interferometers would have their components physically linked, perhaps on the same platform or structure. This likely provides mechanical stability and easier alignment since everything is fixed in place. However, this setup might limit flexibility, especially if you need to adjust the baseline length for different observations.

The context also

In [9]:
all_splits

[Document(metadata={'producer': 'Adobe Acrobat 9.41 Paper Capture Plug-in', 'creator': 'Adobe Acrobat 9.0', 'creationdate': '2011-06-21T10:06:33+08:00', 'source': '/Users/huo/Projects/ai4s/library/Earthquake/earthquake-processes-physical-modelling-numerical-simulation-and-data-analysis-part1.pdf', 'file_path': '/Users/huo/Projects/ai4s/library/Earthquake/earthquake-processes-physical-modelling-numerical-simulation-and-data-analysis-part1.pdf', 'total_pages': 263, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2012-10-26T07:47:17+08:00', 'trapped': '', 'modDate': "D:20121026074717+08'00'", 'creationDate': "D:20110621100633+08'00'", 'page': 2}, page_content="Earthquake Processes: Physical \nModelling, Numerical Simulation \nand Data Analysis \nPart I \nEdited by \nMitsuhiro Matsu'ura \nPeterMora \nAndrea DonneUan \nXiang-chu Yin \nSpringer Basel AG"),
 Document(metadata={'producer': 'Adobe Acrobat 9.41 Paper Capture Plug-in', 'creator': 'Adobe 