In [4]:
# import getpass
import os
import bs4
from os import path
os.environ['USER_AGENT'] = 'myagent'
from langchain import hub
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyMuPDFLoader
# from langchain_unstructured import UnstructuredLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
os.environ["LANGSMITH_TRACING"] = "false"

In [21]:
del vector_store
embeddings = OllamaEmbeddings(model="deepseek-r1:70b")
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db/earthquake/",
)

In [22]:
# Load and chunk contents of the blog
#loader = WebBaseLoader(
#    web_paths=("http://www.dengzhou.gov.cn/dzszj/zjxw/zwyw/webinfo/2024/11/1723956574402713.htm",),
#    bs_kwargs=dict(
#        parse_only=bs4.SoupStrainer(
#            class_=("post-content", "post-title", "post-header")
#        )
#    ),
#)
libdir = "../library/Earthquake"
papers = []
for f in os.listdir(libdir):
    if f.endswith(".pdf"):
        papers.append(path.normpath(path.abspath(path.join(libdir, f))))
docs = []
for f in papers:
    loader = PyMuPDFLoader(
        f,
        mode = 'single'
)
    docs += loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4096, chunk_overlap=256)
all_splits = text_splitter.split_documents(docs)

In [23]:
# Index chunks
_ = vector_store.add_documents(documents=all_splits)

In [24]:
# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")

llm = ChatOllama(
    model="deepseek-r1:70b",
    temperature=0.0,
    num_predict=-1
)

# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [25]:
response = graph.invoke({"question": "介绍地震过程的物理模型、数值仿真与数据分析。"})
print(response["answer"])

<think>
Okay, so I'm trying to understand this problem about finite element modeling in geophysics. The user provided some text that seems to be from a research paper or something similar, discussing tectonic plates, GPS observations, and deformation in northeast Japan. They also mentioned figures showing plate boundaries and displacement rates.

First, I need to figure out what exactly the question is asking for. It just says "Answer:" after providing all that context. Maybe they want an explanation of how finite element modeling (FEM) applies to the geophysical data presented? Or perhaps they're looking for a step-by-step process on setting up such a model?

I remember that FEM is a numerical method used to solve partial differential equations, which is common in various fields like engineering and geosciences. In geophysics, it's often used to model things like stress distributions, deformation of the Earth's crust, or fluid flow.

Looking at the context provided, they talk about su