In [74]:
from dotenv import load_dotenv
import os
from typing_extensions import List, TypedDict

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from langchain_chroma import Chroma

from langchain import hub
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage
from langchain_core.documents import Document
from langchain_core.tools import tool

from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode, tools_condition


load_dotenv()

True

In [75]:
def initialization(file: str):    
    llm = ChatOpenAI(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_JEY"))
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=os.getenv("OPENAI_API_JEY"))
    vector_store = Chroma(embedding_function=embeddings)
    loader = PyPDFLoader(file_path=file, extract_images=True)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    
    docs = loader.load()
    all_splits = text_splitter.split_documents(docs)
    _ = vector_store.add_documents(documents=all_splits)
    
    graph_builder = StateGraph(MessagesState)
    
    @tool(response_format="content_and_artifact")
    def retrieve(query: str):
        """Retrieve information related to a query."""
        retrieved_docs = vector_store.similarity_search(query, k=2)
        serialized = "\n\n".join(
            (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
            for doc in retrieved_docs
        )
        return serialized, retrieved_docs
    
    def query_or_respond(state: MessagesState):
        llm_with_tools = llm.bind_tools([retrieve])
        response = llm_with_tools.invoke(state["messages"])
        return {"messages": [response]}
    
    tools = ToolNode([retrieve])
    
    def generate(state: MessagesState):
        """Generate answer."""
        # Get generated ToolMessages
        recent_tool_messages = []
        for message in reversed(state["messages"]):
            if message.type == "tool":
                recent_tool_messages.append(message)
            else:
                break
        tool_messages = recent_tool_messages[::-1]
    
        # Format into prompt
        docs_content = "\n\n".join(doc.content for doc in tool_messages)
        system_message_content = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know. Use three sentences maximum and keep the "
            "answer concise."
            "\n\n"
            f"{docs_content}"
        )
        conversation_messages = [
            message
            for message in state["messages"]
            if message.type in ("human", "system")
            or (message.type == "ai" and not message.tool_calls)
        ]
        prompt = [SystemMessage(system_message_content)] + conversation_messages
    
        # Run
        response = llm.invoke(prompt)
        return {"messages": [response]}
    
    graph_builder.add_node(query_or_respond)
    graph_builder.add_node(tools)
    graph_builder.add_node(generate)
    
    graph_builder.set_entry_point("query_or_respond")
    graph_builder.add_conditional_edges(
        "query_or_respond",
        tools_condition,
        {END: END, "tools": "tools"},
    )
    graph_builder.add_edge("tools", "generate")
    graph_builder.add_edge("generate", END)
    
    graph = graph_builder.compile()
    
    return graph

In [76]:
from IPython.display import Image, display

graph = initialization()
display(Image(graph.get_graph().draw_mermaid_png()))



TypeError: initialization() missing 1 required positional argument: 'file'

In [77]:
input_message = "Hello"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Hello

Hello! How can I assist you today?


In [78]:
input_message = "What is Money laundering?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


What is Money laundering?
Tool Calls:
  retrieve (call_bbG2dXDjnapLb88fvCOGRKXF)
 Call ID: call_bbG2dXDjnapLb88fvCOGRKXF
  Args:
    query: What is money laundering?
Name: retrieve

Source: {'page': 0, 'source': 'data/AML_IEEE_ACCESS_2024.pdf'}
Content: I. INTRODUCTION
Money laundering is a globally challenging economic
concern. TheUN Vienna 1988 Convention describes it as
‘‘the conversion or transfer of property, knowing that such
property is derived from any offence (s), to conceal or
The associate editor coordinating the review of this manuscript and
approving it for publication was Ines Domingues
.
disguise the illicit origin of the property or of assisting any
person who is involved in such offence (s) to evade the legal
consequences of his actions. ’’. Being a global issue, money
laundering results in approximately 0.8 to 2.0$ trillion being
laundered every year, which equates to 2 to 5% of the world’s
GDP [1], [2]. Being a global issue, money laundering results
in approximately

In [79]:
from streamlit.runtime.scriptrunner import RerunException
from streamlit.runtime.runtime import Runtime
import streamlit as st

def streamlit_app():
    st.title("Hey there! I'm Scholarly. Ready to review your paper and give you feedback. Let’s get started!")
    uploaded_file = st.file_uploader('Upload your paper in .pdf format', type="pdf")
    if uploaded_file is not None:
        graph = initialization(uploaded_file)
    
    with st.form("my_form"):
        text = st.text_area(
            "Enter text:",
            "Should this paper be accepted?",
        )
        submitted = st.form_submit_button("Submit")
        
        if submitted:
            graph.invoke({"messages": ["Hi, Who are you?"]})

streamlit_app()

2024-12-01 17:09:57.309 
  command:

    streamlit run /Users/kuchikihater/Desktop/Scholarly/.venv/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-12-01 17:09:57.315 Session state does not function when running a script without `streamlit run`


In [80]:
from streamlit.runtime.scriptrunner import RerunException
from streamlit.runtime.runtime import Runtime

Runtime()._start_web_server()


TypeError: Runtime.__init__() missing 1 required positional argument: 'config'