Skip to content

Commit

Permalink
Merge pull request #142 from jupyter-naas/129-kg---push-abi-databases…
Browse files Browse the repository at this point in the history
…-to-neo4j

feat: push abi databases to neo4j and create streamlit app
  • Loading branch information
FlorentLvr committed May 13, 2024
2 parents 2bfa156 + cd85daf commit 1394f74
Show file tree
Hide file tree
Showing 20 changed files with 1,818 additions and 2 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@
.parquet
.logs

# Models
# Models
pipeline_executions

# Outputs
outputs

# App
__pycache__
.streamlit/secrets.toml
.env
.DS_Store
4 changes: 4 additions & 0 deletions __pipeline__.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@
"entity_start = 0\n",
"entity_end = None\n",
"limit_linkedin = 30\n",
"if not naas.is_production():\n",
" limit_linkedin = 5\n",
"limit_llm = 50\n",
"if not naas.is_production():\n",
" limit_llm = 5\n",
"cron = \"0 12 * * *\"\n",
"\n",
"# Outputs\n",
Expand Down
6 changes: 6 additions & 0 deletions apps/streamlit-chat/.env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
OPENAI_API_KEY=""
OPENAI_MODEL=""
NEO4J_URI=""
NEO4J_USERNAME=""
NEO4J_PASSWORD=""
AVATAR=""
5 changes: 5 additions & 0 deletions apps/streamlit-chat/.gitpod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
tasks:
- init: >
pyenv install --force 3.11 &&
pyenv global 3.11 &&
pip install -r requirements.txt
110 changes: 110 additions & 0 deletions apps/streamlit-chat/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from langchain.agents import AgentExecutor, create_react_agent
from llm import llm
from langchain.tools import Tool
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from tools.vector import kg_qa
from tools.cypher import cypher_qa

def run_retriever(query):
    """Answer a content question via the Neo4j vector index.

    Args:
        query: The full natural-language question from the user.

    Returns:
        The "result" string produced by the retrieval QA chain.
    """
    # Fix: removed leftover debug print of the raw chain output, for
    # consistency with run_cypher below (the chain already logs its own
    # activity when verbose mode is on).
    results = kg_qa.invoke({"query": query})
    return results["result"]

def run_cypher(query):
    """Translate the question into Cypher via the QA chain and return its answer."""
    answer = cypher_qa.invoke({"query": query})
    return answer["result"]

tools = [
Tool.from_function(
name="General Chat",
description="For content creation and general chat not covered by other tools",
func=llm.invoke,
return_direct=True
),
Tool.from_function(
name="Vector Search Index",
description="""Useful to provide information about content.
Not useful for any sort of aggregation like counting the number of posts, ranking and filtering by date, etc.
Use full question as input.
""",
func = run_retriever,
return_direct=True
),
Tool.from_function(
name="Graph Cypher QA Chain",
description="""Useful when you need to answer questions about content, concepts, target, objective, content types and their dependencies.
Also useful for any sort of aggregation like counting the number of posts, ranking and filtering by date, etc.
Use full question as input.
""",
func = run_cypher,
return_direct=True
),
]

# Sliding-window conversation memory: only the last k=5 exchanges are
# kept and replayed into the prompt as {chat_history}.
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True,
)

# ReAct prompt driving the agent. {tools} and {tool_names} are filled in
# by create_react_agent; {chat_history} comes from the window memory above.
# Fix: corrected grammar in the instruction lines ("a user ask",
# "always returned the title", "number results") so the LLM receives
# well-formed instructions.
agent_prompt = PromptTemplate.from_template("""
Act as a Content Assistant who has access to valuable data and insights about the content strategy.
Your role is to manage and optimize the content, ensuring it reaches the target audience effectively.
When a user asks a question related to posts, always return the title, the url and the date to identify them.
When a user asks for a list of something, first always return the number of results and the first 3 results as a bullet list.
Then ask if the user wants to have the full list if it is not complete.
Do not answer any questions that do not relate to content.
Use your pre-trained knowledge only to answer questions about content creation with the information provided in the context.
TOOLS:
------
You have access to the following tools:
{tools}
To use a tool, please use the following format:
```
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
```
When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
```
Thought: Do I need to use a tool? No
Final Answer: [your response here]
```
Begin!
Previous conversation history:
{chat_history}
New input: {input}
{agent_scratchpad}
""")

# Assemble the ReAct agent and wrap it in an executor that wires in the
# tools and conversation memory. verbose=True prints each reasoning step.
agent = create_react_agent(llm, tools, agent_prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=memory,
    verbose=True
)

def generate_response(prompt):
    """
    Run the conversational agent on *prompt* and return the final
    answer text to be rendered in the UI.
    """
    result = agent_executor.invoke({"input": prompt})
    return result["output"]
45 changes: 45 additions & 0 deletions apps/streamlit-chat/bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import streamlit as st
from utils import write_message
from agent import generate_response
from os import environ

# Set page config.
page_title = "Naas.ai"
# Tab icon is a small inline base64-encoded PNG so no external asset is needed.
page_icon = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAMAAABF0y+mAAAAbFBMVEUYGhwQEhUAAAYVFxkGCw4AAAAVGBpOT1GLjY5GR0koKSttbnCqqqu2treRk5RTVFUbHR8TFRgkJSf////IycnR0dEzNDa7vLz29va+v7/Y2Nmenp/n6OgABwtcXV7f4OB5enukpKU6Oz0/QEKodfx9AAAAfklEQVR4AdVPxRHEQAwLL6PDDP3XeMye+yf6mC0pOCbCKIyTNE3DDM9iQhkXUmljHZp5BXkBV8jSoWEFUBdV0wJ0PsbDuivTlFXQmxAPm/LSHMYJZjwEmVwSVzaggz/Dq7LOxjsfWgnLcE0ChawE2SrK+La2XZMfuCR+J4fEGdDqCumQc8zDAAAAAElFTkSuQmCC"
st.set_page_config(
    page_title,
    page_icon=page_icon
)

# Set up Session State: Streamlit re-runs the script on every interaction,
# so only seed the chat history with the greeting on first load.
# Fix: corrected greeting typo "I'm the Your Content Assistant".
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Hi, I'm your Content Assistant! How can I help you?"},
    ]

# Submit handler
def handle_submit(message):
    """
    Generate the assistant's reply to *message* via the LangChain agent
    (agent.generate_response) and write it to the chat UI, showing a
    spinner while the agent runs.
    """

    # Handle the response
    with st.spinner('Thinking...'):

        response = generate_response(message)
        write_message('assistant', response)

# Display messages in Session State. The full history is replayed on
# every script re-run; save=False avoids re-appending stored messages.
for message in st.session_state.messages:
    write_message(message['role'], message['content'], save=False)

# Handle any user input
if prompt := st.chat_input("What is up?"):
    # Display user message in chat message container
    write_message('user', prompt)

    # Generate a response
    handle_submit(prompt)
13 changes: 13 additions & 0 deletions apps/streamlit-chat/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import streamlit as st
from langchain_community.graphs import Neo4jGraph
from os import environ

# Neo4j connection settings, read from the environment (see .env.template).
uri = environ.get("NEO4J_URI")
username = environ.get("NEO4J_USERNAME")
password = environ.get("NEO4J_PASSWORD")

# Shared graph handle, imported by the Cypher QA chain (tools/cypher.py).
graph = Neo4jGraph(
    url=uri,
    username=username,
    password=password,
)
16 changes: 16 additions & 0 deletions apps/streamlit-chat/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import streamlit as st
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from os import environ

# OpenAI credentials and model name come from the environment (see .env.template).
openai_api_key = environ.get("OPENAI_API_KEY")
model = environ.get("OPENAI_MODEL")

# Chat model shared by the agent and both QA chains.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model=model,
)

# Embedding model used by the Neo4j vector index (tools/vector.py).
embeddings = OpenAIEmbeddings(
    openai_api_key=openai_api_key
)
6 changes: 6 additions & 0 deletions apps/streamlit-chat/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
langchain==0.1.17
openai==1.26.0
langchain_openai==0.1.6
neo4j-driver==5.2.1
streamlit==1.34.0
naas-python==1.3.3
35 changes: 35 additions & 0 deletions apps/streamlit-chat/tools/cypher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from langchain.chains import GraphCypherQAChain
from langchain.prompts.prompt import PromptTemplate
from llm import llm
from graph import graph

# Prompt for the Cypher-generation step. {schema} and {question} are
# injected by GraphCypherQAChain at run time.
# Fixes: removed the dangling "Example Cypher Statements:" header that
# had no examples under it (it invites the LLM to hallucinate examples),
# and corrected grammar in the Fine Tuning instruction.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions about content and provide recommendations.
Convert the user's question based on the schema.
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Fine Tuning:
When a user asks about "performance" or "engagement" return the property "engagements" or "engagement_score" if it's not empty or equal to 0 from the Content node.
Schema:
{schema}
Question:
{question}
"""

cypher_prompt = PromptTemplate.from_template(CYPHER_GENERATION_TEMPLATE)

# Chain: the LLM writes Cypher from the prompt, the chain runs it against
# the graph, then the LLM summarizes the query result as the answer.
cypher_qa = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    cypher_prompt=cypher_prompt
)
29 changes: 29 additions & 0 deletions apps/streamlit-chat/tools/vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import streamlit as st
from langchain_community.vectorstores.neo4j_vector import Neo4jVector
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import RetrievalQA
from llm import llm, embeddings
from os import environ

# Neo4j connection settings, read from the environment (see .env.template).
uri = environ.get("NEO4J_URI")
username = environ.get("NEO4J_USERNAME")
password = environ.get("NEO4J_PASSWORD")

# Vector store backed by the pre-built "content" index on Content nodes.
# NOTE(review): Neo4jVector.from_existing_index documents
# `text_node_property` as a single property name, but a list is passed
# here (the plural `text_node_properties` belongs to from_existing_graph)
# — confirm this behaves as intended with the pinned langchain version.
neo4jvector = Neo4jVector.from_existing_index(
    embeddings, # embedding function applied to incoming queries
    url=uri, # connection URI
    username=username, # credentials
    password=password, # credentials
    index_name="content", # name of the existing vector index
    node_label="Content", # label of the indexed nodes
    text_node_property=['entity', 'scenario', 'source', 'published_date', 'id', 'title', 'text', 'concept', 'sentiment', 'target', 'objective', 'views', 'likes', 'comments', 'shares', 'engagements', 'engagement_score', 'type', 'author_name', 'author_url', 'length', 'people_mentioned', 'organization_mentioned', 'content_title_shared', 'content_url_shared', 'linkedin_links', 'image_shared', 'tags', 'url', 'date_extract', 'scenario_order'], # node properties surfaced as document text
    embedding_node_property="embedding", # property holding the stored vectors
)

retriever = neo4jvector.as_retriever()

# Retrieval QA chain: "stuff" packs all retrieved documents directly
# into the LLM prompt. Exposed to the agent as run_retriever (agent.py).
kg_qa = RetrievalQA.from_chain_type(
    llm, # answer-generation model
    chain_type="stuff", # stuff all retrieved docs into one prompt
    retriever=retriever, # the Neo4j vector retriever above
)
22 changes: 22 additions & 0 deletions apps/streamlit-chat/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import streamlit as st
from os import environ

# Fixed assistant avatar image; the human avatar is configurable via the
# AVATAR environment variable (see .env.template).
avatar_assistant = "https://naasai-public.s3.eu-west-3.amazonaws.com/abi-demo/content_creation.png"
avatar_human = environ.get("AVATAR")

def write_message(role, content, save=True):
    """
    Render one chat message in the Streamlit UI and, unless *save* is
    False, record it in the session-state history first.
    """
    # Persist to session state so the message survives script re-runs.
    if save:
        st.session_state.messages.append({"role": role, "content": content})

    # Pick the avatar matching the speaker, then render as markdown.
    avatar = avatar_assistant if role == "assistant" else avatar_human
    with st.chat_message(role, avatar=avatar):
        st.markdown(content)
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,9 @@
" \n",
" # Sort values\n",
" df[\"SCENARIO_ORDER\"] = pd.to_datetime(df['PUBLISHED_DATE'].str[:19], format='%Y-%m-%d %H:%M:%S').dt.strftime(\"%Y%W\")\n",
" \n",
" # Drop duplicates\n",
" df = df.drop_duplicates(\"TITLE\", keep=\"last\")\n",
" df = df.sort_values(by=[\"PUBLISHED_DATE\", \"ENTITY\"], ascending=[False, True])\n",
" return df.reset_index(drop=True)\n",
" \n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@
"sheet_name = \"GROWTH\"\n",
"input_image = \"growth_trend.png\"\n",
"input_html = \"growth_trend.html\"\n",
"notification_days = [1, 2, 4, 6]\n",
"\n",
"# Outputs\n",
"output_dir = os.path.join(entity_dir, \"growth-engine\", date.today().isoformat())\n",
Expand Down Expand Up @@ -407,7 +408,7 @@
},
"outputs": [],
"source": [
"if len(email_to) > 0:\n",
"if len(email_to) > 0 and date.today().weekday() in notification_days:\n",
" naas.notification.send(\n",
" email_to=email_to,\n",
" subject=email_subject,\n",
Expand Down
Loading

0 comments on commit 1394f74

Please sign in to comment.