In [1]:
!pip install langchain==0.0.142 openai==0.27.4 beautifulsoup4==4.12.2 chromadb==0.3.21 GitPython==3.1.31

Collecting langchain==0.0.142
  Downloading langchain-0.0.142-py3-none-any.whl.metadata (8.5 kB)
Collecting openai==0.27.4
  Downloading openai-0.27.4-py3-none-any.whl.metadata (13 kB)
Collecting beautifulsoup4==4.12.2
  Downloading beautifulsoup4-4.12.2-py3-none-any.whl.metadata (3.6 kB)
Collecting chromadb==0.3.21
  Downloading chromadb-0.3.21-py3-none-any.whl.metadata (5.7 kB)
Collecting GitPython==3.1.31
  Downloading GitPython-3.1.31-py3-none-any.whl.metadata (1.3 kB)
Collecting SQLAlchemy<2,>=1 (from langchain==0.0.142)
  Downloading SQLAlchemy-1.4.54-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain==0.0.142)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl.metadata (22 kB)
Collecting gptcache>=0.1.7 (from langchain==0.0.142)
  Downloading gptcache-0.1.44-py3-none-any.whl.metadata (24 kB)
Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain==0.

In [2]:
import os
from getpass import getpass

import pandas as pd
import requests
from bs4 import BeautifulSoup

from langchain.agents import initialize_agent, Tool
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import (
    GitLoader,
    YoutubeLoader,
    DataFrameLoader
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [3]:
# Knowledge base
def _is_kb_article(file_path):
    """Return True for ``.adoc`` files whose path contains "articles"."""
    return file_path.endswith(".adoc") and "articles" in file_path


# Load the filtered files of the knowledge-base repository (master branch),
# using ./repos/kb/ as the local repository path.
kb_loader = GitLoader(
    clone_url="https://github.com/neo4j-documentation/knowledge-base",
    repo_path="./repos/kb/",
    branch="master",
    file_filter=_is_kb_article,
)
kb_data = kb_loader.load()
print(len(kb_data))

309


In [4]:
# Define text chunk strategy
# Chunks are split on single spaces, with chunk_size=2000 and chunk_overlap=50.
splitter = CharacterTextSplitter(
    separator=" ",
    chunk_size=2000,
    chunk_overlap=50,
)


# GDS guides
def _is_gds_page(file_path):
    """Return True for ``.adoc`` files whose path contains "pages"."""
    return file_path.endswith(".adoc") and "pages" in file_path


gds_loader = GitLoader(
    clone_url="https://github.com/neo4j/graph-data-science",
    repo_path="./repos/gds/",
    branch="master",
    file_filter=_is_gds_page,
)
gds_data = gds_loader.load()

# Split documents into chunks
gds_data_split = splitter.split_documents(gds_data)
print(len(gds_data_split))

864


In [6]:
! pip install youtube-transcript-api

Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl.metadata (15 kB)
Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.2


In [7]:
# Youtube
# Load the content for one video id and chunk it with the shared splitter.
YT_VIDEO_ID = "1sRgsEKlUr0"

yt_loader = YoutubeLoader(YT_VIDEO_ID)
yt_data = yt_loader.load()

yt_data_split = splitter.split_documents(yt_data)
print(len(yt_data_split))

10


In [8]:
# Medium
# The CSV is semicolon-separated; the "url" column is copied into "source"
# so that each loaded document carries it as metadata.
article_url = "https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/medium/neo4j_articles.csv"
medium = pd.read_csv(article_url, sep=";").assign(
    source=lambda frame: frame["url"]
)

medium_loader = DataFrameLoader(
    medium[["text", "source"]],
    page_content_column="text",
)
medium_data = medium_loader.load()

medium_data_split = splitter.split_documents(medium_data)
print(len(medium_data_split))

2244


In [9]:
# Stackoverflow
# Collect answered Neo4j questions that have an accepted answer from the
# Stack Exchange API and turn each question/answer pair into a Document.
SO_API_URL = "https://api.stackexchange.com/2.3/questions"
SO_LAST_PAGE = 19  # pages 1..19 are requested, 100 questions per page


def _html_to_text(html):
    """Return the plain text of an HTML fragment from the API."""
    # Naming the parser explicitly avoids bs4's "guessed parser" warning and
    # keeps the result independent of which optional parsers are installed.
    return BeautifulSoup(html, "html.parser").get_text()


def _question_to_document(question):
    """Build a Document from one question dict.

    Returns None when the question carries no accepted answer body, so the
    caller can skip it instead of failing with an IndexError.
    """
    accepted_body = next(
        (
            answer["body"]
            for answer in question.get("answers", [])
            if answer.get("is_accepted")
        ),
        None,
    )
    if accepted_body is None:
        return None
    # Build one real string. The previous code accidentally built a tuple and
    # stored its repr (parentheses, quotes, escaped "\n") as the page content.
    text = (
        "Title: " + question["title"] + "\n"
        + "Question: " + _html_to_text(question["body"]) + "\n"
        + _html_to_text(accepted_body)
    )
    return Document(page_content=text, metadata={"source": question["link"]})


so_data = []
for page in range(1, SO_LAST_PAGE + 1):
    params = {
        "order": "desc",
        "sort": "creation",
        "filter": "!-MBrU_IzpJ5H-AG6Bbzy.X-BYQe(2v-.J",
        "tagged": "neo4j",
        "site": "stackoverflow",
        "pagesize": 100,
        "page": page,
    }
    # Send GET request to Stack Overflow API; fail loudly on HTTP errors
    # (e.g. throttling) instead of hitting a KeyError on the payload later.
    response = requests.get(SO_API_URL, params=params, timeout=30)
    response.raise_for_status()
    items = response.json().get("items", [])
    if not items:
        # Nothing left to page through.
        break

    # Keep only resolved questions and convert them to documents
    for question in items:
        if not (question["is_answered"] and question.get("accepted_answer_id")):
            continue
        document = _question_to_document(question)
        if document is not None:
            so_data.append(document)
print(len(so_data))

806


In [12]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.1 MB[0m [31m16.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.7.0


In [13]:
# Define embedding model
# The key is read from the OPENAI_API_KEY environment variable, falling back
# to an interactive (non-echoing) prompt, so the secret is never stored in the
# notebook. Any key that was ever pasted into a shared notebook must be
# treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# "sales" collection: Medium article chunks plus YouTube chunks.
sales_data = medium_data_split + yt_data_split
sales_store = Chroma.from_documents(
    sales_data, embeddings, collection_name="sales"
)

# "support" collection: knowledge-base documents, GDS chunks and
# Stack Overflow documents.
support_data = kb_data + gds_data_split + so_data
support_store = Chroma.from_documents(
    support_data, embeddings, collection_name="support"
)



In [14]:
# Chat model shared by both QA chains and the agent; temperature=0 and
# responses capped at 512 tokens.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    max_tokens=512,
    temperature=0,
)

In [15]:
# Prompt for the sales chain; {context} and {question} are the two template
# variables declared on the PromptTemplate below.
sales_template = """As a Neo4j marketing bot, your goal is to provide accurate and helpful information about Neo4j,
a powerful graph database used for building various applications.
You should answer user inquiries based on the context provided and avoid making up answers.
If you don't know the answer, simply state that you don't know.
Remember to provide relevant information about Neo4j's features, benefits,
and use cases to assist the user in understanding its value for application development.

{context}

Question: {question}"""

SALES_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=sales_template,
)

# "stuff" chain over the sales vector store, using the prompt above.
sales_retriever = sales_store.as_retriever()
sales_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=sales_retriever,
    chain_type_kwargs={"prompt": SALES_PROMPT},
)

In [16]:
# Smoke-test the sales chain with a general question.
sales_question = "What are the main benefits of using Neo4j?"
print(sales_qa.run(sales_question))

Some of the main benefits of using Neo4j include:

1. Speed and Performance: Neo4j is known for its fast performance, especially when it comes to querying complex relationships in data. This makes it ideal for applications that require real-time data analysis.

2. Flexibility: Neo4j's graph data model allows for flexible and dynamic data structures, making it easy to adapt to changing data requirements without the need for complex schema changes.

3. Intuitive Data Modeling: The graph data model used by Neo4j closely aligns with how we naturally think about and work with data, making it easier to model and query relationships between entities.

4. Scalability: Neo4j can handle large amounts of data and complex relationships, making it suitable for applications that require scalability and high availability.

5. Graph Algorithms: Neo4j provides a wide range of built-in graph algorithms that can be used to analyze and extract insights from connected data, making it a powerful tool for da

In [17]:
# Prompt for the support chain; unlike the sales prompt it explicitly allows
# the model to go beyond the retrieved context.
support_template = """
As a Neo4j Customer Support bot, you are here to assist with any issues
a user might be facing with their graph database implementation and Cypher statements.
Please provide as much detail as possible about the problem, how to solve it, and steps a user should take to fix it.
If the provided context doesn't provide enough information, you are allowed to use your knowledge and experience to offer you the best possible assistance.

{context}

Question: {question}"""

SUPPORT_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=support_template,
)

# "stuff" chain over the support vector store, using the prompt above.
support_retriever = support_store.as_retriever()
support_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=support_retriever,
    chain_type_kwargs={"prompt": SUPPORT_PROMPT},
)

In [18]:
# Smoke-test the support chain with a how-to question.
gds_install_question = """
I am having my graph in a VM and i want to use GDS Plugins in my graph.
I didn't see any proper documentation to install it in my server where it was working only on locally.
Anyone clear me this to install the GDS Plugin in the VM ?
"""
print(support_qa.run(gds_install_question))

To install the GDS plugin in your VM where your Neo4j server is running, you can follow these steps:

1. Download the compatible version of the Neo4j Graph Data Science Library from the Neo4j website.

2. Decompress the downloaded file and locate the `neo4j-graph-data-science-[version].jar` file.

3. Move the `neo4j-graph-data-science-[version].jar` file into the `plugins` directory of your Neo4j server. The location of this directory can vary depending on your setup, but it is usually found under the `plugins` directory within the Neo4j installation directory.

4. Edit the `neo4j.conf` file located in the `conf` directory of your Neo4j installation. Add the following configuration entry to allow unrestricted access to GDS procedures:
```
dbms.security.procedures.unrestricted=gds.*
```

5. Check if the procedure allowlist is enabled in the `neo4j.conf` file. If it is, add the GDS library to the allowlist:
```
dbms.security.procedures.allowlist=gds.*
```

6. Restart your Neo4j server to

In [19]:
# the zero-shot-react-description agent will use the "description" string to select tools
# Each tool wraps the .run method of one of the two QA chains.
sales_tool = Tool(
    name="sales",
    func=sales_qa.run,
    description="""useful for when a user is interested in various Neo4j information,
                       use-cases, or applications. A user is not asking for any debugging, but is only
                       interested in general advice for integrating and using Neo4j.
                       Input should be a fully formed question.""",
)
support_tool = Tool(
    name="support",
    func=support_qa.run,
    description="""useful for when when a user asks to optimize or debug a Cypher statement or needs
                       specific instructions how to accomplish a specified task.
                       Input should be a fully formed question.""",
)
tools = [sales_tool, support_tool]

In [20]:
# Agent that chooses between the "sales" and "support" tools; verbose=True
# prints the intermediate reasoning steps.
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
)

In [21]:
# Ask the agent a question; the bare last expression is shown as cell output.
agent_question = "What are some GPT-4 applications with Neo4j?"
agent.run(agent_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should provide general advice on integrating GPT-4 with Neo4j.
Action: sales
Action Input: Provide information on how GPT-4 can be integrated with Neo4j for various applications.[0m
Observation: [36;1m[1;3mAs of my last update, I do not have specific information on integrating GPT-4 with Neo4j for various applications. However, Neo4j's flexible and powerful graph database can be used in conjunction with various technologies and tools to enhance application development. 

Neo4j's graph data model is well-suited for representing complex relationships and can be integrated with machine learning models like GPT-4 to analyze and derive insights from connected data. By leveraging Neo4j's capabilities for storing and querying graph data, developers can create applications that benefit from the rich contextual information provided by graph structures.

I recommend exploring Neo4j's documentation and resources for guidance on int

"For specific information on integrating GPT-4 with Neo4j for various applications, it is recommended to explore Neo4j's documentation, resources, developer community, or support team for guidance and assistance."

In [22]:
!pip install streamlit openai langchain

Collecting streamlit
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Downloading streamlit-1.38.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m118.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.9/82.9 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: watchdog, pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.38.0 wat

In [23]:
import streamlit as st
from langchain.llms import OpenAI

# NOTE: per Streamlit's own warning, this code is meant to live in a script
# launched with `streamlit run <script>`; executed directly in a notebook
# kernel it only emits warnings.
st.title('🦜🔗 Quickstart App')

# The first argument of text_input is the visible widget label, so it must be
# descriptive text and never the secret itself. The user types the key at
# runtime and type='password' masks it on screen.
openai_api_key = st.sidebar.text_input('OpenAI API Key', type='password')

def generate_response(input_text):
  """Send ``input_text`` to an OpenAI LLM and show the reply in an info box.

  Uses the module-level ``openai_api_key`` entered in the sidebar.
  """
  completion_model = OpenAI(temperature=0.7, openai_api_key=openai_api_key)
  reply = completion_model(input_text)
  st.info(reply)

with st.form('my_form'):
  text = st.text_area('Enter text:', 'What are the three key pieces of advice for learning how to code?')
  submitted = st.form_submit_button('Submit')
  # A key is accepted only if it starts with the 'sk-' prefix.
  key_looks_valid = openai_api_key.startswith('sk-')
  if not key_looks_valid:
    st.warning('Please enter your OpenAI API key!', icon='⚠')
  elif submitted:
    # Only query the model once the form was submitted with a plausible key.
    generate_response(text)

2024-09-30 17:25:13.303 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-09-30 17:25:13.312 Session state does not function when running a script without `streamlit run`
