In [None]:
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.vectorstores import Neo4jVector
from langchain.embeddings import GPT4AllEmbeddings
from langchain.embeddings import OllamaEmbeddings
import warnings
warnings.filterwarnings("ignore")

from langchain.tools import DuckDuckGoSearchRun
from langchain.tools import DuckDuckGoSearchResults
import pandas as pd
import time


In [None]:
# Load the classifier catalogue and attach an empty "ddg" column; the search
# loop further down fills it with one search result per row.
data = pd.read_csv("Microsoft_Purview_Classifiers.csv").assign(ddg="")
data.head()

In [None]:
# Create the DuckDuckGo search tool (imported from langchain.tools) that the
# following cells call through `search.run(...)`.
search = DuckDuckGoSearchRun()

In [None]:
# Smoke test: run one fixed question through the search tool and display the result.
search.run("What is Neo4J?")

In [None]:
# For each row, substitute the classifier name into the "{}" placeholder of
# that row's question template, run the search and keep the answer in "ddg".
for idx in data.index:
    classifier_name = data.at[idx, "Classifier"]
    template = data.at[idx, "question"]
    answer = search.run(template.replace("{}", classifier_name))
    print(answer)
    data.at[idx, "ddg"] = answer
    # Wait 5 seconds between requests (presumably to avoid being rate limited).
    time.sleep(5)

In [None]:
# Preview the first rows now that the "ddg" column has been filled in.
data.head()

In [None]:
# Replace every missing value with the placeholder string "N/A".
# NOTE(review): "N/A" is one of pandas' default NA markers, so read_csv parses
# it back to NaN unless default NA handling is disabled when the file is reloaded.
data = data.fillna("N/A")

In [None]:
# Count the remaining missing values per column.
data.isnull().sum()

In [None]:
# Persist the enriched table (without the index) so the search loop does not
# have to be re-run.
data.to_csv("Microsoft_Purview_Classifiers_with_ddg.csv", index=False)

In [None]:

# Reload the enriched table. keep_default_na=False stops pandas from parsing
# the "N/A" placeholder and empty cells back into NaN, which would make the
# string concatenation in combine_description_ddg raise a TypeError.
data = pd.read_csv("Microsoft_Purview_Classifiers_with_ddg.csv", keep_default_na=False)

def combine_description_ddg(row):
    """Join a row's search result and description into one text block.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys ``'ddg'`` and ``'Description'``.

    Returns
    -------
    str
        ``"<ddg>\\n<Description>"``. A missing value (NaN/None, e.g. an empty
        or "N/A" CSV cell parsed by pandas) is rendered as ``"N/A"`` instead of
        raising ``TypeError`` on ``float + str``.
    """
    def _as_text(value):
        # Missing cells arrive as float NaN / None after a CSV round trip.
        return "N/A" if pd.isna(value) else str(value)

    return _as_text(row['ddg']) + "\n" + _as_text(row['Description'])

In [None]:
# Build the "detail" text for every row by applying the combiner row-wise.
data['detail'] = data.apply(combine_description_ddg, axis=1)

In [None]:
# Preview the combined "detail" text for the first rows.
data['detail'].head()

In [None]:
# Keep only the classifier name and its combined text, lower-case the
# classifier column name, and write the file that the CSV loader reads.
data = data[['Classifier', 'detail']].rename(columns={'Classifier': 'classifier'})
data.to_csv("Microsoft_Purview_Classifiers_with_ddg_to_load.csv", index=False)

In [None]:
# Show the column names of the exported frame.
data.columns


In [None]:
# Count missing values per column in the exported frame.
data.isnull().sum()

In [None]:
# Number of rows in the exported frame.
len(data)

In [None]:
from langchain.document_loaders import CSVLoader
 
# Load one document per CSV row; the "classifier" column is stored as document
# metadata rather than in the page content.
# The file was written by pandas, which emits UTF-8 by default, so the encoding
# is stated explicitly instead of relying on the platform default.
loader = CSVLoader(
    "Microsoft_Purview_Classifiers_with_ddg_to_load.csv",
    metadata_columns=["classifier"],
    encoding="utf-8",
)
documents = loader.load()

In [None]:
# Inspect the first loaded document.
documents[0]

In [None]:
# Read the "classifier" entry from the first document's metadata.
documents[0].metadata.get("classifier")

In [None]:
# Show the full metadata dictionary of the first document.
documents[0].metadata

In [None]:
# Show the text content of the first document.
documents[0].page_content

In [None]:
# Number of documents produced by the loader.
len(documents)

# Neo4j
## Install Desktop

https://neo4j.com/download/

## Install Aura

https://neo4j.com/cloud/platform/aura-graph-database/

## Neo4j Sandbox

https://sandbox.neo4j.com/onboarding


## Neo4j Data Science Library and Generative AI

https://neo4j.com/generativeai/


## LangChain & Neo4j
https://python.langchain.com/docs/integrations/vectorstores/neo4jvector

## Text embeddings
https://python.langchain.com/docs/integrations/text_embedding

## GPT4All
https://docs.gpt4all.io/gpt4all_python_embedding.html#generating-embeddings

## Ollama
https://python.langchain.com/docs/integrations/text_embedding/ollama

## OpenAI
https://python.langchain.com/docs/integrations/text_embedding/openai

## spaCy
https://python.langchain.com/docs/integrations/text_embedding/spacy_embedding

In [None]:
import os
from neo4j import GraphDatabase, Result
import getpass
# Connection settings for a Neo4j instance reachable over Bolt on localhost.
url =  "bolt://127.0.0.1:7687"
user = "neo4j"
# Prompt for the password interactively so it is never stored in the notebook.
password = getpass.getpass("Password Neo4J:")

In [None]:
import getpass
import os

# Prompt for the OpenAI key and expose it to this process through the
# OPENAI_API_KEY environment variable; the key is not written into the notebook.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [None]:
from langchain_openai import OpenAIEmbeddings
# Embedding model used to vectorise the documents and the query. No key is
# passed here; presumably it is picked up from OPENAI_API_KEY set earlier.
embeddings = OpenAIEmbeddings()
# Alternative local embedding back-ends (swap one in for the line above):
#embeddings = GPT4AllEmbeddings()
#embeddings = OllamaEmbeddings(model="orca-mini", num_gpu=1, show_progress=True)

In [None]:
# Embed the loaded documents and store them in Neo4j using the connection
# settings defined earlier.
# NOTE(review): re-running this cell presumably inserts the documents again —
# confirm before executing it twice against the same database.
db = Neo4jVector.from_documents(
    documents, embeddings, url=url, username=user, password=password
)

In [None]:
# Sample news article used as the free-text query.
# NOTE(review): "Â£" in the text looks like a mis-decoded "£"; left as-is
# because changing the literal would change the embedded query.
query = """Lufthansa flies back to profit

German airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.

In a preliminary report, the airline announced net profits of 400m euros ($527.61m; Â£274.73m), compared with a loss of 984m euros in 2003. Operating profits were at 380m euros, ten times more than in 2003. Lufthansa was hit in 2003 by tough competition and a dip in demand following the Iraq war and the killer SARS virus. It was also hit by troubles at its US catering business. Last year, Lufthansa showed signs of recovery even as some European and US airlines were teetering on the brink of bankruptcy. The board of Lufthansa has recommended paying a 2004 dividend of 0.30 euros per share. In 2003, shareholders did not get a dividend. The company said that it will give all the details of its 2004 results on 23 March.
"""
# Retrieve the 5 stored documents most similar to the query, with their scores.
docs_with_score = db.similarity_search_with_score(query, k=5)

In [None]:
# Print each hit as: score, matched text, then the classifier it belongs to,
# with an 80-character rule between the parts.
for doc, score in docs_with_score:
    rule = "-" * 80
    classifier = doc.metadata.get('classifier')
    print(rule)
    print("Score: ", score)
    print(doc.page_content)
    print(rule)
    print(f"Class {classifier}")
    print(rule)

In [None]:
# Open a native Neo4j driver with the same URL and credentials as above, for
# running Cypher directly.
# NOTE(review): the driver is never closed in the visible cells; consider
# calling driver.close() once the notebook is finished.
driver = GraphDatabase.driver(
    url,
    auth=(user, password)
)

# Fail fast here if the database cannot be reached with these settings.
driver.verify_connectivity()

In [None]:
# Fetch the id and embedding of up to 1000 nodes that have a `text` property
# and return them as a pandas DataFrame with columns classID and embedding.
# NOTE(review): presumably these are the nodes written by Neo4jVector above —
# the query does not filter by label, so confirm nothing else matches.
class_purview = driver.execute_query("""
MATCH (m) WHERE m.text IS NOT NULL
RETURN m.id AS classID, m.embedding AS embedding
LIMIT 1000
""",
result_transformer_=Result.to_df)

# Number of rows returned by the query.
len(class_purview)

In [None]:
# Preview the first rows of the id/embedding table.
class_purview.head()

In [None]:
# Length of the first stored embedding vector.
len(class_purview.embedding.iloc[0])