In [None]:
!pip install langchain openai weaviate-client datasets

In [None]:
import inspect
import re
import os
import weaviate
import datasets

from getpass import getpass
from langchain import OpenAI, PromptTemplate
from langchain.chains import LLMChain, LLMMathChain, TransformChain, SequentialChain
from langchain.callbacks import get_openai_callback

In [None]:
# Use WEAVIATE_URL from the environment when it is set (allows an unattended
# "Run All"); otherwise ask for it with a labelled, non-echoing prompt.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL") or getpass("Weaviate URL: ")

··········


In [None]:
# Use WEAVIATE_API_KEY from the environment when it is set; otherwise ask for
# it with a labelled, non-echoing prompt so the key never lands in the notebook.
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY") or getpass("Weaviate API key: ")

··········


In [None]:
# Use OPENAI_API_KEY from the environment when it is set; otherwise ask for
# it with a labelled, non-echoing prompt so the key never lands in the notebook.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

··········


## Create the OpenAI instance

Instantiate the OpenAI model with a low temperature (0) so that the responses stay as close as possible to the provided context.

In [None]:
# LLM wrapper used by every chain below; temperature 0 is the lowest setting.
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    temperature=0,
)

Test the LLM with this example prompt.

In [None]:
# Smoke-test prompt: the context paragraph contains the answer to the question,
# and the instructions ask the model to reply "I don't know" when it does not.
prompt = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: Large Language Models (LLMs) are the latest models used in NLP.
Their superior performance over smaller models has made them incredibly
useful for developers building NLP enabled applications. These models
can be accessed via Hugging Face's `transformers` library, via OpenAI
using the `openai` library, and via Cohere using the `cohere` library.

Question: Which libraries and model providers offer LLMs?

Answer: """

In [None]:
# Send the smoke-test prompt to the model and print the returned completion.
print(llm(prompt))

Hugging Face, OpenAI, and Cohere offer LLMs.


## Create the Weaviate client

In [None]:
# API-key credentials for the Weaviate instance.
auth_config = weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY)

# Client bound to the instance URL entered above.
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=auth_config,
)

### Import Data

In [None]:
# Weaviate class definition for the documentation chunks.
# Both properties carry a text2vec-transformers module config: `content` has
# skip=False and `source` has skip=True.
CONTEXT_CHUNK_SCHEMA = {
    "class": "Context_Chunk",
    "description": "Context Chunks",
    "vectorizer": "text2vec-transformers",
    "vectorIndexType": "hnsw",
    "moduleConfig": {
        "text2vec-transformers": {
            "vectorizeClassName": False,
            "poolingStrategy": "masked_mean",
        },
    },
    "properties": [
        {
            "name": "content",
            "description": "The content of the chunk",
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-transformers": {
                    "vectorizePropertyName": False,
                    "skip": False,
                },
            },
        },
        {
            "name": "source",
            "description": "URL of the source",
            "dataType": ["string"],
            "moduleConfig": {
                "text2vec-transformers": {
                    "vectorizePropertyName": False,
                    "skip": True,
                },
            },
        },
    ],
}

In [None]:
# Uncomment to drop the class (and every object stored in it) before recreating it:
# client.schema.delete_class('Context_Chunk')

In [None]:
# Create the class only when it is missing, so the notebook can be re-run
# against the same Weaviate instance without failing on an existing class.
existing_classes = {
    cls["class"] for cls in client.schema.get().get("classes", [])
}
if CONTEXT_CHUNK_SCHEMA["class"] not in existing_classes:
    client.schema.create_class(CONTEXT_CHUNK_SCHEMA)

In [None]:
# Split name -> JSONL file inside the dataset repository; only the
# cl100k_base split is loaded.
data_files = {
    "cl100k_base": "train-cl100k_base.jsonl",
    "p50k_base": "train-p50k_base.jsonl",
}
dataset = datasets.load_dataset(
    "kaahila/sugarcrm_130_documentation",
    data_files=data_files,
    split="cl100k_base",
)
dataset

Downloading readme:   0%|          | 0.00/459 [00:00<?, ?B/s]

Downloading and preparing dataset json/kaahila--sugarcrm_130_documentation to /root/.cache/huggingface/datasets/kaahila___json/kaahila--sugarcrm_130_documentation-a9456e71b5ba780e/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/3.91M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.57M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating cl100k_base split: 0 examples [00:00, ? examples/s]

Generating p50k_base split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/kaahila___json/kaahila--sugarcrm_130_documentation-a9456e71b5ba780e/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.


Dataset({
    features: ['id', 'text', 'source'],
    num_rows: 2389
})

In [None]:
# Inspect the first record to see its fields.
dataset[0]

{'id': '1daa17712cd0-0',
 'text': "User Interface\nOverview\nSugar's user interface is dependent on the client (i.e. base, mobile, or portal) being used to access the system. Clients are the various platforms that use Sugar's APIs to render the user interface. Each platform type will have a specific path for its components. While the Developer Guide mainly covers the base client type, the following sections will outline the various metadata locations.\nClients\nClients are the various platforms that access and useÂ\xa0Sidecar to render content. Depending on the platform you are using, the layout, view, and metadata will be driven based on its client type. The following sections describe the client types.\nbase\nThe base client is the Sugar application that you use to access your data from a web browser. The framework's specific views, layouts, and fields are rendered usingÂ\xa0Sidecar. Â\xa0Files specific to this client type can be found in the following directories:\n./clients/base/\n

In [None]:
# URL prefixes whose pages are left out of the import.
exclude_urls = [
    'https://support.sugarcrm.com/Documentation/Sugar_Developer/Sugar_Developer_Guide_13.0/Integration/Web_Services/REST_API/Endpoints/'
]


def url_is_excluded(url):
    """Return True when ``url`` starts with any prefix in ``exclude_urls``.

    Every prefix is checked (the previous version returned after looking at
    the first one only), and an empty ``exclude_urls`` yields False rather
    than None.

    Args:
        url: Source URL of a dataset record.

    Returns:
        bool: True if the URL should be skipped, False otherwise.
    """
    # exclude_urls is read at call time, so later edits to the list are honored.
    return any(url.startswith(prefix) for prefix in exclude_urls)

In [None]:
from tqdm.auto import tqdm

# Import through the client's batch API instead of issuing one create request
# per record: objects are sent in groups of 100 and flushed when the `with`
# block exits.
# NOTE(review): batch import is expected to overwrite objects whose UUID
# already exists, which makes this cell re-runnable; confirm against the
# Weaviate version in use.
client.batch.configure(batch_size=100)

with client.batch as batch:
    for data in tqdm(dataset):
        # Skip pages under an excluded URL prefix.
        if url_is_excluded(data['source']):
            continue

        batch.add_data_object(
            data_object={
                'content': data['text'],
                'source': data['source'],
            },
            class_name="Context_Chunk",
            # Deterministic UUID derived from the dataset id, so the same
            # record always maps to the same Weaviate object.
            uuid=weaviate.util.generate_uuid5(data['id']),
        )
  

  0%|          | 0/2389 [00:00<?, ?it/s]

In [None]:
import json

# Near-text search over the imported chunks, limited to five hits; the raw
# GraphQL response is pretty-printed.
nearText = {"concepts": ["Which is the right path to define an custom record view"]}

result = client.query.get(
    "Context_Chunk", ["content", "source"]
).with_near_text(nearText).with_limit(5).do()

print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Context_Chunk": [
        {
          "content": "Overriding Layouts\nOverview\nThis page explains how to override a stock layout component. For this example, we will extend the stock record view and create a custom view named \"my-record\" that will be used in our record layout's override. This example involves two\u00c2\u00a0steps:\nOverride the Layout\nExtend the View\nThese steps are explained in the following sections.\nOverriding the Layout\nFirst,\u00c2\u00a0copy ./clients/base/layouts/record/record.php to ./custom/clients/base/layouts/record/record.php. Once copied, modify the following line from:\n'view' => 'record',\nTo:\n'view' => 'my-record',\nThat line will change the record layout from using the base record.js view, ./clients/base/views/record/record.js, to instead use a custom view that we will create in ./custom/clients/base/views/my-record/my-record.js. At this point, the\u00c2\u00a0custom layout override should be very similar to the 

## Create a Question-and-Answer Chain with LangChain

In [None]:
from langchain.vectorstores.weaviate import Weaviate
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
import weaviate

Create the vectorstore and add the "*source*" field from the Weaviate schema to the attributes. This ensures that the source of each content is also returned in your results.

In [None]:
# LangChain vector store over the Context_Chunk class; `content` is the text
# field and `source` is requested as an additional attribute.
vectorstore = Weaviate(
    client=client,
    index_name="Context_Chunk",
    text_key="content",
    attributes=['source'],
)

In [None]:
# Retrieval QA chain of type "stuff" that also returns the retrieved documents.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

Ask some questions.

In [None]:
# German for: "In which directory do I have to put my custom access control lists?"
query = "In welchem Directory muss ich meine custom Access Control Listen ablegen?"
# Run the retrieval QA chain on the question.
result = qa({"query": query})

Take a look at the response from the LLM.

In [None]:
# The generated answer is stored under the 'result' key.
result['result']

'Die custom Access Control Listen müssen im Verzeichnis "custom" abgelegt werden.'

With **return_source_documents=True** in the **RetrievalQA** chain, the **source_documents** that the LLM used to create the answer are also included in the result.





In [None]:
# Number of retrieved documents returned alongside the answer.
len(result['source_documents'])

4

In [None]:
# Source URL of every document returned with the answer.
print([
    doc.metadata['source']
    for doc in result['source_documents']
])

['https://support.sugarcrm.com/Documentation/Sugar_Developer/Sugar_Developer_Guide_13.0/Security/Web_Server_Configuration/index.html', 'https://support.sugarcrm.com/Documentation/Sugar_Developer/Sugar_Developer_Guide_13.0/Architecture/Languages/Managing_Lists/index.html', 'https://support.sugarcrm.com/Documentation/Sugar_Developer/Sugar_Developer_Guide_13.0/Data_Framework/Vardefs/Specifying_Custom_Indexes_for_Import_Duplicate_Checking/index.html', 'https://support.sugarcrm.com/Documentation/Sugar_Developer/Sugar_Developer_Guide_13.0/Data_Framework/Vardefs/Working_With_Indexes/index.html']


## Create a Question-Answering Chatbot that keeps the chat history


In [None]:
from langchain.vectorstores.weaviate import Weaviate
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
import weaviate

# Conversational chain over the same vector store. This rebinds `qa`, which
# previously held the RetrievalQA chain.
qa = ChatVectorDBChain.from_llm(llm, vectorstore)

# Accumulated (question, answer) pairs passed back to the chain on every turn.
chat_history = []

print("Please enter a question or dialogue to get started!")
print("(Submit an empty line to quit.)")

while True:
    try:
        query = input("")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving an error in the cell output.
        break

    # An empty or whitespace-only line is the explicit way to leave the loop.
    if not query.strip():
        break

    result = qa({"question": query, "chat_history": chat_history})
    print(result["answer"])

    # Append rather than overwrite, so earlier turns stay available to the chain.
    # NOTE(review): the history grows without bound; very long sessions may
    # need trimming to stay within the model's context window.
    chat_history.append((query, result["answer"]))

Welcome to the Weaviate ChatVectorDBChain Demo!
Please enter a question or dialogue to get started!
Wie bekomme ich einen DBAL querybuilder in sugarcrm?




To get a DBAL querybuilder in SugarCRM, you can use the Doctrine QueryBuilder class that is already integrated into SugarCRM. This can be accessed through the SugarQuery class, located in ./include/SugarQuery/SugarQuery.php. Simply create a new instance of SugarQuery and use its methods to build your query using the Doctrine QueryBuilder syntax.
How to load an account Bean?
Um einen Account Bean zu laden, können Sie die Methode getBean() der BeanFactory-Klasse verwenden und den Modulnamen "Accounts" sowie die ID des gewünschten Datensatzes als Parameter übergeben. Zum Beispiel:

$bean = BeanFactory::getBean('Accounts', '15bcf01c-1e1e-11e8-9e13-f45c89a8598f');

Dies lädt den Account Bean mit der ID "15bcf01c-1e1e-11e8-9e13-f45c89a8598f". Beachten Sie, dass Sie auch zusätzliche Zugriffsrechte für den Bean setzen können, indem Sie das acl_team_set_id-Attribut des Beans festlegen und dann die save()-Methode aufrufen.


KeyboardInterrupt: ignored