In [5]:
import openai
import pandas as pd
import datetime, os, yaml

# Authenticating: the OpenAI key is read from a local file so it never appears in the notebook.
with open('api_key_openai', 'r') as f:
    # Strip surrounding whitespace: a trailing newline in the key file would
    # otherwise become part of the key and make authentication fail.
    openai.api_key = f.read().strip()
    
from langchain import OpenAI
from langchain.chains import LLMChain, ConversationChain
from langchain.chains.conversation.memory import (ConversationBufferMemory, 
                                                  ConversationSummaryMemory, 
                                                  ConversationBufferWindowMemory,
                                                  ConversationKGMemory)
from langchain.callbacks import get_openai_callback


In [6]:

# Low-temperature completion model, authenticated with the key loaded above.
llm = OpenAI(
    openai_api_key=openai.api_key,
    temperature=0.1,
)

# One-shot prompt; the bare call on the last line makes the completion the cell output.
text = "What would be a good company name for a company that makes colorful socks?"
llm(text)

'\n\nRainbow Sockery'

In [None]:
from langchain.callbacks import get_openai_callback

def count_tokens(chain, query):
    """Run ``query`` through ``chain`` and report the tokens spent.

    The call is wrapped in ``get_openai_callback`` so the token usage of the
    run is captured; the total is printed before the result is returned.

    Args:
        chain: Object exposing a ``run(query)`` method.
        query: Input passed unchanged to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returns.
    """
    with get_openai_callback() as usage:
        answer = chain.run(query)
        print(f'Spent a total of {usage.total_tokens} tokens')
    return answer


In [None]:
from langchain import OpenAI, ConversationChain

# Temperature 0 completion model for the conversation demo.
llm = OpenAI(openai_api_key=openai.api_key, temperature=0)

# verbose=True makes the chain echo the fully formatted prompt on each call.
conversation = ConversationChain(
    llm=llm,
    verbose=True,
)

output = conversation.predict(input="Hi there!")
print(output)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi there!
AI:[0m

[1m> Finished chain.[0m
 Hi there! It's nice to meet you. How can I help you today?


### Conversation

In [29]:
from langchain import OpenAI
from langchain.chains import ConversationChain

# First initialize the large language model.
# "gpt-3.5-turbo" is a chat model, so it is wrapped with ChatOpenAI rather than
# the completion-style OpenAI class (which only warns and redirects for chat
# model names). Imported locally so this cell runs on its own.
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    temperature=0,
    openai_api_key=openai.api_key,
    model_name="gpt-3.5-turbo",
)


The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI:




In [45]:
from langchain.chat_models import ChatOpenAI
# Chat-model wrapper for gpt-3.5-turbo; rebinds `llm` for the cells that follow.
llm = ChatOpenAI(
    openai_api_key=openai.api_key,
    model_name="gpt-3.5-turbo",
)

#### ConversationBufferMemory

In [46]:

# Initialize the conversation chain; only the model is supplied here.
conversation = ConversationChain(llm=llm)
# Print the chain's prompt template to show its {history} and {input} placeholders.
print(conversation.prompt.template)

The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI:


In [47]:
# Conversation chain backed by a buffer memory, which keeps the transcript of
# every turn and feeds it back in as `history`.
conversation_buf = ConversationChain(memory=ConversationBufferMemory(), llm=llm)

# Calling the chain directly (instead of .predict) returns a dict with the
# input, the history used, and the response.
conversation_buf("Good morning AI!")



{'input': 'Good morning AI!',
 'history': '',
 'response': 'Good morning to you too! How can I assist you today?'}

In [48]:
# Second turn: the returned 'history' now contains the first exchange.
conversation_buf("My interest here is to explore the potential of integrating Large Language Models with external knowledge")

{'input': 'My interest here is to explore the potential of integrating Large Language Models with external knowledge',
 'history': 'Human: Good morning AI!\nAI: Good morning to you too! How can I assist you today?',
 'response': "That's a fascinating topic! Large Language Models, such as GPT-3, have shown incredible capabilities in generating human-like text. However, integrating external knowledge can help improve their accuracy and relevance in specific domains. Some approaches to integrating external knowledge include using knowledge graphs, ontologies, or pre-trained models that incorporate domain-specific knowledge. Would you like me to provide more details on any of these approaches?"}

In [49]:
# Show the transcript accumulated in the chain's memory buffer so far.
print(conversation_buf.memory.buffer)

Human: Good morning AI!
AI: Good morning to you too! How can I assist you today?
Human: My interest here is to explore the potential of integrating Large Language Models with external knowledge
AI: That's a fascinating topic! Large Language Models, such as GPT-3, have shown incredible capabilities in generating human-like text. However, integrating external knowledge can help improve their accuracy and relevance in specific domains. Some approaches to integrating external knowledge include using knowledge graphs, ontologies, or pre-trained models that incorporate domain-specific knowledge. Would you like me to provide more details on any of these approaches?


#### Conversation summary memory

#### Conversation summary memory

## Vector DB

In [78]:
from datasets import load_dataset

# First 10,000 rows of the Simple English Wikipedia dump (2022-03-01 snapshot).
data = load_dataset(
    path="wikipedia",
    name="20220301.simple",
    split="train[:10000]",
)
data

Found cached dataset wikipedia (/home/prabhu/.cache/huggingface/datasets/wikipedia/20220301.simple/2.0.0/aa542ed919df55cc5d3347f42dd4521d05ca68751f50dbc32bae2a7f1e167559)


Dataset({
    features: ['id', 'url', 'title', 'text'],
    num_rows: 10000
})

In [80]:
# Convert the loaded dataset to a pandas DataFrame and preview the first rows.
w = data.to_pandas()
w.head()

Unnamed: 0,id,url,title,text
0,1,https://simple.wikipedia.org/wiki/April,April,April is the fourth month of the year in the J...
1,2,https://simple.wikipedia.org/wiki/August,August,August (Aug.) is the eighth month of the year ...
2,6,https://simple.wikipedia.org/wiki/Art,Art,Art is a creative activity that expresses imag...
3,8,https://simple.wikipedia.org/wiki/A,A,A or a is the first letter of the English alph...
4,9,https://simple.wikipedia.org/wiki/Air,Air,Air refers to the Earth's atmosphere. Air is a...


### Chromadb

In [11]:
import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions

# Settings: embedded DuckDB + Parquet backend with on-disk persistence.
client = chromadb.Client(Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory="./vector_db/" # Optional, defaults to .chromadb/ in the current directory
))
print(client.heartbeat())

# Embedding functions / Vectoriser
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai.api_key,
    model_name="text-embedding-ada-002"
)

# Collections
# get_or_create_collection replaces the create-then-get pair: create_collection
# fails on a re-run because the collection already exists in the persisted
# directory, and the follow-up get_collection was redundant.
collection = client.get_or_create_collection(name="my_collection", embedding_function=openai_ef)

# Seed the demo documents only once; re-adding the same fixed ids on a re-run
# would collide with the entries already stored.
if collection.count() == 0:
    collection.add(
        documents=["lorem ipsum...", "doc2", "doc3", ],
        metadatas=[{"chapter": "3", "verse": "16"},
                   {"chapter": "3", "verse": "5"},
                   {"chapter": "29", "verse": "11"}, ],
        ids=["id1", "id2", "id3", ]
    )

# Persist AFTER writing: the original called persist() before anything was
# added, so the new documents were never explicitly flushed to disk.
client.persist()


Using embedded DuckDB with persistence: data will be stored in: ./vector_db/


1684003149221839437000
