In [1]:
import yaml, os
from langchain import PromptTemplate
from transformers import AutoTokenizer
from langchain.chains import ConversationChain
from langchain.llms import CTransformers, HuggingFaceHub
from langchain.chains.conversation.memory import ConversationBufferMemory

In [2]:
# Read the Hugging Face Hub API token from the local YAML file and publish it
# through the process environment.
# NOTE(review): yaml.load with FullLoader is more permissive than a flat
# key/value file needs; consider yaml.safe_load if the file is not fully trusted.
with open('cadentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

hf_token_key = 'HUGGINGFACEHUB_API_TOKEN'
os.environ[hf_token_key] = credentials[hf_token_key]

In [3]:
# Disabled alternative backend, kept for reference:
# llm = CTransformers(
#     model="TheBloke/Llama-2-7B-Chat-GGML",
#     model_type="llama",
#     max_new_tokens=512,
#     temperature=0.5,
# )

# FLAN-UL2 through HuggingFaceHub. No token argument is passed here; presumably
# the client falls back to the HUGGINGFACEHUB_API_TOKEN environment variable
# set in the credentials cell -- TODO confirm.
llm_flan_ul2 = HuggingFaceHub(
    repo_id="google/flan-ul2",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 256},
)

# FLAN-T5-XL through HuggingFaceHub, with the API token passed explicitly.
llm_flan_t5 = HuggingFaceHub(
    repo_id="google/flan-t5-xl",
    model_kwargs={"temperature": 0},
    huggingfacehub_api_token=credentials['HUGGINGFACEHUB_API_TOKEN'],
)

In [4]:
# Shared memory object: kept in its own variable so later cells can inspect
# memory.buffer directly.
memory = ConversationBufferMemory()

# verbose=True makes every predict() call print the fully formatted prompt.
conversation = ConversationChain(llm=llm_flan_ul2, memory=memory, verbose=True)

In [5]:
# Turn 1: send a greeting; the chain is verbose, so the formatted prompt is printed too.
conversation.predict(input="Hi there! I am Zuu")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi there! I am Zuu
AI:[0m

[1m> Finished chain.[0m


'Hi there! I am Zuu.'

In [6]:
# Turn 2: a follow-up question; the previous exchange now appears in the prompt history.
conversation.predict(input="Do you like to play football?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI:[0m

[1m> Finished chain.[0m


'Yes, I like to play football.'

In [7]:
# Turn 3: ask the model for its name, with the first two exchanges as history.
conversation.predict(input="What is your name?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI:[0m

[1m> Finished chain.[0m


'My name is Zuu.'

In [8]:
# Turn 4: an open-ended question; the history in the prompt keeps growing with each turn.
conversation.predict(input="Tell me something about football ?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI: My name is Zuu.
Human: Tell me something about football ?
AI:[0m

[1m> Finished chain.[0m


'Football is a game played between two teams of eleven players on a field with a ball.'

### But how can we count and track tokens?
     - You can use `AutoTokenizer` from `transformers`.

In [9]:
# Load the tokenizer for "google/flan-ul2", the same repo id used for llm_flan_ul2,
# so token counts are measured with that model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained("google/flan-ul2")

In [10]:
# catch memory
memory_buffer = memory.buffer
print(memory_buffer)

Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI: My name is Zuu.
Human: Tell me something about football ?
AI: Football is a game played between two teams of eleven players on a field with a ball.


In [11]:
# Tokenize the captured history, then show the tokens and how many there are.
history_tokens = tokenizer.tokenize(memory_buffer)
print(history_tokens)
print(len(history_tokens))

['▁Human', ':', '▁Hi', '▁there', '!', '▁I', '▁am', '▁Zu', 'u', '▁AI', ':', '▁Hi', '▁there', '!', '▁I', '▁am', '▁Zu', 'u', '.', '▁Human', ':', '▁Do', '▁you', '▁like', '▁to', '▁play', '▁football', '?', '▁AI', ':', '▁Yes', ',', '▁I', '▁like', '▁to', '▁play', '▁football', '.', '▁Human', ':', '▁What', '▁is', '▁your', '▁name', '?', '▁AI', ':', '▁My', '▁name', '▁is', '▁Zu', 'u', '.', '▁Human', ':', '▁Tell', '▁me', '▁something', '▁about', '▁football', '▁', '?', '▁AI', ':', '▁Football', '▁is', '▁', 'a', '▁game', '▁played', '▁between', '▁two', '▁teams', '▁of', '▁eleven', '▁players', '▁on', '▁', 'a', '▁field', '▁with', '▁', 'a', '▁ball', '.']
85


In [12]:
# Turn 5: one more question, so the history can be re-measured in the following cells.
conversation.predict(input="Name few football teams?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI: My name is Zuu.
Human: Tell me something about football ?
AI: Football is a game played between two teams of eleven players on a field with a ball.
Human: Name few football teams?
AI:[0m

[1m> Finished chain.[0m


'The Manchester United Football Club is a football club based in Old Trafford, Greater Manchester, England.'

In [13]:
# Re-read the history after the latest turn (memory_buffer is rebound here) and print it.
memory_buffer = memory.buffer
print(memory_buffer)

Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI: My name is Zuu.
Human: Tell me something about football ?
AI: Football is a game played between two teams of eleven players on a field with a ball.
Human: Name few football teams?
AI: The Manchester United Football Club is a football club based in Old Trafford, Greater Manchester, England.


In [14]:
# Tokenize the refreshed history, then show the tokens and the new token count.
history_tokens = tokenizer.tokenize(memory_buffer)
print(history_tokens)
print(len(history_tokens))

['▁Human', ':', '▁Hi', '▁there', '!', '▁I', '▁am', '▁Zu', 'u', '▁AI', ':', '▁Hi', '▁there', '!', '▁I', '▁am', '▁Zu', 'u', '.', '▁Human', ':', '▁Do', '▁you', '▁like', '▁to', '▁play', '▁football', '?', '▁AI', ':', '▁Yes', ',', '▁I', '▁like', '▁to', '▁play', '▁football', '.', '▁Human', ':', '▁What', '▁is', '▁your', '▁name', '?', '▁AI', ':', '▁My', '▁name', '▁is', '▁Zu', 'u', '.', '▁Human', ':', '▁Tell', '▁me', '▁something', '▁about', '▁football', '▁', '?', '▁AI', ':', '▁Football', '▁is', '▁', 'a', '▁game', '▁played', '▁between', '▁two', '▁teams', '▁of', '▁eleven', '▁players', '▁on', '▁', 'a', '▁field', '▁with', '▁', 'a', '▁ball', '.', '▁Human', ':', '▁Name', '▁few', '▁football', '▁teams', '?', '▁AI', ':', '▁The', '▁Manchester', '▁United', '▁Football', '▁Club', '▁is', '▁', 'a', '▁football', '▁club', '▁', 'based', '▁in', '▁Old', '▁Tra', 'f', 'ford', ',', '▁Greater', '▁Manchester', ',', '▁England', '.']
117


### But this token count only measures the history / memory of the model. What if the current user query somehow exceeds the token limit?
     - We can add the next query to the prompt itself and continue the conversation.
     - In most cases this will work fine. But what if the user query is too long?

In [15]:
# Fill the chain's prompt template with the stored history plus a placeholder
# next message, and print the resulting text.
formatted_prompt = conversation.prompt.format(
    history=memory.buffer,
    input='the next input',
)
print(formatted_prompt)

The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there! I am Zuu
AI: Hi there! I am Zuu.
Human: Do you like to play football?
AI: Yes, I like to play football.
Human: What is your name?
AI: My name is Zuu.
Human: Tell me something about football ?
AI: Football is a game played between two teams of eleven players on a field with a ball.
Human: Name few football teams?
AI: The Manchester United Football Club is a football club based in Old Trafford, Greater Manchester, England.
Human: the next input
AI:


### Now we have added the user query to the prompt itself

In [18]:
def chat_to_llm(chat_llm):
    """Run an interactive console chat against ``chat_llm`` and tally token usage.

    A fresh ``ConversationChain`` with its own ``ConversationBufferMemory`` is
    built on every call. For each non-empty message, the complete prompt
    (template + history + the new message) is tokenized before it is sent,
    and the reply is tokenized afterwards; both counts are added to a running
    total. Typing ``exit`` prints the total and ends the loop. Empty input is
    ignored.

    Token counts come from the notebook-level ``tokenizer``, so they are only
    meaningful when that tokenizer matches ``chat_llm``.

    Args:
        chat_llm: LLM object handed to ``ConversationChain(llm=...)``.

    Returns:
        None. All results are printed.
    """
    conversation_total_tokens = 0
    new_conversation = ConversationChain(
        llm=chat_llm,
        verbose=False,
        memory=ConversationBufferMemory(),
    )

    while True:
        message = input("Human: ")
        if message == 'exit':
            print(f"{conversation_total_tokens} tokens used in total in this conversation")
            break
        if not message:
            continue

        # Format with THIS chain's prompt and memory (not the notebook-global
        # `conversation`), so the count reflects what is actually sent and the
        # function does not depend on another cell having been run.
        formatted_prompt = new_conversation.prompt.format(
            input=message,
            history=new_conversation.memory.buffer,
        )
        num_tokens = len(tokenizer.tokenize(formatted_prompt))
        conversation_total_tokens += num_tokens
        print(f'tokens sent {num_tokens}')

        response = new_conversation.predict(input=message)
        conversation_total_tokens += len(tokenizer.tokenize(response))

        print(f"HUMAN: {message}")
        print(f"LLM: {response}")

In [19]:
chat_to_llm(llm_flan_ul2)

tokens sent 64
HUMAN: hi, there !
LLM: hi, there!
tokens sent 78
HUMAN: what is your name ?
LLM: my name is sarah.
tokens sent 102
HUMAN: hi sarah, are you like football ?
LLM: i like football.
tokens sent 117
HUMAN: name few football teams ?
LLM: i dont know
382 tokens used in total in this conversation
