In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from pprint import pprint


In [2]:
# Load the CSV (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv('Amazon_Unlocked_Mobile.csv')
# Show the (rows, columns) shape of the frame as loaded.
df.shape


(413840, 6)

In [3]:
# Drop every row that has at least one missing value. The result is assigned
# back to `df` instead of using inplace=True, so no frame object is mutated in
# place and the statement reads as a plain "input -> output" step.
df = df.dropna()
# Shape of the frame once rows with missing values are gone.
df.shape


(334328, 6)

In [4]:
import os
import torch

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain


In [5]:
# Hugging Face Hub id of the Llama-2 chat fine-tuned checkpoint
model_name = 'meta-llama/Llama-2-7b-chat-hf'

# Tokenizer that matches the checkpoint (requested with use_auth_token=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

# Pre-trained causal language model: 4-bit loading, float16 dtype,
# device placement left to device_map='auto'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map='auto',
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
from transformers import pipeline

# Text-generation pipeline built around the already-loaded model and tokenizer.
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                # Kept identical to the dtype the model is loaded with
                # (torch.float16) so the load call and the pipeline agree.
                torch_dtype=torch.float16,
                device_map='auto',
                # Upper bound on the number of tokens generated per call.
                max_new_tokens = 512,
                # NOTE(review): sampling is enabled but limited to top_k=1,
                # which presumably makes decoding effectively greedy - confirm intent.
                do_sample = True,
                top_k=1,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id)


In [15]:
# Wrap the Hugging Face pipeline so LangChain chains can call it as an LLM.
# NOTE(review): `pipe` is a prebuilt pipeline; whether this temperature is
# actually forwarded to generation should be confirmed against the installed
# LangChain version.
llm = HuggingFacePipeline(pipeline=pipe,
                          model_kwargs = {'temperature' : 0.7})


In [None]:
# Markers used to assemble a Llama-2 chat prompt

B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used whenever the caller does not pass one.
# Written as adjacent string literals so the trailing spaces before each
# line break are visible and cannot be lost to whitespace trimming.

DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, \n"
    "while being safe. Your answers should not include any harmful, unethical, racist, sexist, \n"
    "toxic, dangerous, or illegal content. Please ensure that your responses are socially \n"
    "unbiased and positive in nature.\n"
    "\n"
    "If a question does not make any sense, or is not factually coherent, explain why instead of \n"
    "answering something not correct. If you don't know the answer to a question, please don't \n"
    "share false information."
)


# Builds the prompt template from a user instruction plus a system prompt (or the default one)

def get_template(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Assemble a Llama-2 chat prompt string.

    The pieces are concatenated with nothing in between, in this order:
    B_INST, B_SYS, the system prompt, E_SYS, the instruction, E_INST.

    Args:
        instruction: User instruction text; may contain placeholders such as
            ``{text}`` that are filled in later by the caller.
        new_system_prompt: System prompt to embed; defaults to
            DEFAULT_SYSTEM_PROMPT.

    Returns:
        The assembled prompt as a single string.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)



In [None]:
def get_llm_chain(instruction, system_prompt=DEFAULT_SYSTEM_PROMPT, format_instructions=None):
    """
    Build a LangChain LLMChain that runs one instruction through the Hugging Face pipeline.

    Steps performed:

      1. Combine the instruction and the system prompt into a prompt template string.
      2. Wrap that string in a LangChain PromptTemplate whose input variable is ``text``;
         when format instructions are supplied they are bound as the
         ``format_instructions`` partial variable.
      3. Create the LLM from the previously created Hugging Face pipeline (``pipe``).
      4. Stitch the prompt and the LLM together with LLMChain.

    Args:
        instruction: Instruction text. It is expected to contain a ``{text}``
            placeholder, since ``text`` is the declared input variable.
        system_prompt: System prompt embedded in the template; defaults to
            DEFAULT_SYSTEM_PROMPT.
        format_instructions: Optional output-parser format instructions. Any
            falsy value (None, empty string) means "no partial variables".

    Returns:
        The LLMChain combining the prompt and the LLM.
    """
    # The helper is named get_template; calling a misspelled name here would
    # raise NameError on every invocation.
    template = get_template(instruction, system_prompt)

    prompt_kwargs = {"template": template, "input_variables": ["text"]}
    if format_instructions:
        prompt_kwargs["partial_variables"] = {"format_instructions": format_instructions}
    prompt = PromptTemplate(**prompt_kwargs)

    print('\n Prompt Templete: \n \n',template)
    # NOTE(review): whether model_kwargs reaches generation when a prebuilt
    # pipeline is passed should be confirmed against the installed LangChain version.
    llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain


In [9]:
# Test the set-up

# The {text} placeholder is filled in when the chain is run.
instruction = "Can you please let me know, Who won the last {text} world cup? \n"

# Build the chain with the default system prompt (get_llm_chain also prints the template).
llm_chain = get_llm_chain(instruction)

# Run the chain with 'football' as the {text} value and print the reply.
output = llm_chain.run('football')
print(output)


'\n Unterscheidung zwischen "Cricket World Cup" und "ICC Cricket World Cup". The ICC Cricket World Cup is the premier international tournament of men\'s cricket, held every four years. It is organized by the International Cricket Council (ICC) and features the top teams from around the world competing in a round-robin format. The tournament is considered one of the most prestigious in international cricket and is watched by millions of fans around the globe.\n\nThe most recent edition of the ICC Cricket World Cup took place in England and Wales in 2019. The tournament was won by England, who defeated New Zealand in the final by 119 runs. This was England\'s first World Cup title, and it marked a significant moment in the country\'s cricketing history.\n\nIn contrast, the Cricket World Cup is a broader term that can refer to any international cricket tournament, including those organized by the ICC as well as those held by other governing bodies or organizations. The term "Cricket World

### ConversationBufferMemory

In [None]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory


In [None]:
# Buffer memory created with default settings; handed to the conversation chain.
memory = ConversationBufferMemory()


In [None]:
# Conversation chain that pairs the LLM with the buffer memory.
# verbose=True presumably echoes the full prompt on each call - confirm in the LangChain docs.
chat_conversation = ConversationChain(llm=llm,
                                      memory=memory,
                                      verbose=True)


In [None]:
# Wrap the question in the Llama-2 chat template using the default system prompt.
prompt = get_template('What do you know about AI')

# Send the wrapped question through the conversation chain; the reply is the cell output.
chat_conversation.predict(input=prompt)


In [None]:
# Second question, wrapped the same way with the default system prompt.
prompt = get_template('What do you know about Llama2?')

# Uses the same chain object, and therefore the same `memory`, as the previous call.
chat_conversation.predict(input=prompt)


In [None]:
# Print what the buffer memory currently holds.
print(memory.buffer)

In [None]:
# Print the memory variables returned for an empty inputs dict.
print(memory.load_memory_variables({}))

### Save and Load Memory

In [None]:
import pickle
# Serialize the chain's memory object to bytes so it can be written to disk.
pickled = pickle.dumps(chat_conversation.memory)


In [None]:
# Write the pickled memory bytes to memory.pkl (relative to the working directory).
with open('memory.pkl','wb') as f:
    f.write(pickled)

In [None]:
# Read the pickled memory back as raw bytes. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open until
# garbage collection.
with open('memory.pkl','rb') as f:
    memory_load = f.read()


In [None]:
# Build a new conversation chain whose memory is restored from the pickled bytes.
# SECURITY: pickle.loads can execute arbitrary code; only unpickle data that
# this notebook wrote itself, never files from an untrusted source.
new_conversation = ConversationChain(llm=llm,
                                     memory = pickle.loads(memory_load),
                                     verbose=True)


### ConversationBufferWindowMemory

In [None]:
from langchain.memory import ConversationBufferWindowMemory

# Window memory configured with k=1 (presumably only the most recent exchange
# is kept - confirm in the LangChain docs).
memory_1 = ConversationBufferWindowMemory(k=1)

# Separate conversation chain that uses the windowed memory.
chat_conversation_window = ConversationChain(llm=llm, 
                                             memory=memory_1, 
                                             verbose=True)


In [None]:
# Wrap the greeting in the Llama-2 chat template using the default system prompt.
prompt = get_template('Hello, How are you doing today?')

# First turn on the windowed-memory chain; the reply is the cell output.
chat_conversation_window.predict(input=prompt)


In [None]:
# Second question for the windowed-memory chain, wrapped the same way.
prompt = get_template('What is the date today?')

# Second turn on the same chain object, so it shares `memory_1` with the previous call.
chat_conversation_window.predict(input=prompt)


In [None]:
# Print what the windowed memory currently holds.
print(memory_1.buffer)


### ConversationSummaryBufferMemory

In [None]:
from langchain.memory import ConversationSummaryBufferMemory

# Summary-buffer memory: it receives the LLM itself plus max_token_limit=100
# (presumably older turns are summarized by the LLM once the limit is
# exceeded - confirm in the LangChain docs).
memory_summ = ConversationSummaryBufferMemory(llm=llm,
                                              max_token_limit=100)

# Conversation chain that uses the summary-buffer memory.
chat_conversation_summ = ConversationChain(llm=llm,
                                           memory=memory_summ,
                                           verbose=True)


In [None]:
# Unlike the earlier cells, the question is passed as plain text rather than
# being wrapped with get_template first.
chat_conversation_summ.predict(input='What do you know about AI?')


In [None]:
# Second plain-text question on the same chain, so it shares `memory_summ` with the previous call.
chat_conversation_summ.predict(input='What do you know about Machine Learning?')


In [None]:
# Show the memory variables the summary-buffer memory returns for an empty inputs dict.
memory_summ.load_memory_variables({})
