# LangChain + Llama2 Basic usage
https://python.langchain.com/docs/get_started/quickstart

## Example 1: ChatGPT Demo

In [None]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

import os

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# pasted into (and accidentally committed with) the notebook. The original
# placeholder is kept as the fallback so the cell behaves as before when the
# variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY", "<OpenAI-KEY>")

# Completion-style and chat-style clients sharing the same key.
llm = OpenAI(openai_api_key=openai_api_key)
chat_model = ChatOpenAI(openai_api_key=openai_api_key)

In [None]:
from mylib.Utils import myprint

text = "hi"

# String-in interface: send the same prompt to the completion model and the
# chat model through `predict`, printing a banner before each result.
for banner, model in (
    ("-----LLM predict-------", llm),
    ("-----CHAT LLM predict-------", chat_model),
):
    myprint(banner)
    myprint(model.predict(text))

from langchain.schema import HumanMessage

# Message-in interface: wrap the prompt in a HumanMessage and use
# `predict_messages`. The chat result is printed via repr(), as before.
messages = [HumanMessage(content=text)]
myprint("-----LLM predict_message-------")
llm_reply = llm.predict_messages(messages)
myprint(llm_reply)
myprint("-----CHAT LLM predict_message-------")
chat_reply = chat_model.predict_messages(messages)
myprint(repr(chat_reply))


## Example 2: Custom Model Usage (HuggingFace Model)
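Note: the `MyModelUtils` helper used below is imported from `mylib.MyModelUtils`; please refer to its source (./lib/MyModelUtils.py — adjust the path if the package lives under ./mylib/) for details.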

In [4]:
## Test model 
from mylib.MyModelUtils import MyModelUtils
from mylib.Utils import timeit, myprint
import transformers
import os

# Absolute path of the local Llama-2 chat checkpoint, and the project helper
# that is constructed around it.
model_id = os.path.abspath('./models/Llama-2-7b-chat-hf')
model_util = MyModelUtils(model_id=model_id)


@timeit
def hf_model_load_timeit():
    """Load the model and its tokenizer from ``model_id``.

    Uses the module-level ``model_util`` and ``model_id``; the call is
    wrapped by the ``timeit`` decorator.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Step 1: start from the helper's default model-config kwargs and turn
    # them into kwargs suitable for ``from_pretrained``.
    # (Original note: modelcfg=transformers.AutoConfig.from_pretrained("modelname",.....))
    load_kwargs = model_util.make_model_kwargs_for_pretrained(
        **model_util.default_modelconf_kwargs
    )

    # Step 2: instantiate the LLM through the helper.
    # (Original note: llm = transformers.AutoModelForCausalLM.from_pretrained(
    #  "modelname",...,config=modelcfg,....))
    model = model_util.init_model(**load_kwargs)

    # Step 3: load the tokenizer from the same checkpoint with the same kwargs.
    tok = transformers.AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=model_id,
        **load_kwargs,
    )
    return model, tok


hf_model, tokenizer = hf_model_load_timeit()

# Run a single generation end to end with the raw model and tokenizer.
@timeit
def hf_model_generate_timeit(hf_model, tokenizer):
    """Generate a completion for a fixed prompt and print the decoded text.

    Args:
        hf_model: Model exposing ``.device`` and ``.generate``.
        tokenizer: Tokenizer used both to encode the prompt and to decode
            the generated ids.

    Returns:
        None; the decoded output is printed via ``myprint``.
    """
    question = "Explain me the difference between Data Lakehouse and Data Warehouse."

    # Show which device the model lives on.
    print(hf_model.device)

    # Tokenize the prompt into PyTorch tensors placed on the model's device.
    encoded = tokenizer(question, return_tensors="pt").to(hf_model.device)

    # input -> hf_model -> output ids (at most 256 new tokens).
    generated = hf_model.generate(**encoded, use_cache=True, max_new_tokens=256)

    # Decode the first returned sequence, keeping special tokens in the text.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=False)
    myprint(decoded)


hf_model_generate_timeit(hf_model, tokenizer)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Function hf_model_load_timeit() {} Took 5.8110 seconds
cuda:0
'<s> Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between Data Lakehouse and Data Warehouse.\n\nA data warehouse is a centralized repository that stores data in a structured and organized manner, making it easily accessible and queryable. On the other hand, a data lakehouse is a repository that stores data in its raw and unprocessed form, allowing for flexible and efficient querying and analysis of large datasets.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'
Function hf_model_generate_timeit(LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_

## Example 3: Custom Model Usage (HuggingFace pipeline)
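Note: the `MyModelUtils` helper used below is imported from `mylib.MyModelUtils`; please refer to its source (./lib/MyModelUtils.py — adjust the path if the package lives under ./mylib/) for details.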

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
import os
from mylib.MyModelUtils import MyModelUtils
from mylib.Utils import timeit, myprint
import transformers
# Absolute path of the local Llama-2 chat checkpoint, and the project helper
# that is constructed around it.
model_id = os.path.abspath('./models/Llama-2-7b-chat-hf')
model_util = MyModelUtils(model_id=model_id)

@timeit
def hf_pipeline_init_timeit():
    """Create the HuggingFace pipeline through the module-level ``model_util``.

    Returns:
        Whatever ``model_util.init_hf_pipeline`` returns for the default
        pretrained kwargs.
    """
    # Derive the from_pretrained kwargs from the helper's default config.
    load_kwargs = model_util.make_model_kwargs_for_pretrained(
        **model_util.default_modelconf_kwargs
    )
    pipe = model_util.init_hf_pipeline(load_kwargs)
    return pipe


hf_pipeline = hf_pipeline_init_timeit()

@timeit
def hf_pipeline_timeit():
    """Run one fixed prompt through the module-level ``hf_pipeline``.

    Returns:
        The pipeline's result for the prompt.
    """
    question = "Explain me the difference between Data Lakehouse and Data Warehouse."
    return hf_pipeline(question)


res = hf_pipeline_timeit()
myprint(res)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Function hf_pipeline_init_timeit() {} Took 5.4456 seconds
Function hf_pipeline_timeit() {} Took 7.9015 seconds
[{'generated_text': "Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between Data Lakehouse and Data Warehouse. A data lakehouse is a centralized repository that stores all of an organization's data, both structured and unstructured, in a single location. A data warehouse, on the other hand, is a repository that stores data in a structured format, typically in a relational database management system (RDBMS).\n\n\n\n"}]


## Example 4: Custom Model Usage (LangChain HuggingFace pipeline)
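Note: the `MyModelUtils` helper used below is imported from `mylib.MyModelUtils`; please refer to its source (./lib/MyModelUtils.py — adjust the path if the package lives under ./mylib/) for details.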

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.schema import HumanMessage
from mylib.MyModelUtils import MyModelUtils
from mylib.Utils import timeit, myprint
# `os` is imported here so this cell runs on its own; the other examples
# import it in their own cells, and this one previously relied on that.
import os

# Absolute path of the local Llama-2 chat checkpoint, and the project helper
# that is constructed around it.
model_id = os.path.abspath('./models/Llama-2-7b-chat-hf')
model_util = MyModelUtils(model_id=model_id)

@timeit
def langchain_pipeline_init_timeit():
    """Wrap a freshly built HuggingFace pipeline in LangChain's ``HuggingFacePipeline``.

    Returns:
        The ``HuggingFacePipeline`` LLM wrapper.
    """
    # Derive the from_pretrained kwargs from the helper's default config and
    # build the underlying transformers pipeline.
    load_kwargs = model_util.make_model_kwargs_for_pretrained(
        **model_util.default_modelconf_kwargs
    )
    wrapper_args = dict(
        pipeline=model_util.init_hf_pipeline(load_kwargs),
        pipeline_kwargs={'batch_size': 128},
    )
    return HuggingFacePipeline(**wrapper_args)


langchain_hf_llm = langchain_pipeline_init_timeit()

# Prompt shared by the LangChain examples that follow.
text = "Explain me the difference between Data Lakehouse and Data Warehouse."


@timeit
def langchain_pipeline_call_timeit():
    """Call the LangChain LLM object directly with the shared ``text`` prompt.

    Returns:
        The value returned by ``langchain_hf_llm(prompt=text)``.
    """
    completion = langchain_hf_llm(prompt=text)
    return completion


myprint(langchain_pipeline_call_timeit())

@timeit
def langchain_pipeline_predict_timeit():
    """Print a banner, then the result of ``predict`` on the shared ``text`` prompt."""
    print("-----langchain predict-------")
    prediction = langchain_hf_llm.predict(text)
    print(prediction)


langchain_pipeline_predict_timeit()

@timeit
def langchain_pipeline_predict_message_timeit():
    """Print a banner, then the result of ``predict_messages`` for the shared prompt.

    The shared ``text`` prompt is wrapped in a single ``HumanMessage``.
    """
    print("-----langchain predict_message-------")
    conversation = [HumanMessage(content=text)]
    reply = langchain_hf_llm.predict_messages(conversation)
    print(reply)


langchain_pipeline_predict_message_timeit()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Function langchain_pipeline_init_timeit() {} Took 5.6001 seconds
Function langchain_pipeline_call_timeit() {} Took 7.8995 seconds
" Unterscheidung between Data Lakehouse and Data Warehouse. A data lakehouse is a centralized repository that stores all of an organization's data, both structured and unstructured, in a single location. A data warehouse, on the other hand, is a repository that stores data in a structured format, typically in a relational database management system (RDBMS).\n\n\n\n"
-----langchain predict-------
