In [None]:
from dotenv import load_dotenv
import os
from transformers import AutoTokenizer,pipeline, GenerationConfig

In [None]:
# Pull settings from the local .env file into the process environment.
load_dotenv()

# Fail early with a readable message: assigning None into os.environ raises
# an opaque "TypeError: str expected, not NoneType", and a missing model or
# tokenizer id would otherwise only surface later inside the HF loaders.
_REQUIRED_ENV_VARS = ('HF_HOME', 'MODEL_NAME', 'TOKENIZER', 'HF_API')
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.getenv(name) is None]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the .env file or the shell before running this notebook."
    )

# HF_HOME is already present in os.environ at this point (validated above),
# so it does not need to be re-assigned onto itself.
# NOTE(review): Hugging Face libraries appear to read HF_HOME at import time;
# if the cache location is not honoured, load the .env before importing
# `transformers` - confirm.
MODEL_NAME = os.environ['MODEL_NAME']
# Optional: forwarded as `revision` when building the pipeline; may be None.
MODEL_FILE = os.getenv('MODEL_FILE')
TOKENIZER = os.environ['TOKENIZER']
# Re-export the token from HF_API under the HUGGINGFACEHUB_API_TOKEN name.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.environ['HF_API']

In [None]:
# Load the fast tokenizer identified by the TOKENIZER setting from Hugging Face.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER, use_fast=True)
# Backward-compatible alias: the original (misspelled) name is kept so that
# any cell still referring to `toeknizer` continues to work.
toeknizer = tokenizer

In [None]:
# Start from the generation defaults published with the model, then apply
# the notebook-specific overrides in one place.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)

generation_overrides = {
    "max_new_tokens": 512,
    "temperature": 0.47,
    "remove_invalid_values": True,
    "do_sample": True,
}
for option_name, option_value in generation_overrides.items():
    setattr(generation_config, option_name, option_value)


In [None]:
# Build the high-level text-generation pipeline around the tokenizer loaded
# above, using the generation settings prepared in the previous cell.
base_pipeline = pipeline(
    task="text-generation",
    # Which model to load.
    # NOTE(review): MODEL_FILE is passed as `revision`; confirm it holds a
    # branch/tag/commit identifier rather than a file name.
    model=MODEL_NAME,
    revision=MODEL_FILE,
    # Tokenizer instance created earlier in the notebook.
    tokenizer=toeknizer,
    use_fast=True,
    # Loading / placement options.
    device_map="auto",
    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to run locally - only keep this for repositories you trust.
    trust_remote_code=True,
    # Generation settings.
    generation_config=generation_config,
)

#### Create a sample prompt and test with LangChain

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline

In [None]:
# Integrating the model into LangChain by wrapping the transformers pipeline.
# NOTE(review): these model_kwargs repeat the temperature / max_new_tokens
# already set on generation_config; confirm whether LangChain forwards them
# at call time when an existing pipeline object is supplied.
LC_model = HuggingFacePipeline(
    pipeline=base_pipeline,
    model_kwargs={"temperature": 0.47, "max_new_tokens": 512},
)

In [None]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage

# Prompt layout: a fixed system instruction, then the accumulated
# "chat_history" messages, then the latest user input bound to {text}.
template_messages = [
    SystemMessage(
        content="Act as a casual chat bot and chat with user as a casual friend. Do not share false information."
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{text}"),
]

prompt_template = ChatPromptTemplate.from_messages(template_messages)

In [None]:
# The memory exposes past turns as message objects under "chat_history",
# the same variable name the prompt's MessagesPlaceholder expects.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

# Tie the LangChain-wrapped model, the chat prompt and the memory together.
chain = LLMChain(
    prompt=prompt_template,
    llm=LC_model,
    memory=memory,
)

In [None]:
# Smoke test: send one greeting through the chain; the reply is shown as the
# cell output.
chain.predict(text="Hello")