In [1]:
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Dependencies:
#   pip install streamlit PyPDF2 transformers langchain sentencepiece accelerate bitsandbytes
# sentencepiece backs LlamaTokenizer; accelerate and bitsandbytes are required
# by device_map='auto' and load_in_8bit=True below.

# Single source of truth for the checkpoint, so the tokenizer and the model
# can never be loaded from different repositories by accident.
MODEL_ID = "chavinlo/alpaca-native"

# Other checkpoints that were considered (swap MODEL_ID to try one):
#   chavinlo/alpaca-13b
#   chavinlo/gpt4-x-alpaca
#   decapoda-research/llama-7b-hf
#
# - `lmsys/vicuna-7b-delta-v1.1`: A 7 billion parameter version of Vicuna that is trained with delta updates.
# - `stability-ai/stablevicuna`: A version of Vicuna that is further fine-tuned and trained with reinforcement learning from human feedback.
# - `chavinlo/vicuna-native`: A replica of Vicuna that is natively fine-tuned without using LoRA.
# NOTE(review): the three descriptions above are unverified notes; confirm the
# repository ids and details on the Hugging Face Hub before relying on them.

# Placeholder download location: point this at a writable directory.
CACHE_DIR = '/path/to/cache/'

# Tokenizer and weights share the same cache directory.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, cache_dir=CACHE_DIR)

# 8-bit quantised load, with layer placement left to device_map='auto'.
base_model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_8bit=True,
    device_map='auto',
    cache_dir=CACHE_DIR,
)

Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 2.62MB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 21.0/21.0 [00:00<00:00, 21.0kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 96.0/96.0 [00:00<?, ?B/s]
Downloading (…)okenizer_config.json: 100%|██████████| 335/335 [00:00<?, ?B/s] 
Downloading (…)lve/main/config.json: 100%|██████████| 556/556 [00:00<?, ?B/s] 
Downloading (…)model.bin.index.json: 100%|██████████| 26.8k/26.8k [00:00<00:00, 13.4MB/s]
Downloading shards:   0%|          | 0/3 [36:36<?, ?it/s]


KeyboardInterrupt: 



In [None]:
# Text-generation pipeline around the loaded model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    # Limit only the newly generated tokens. `max_length` also counts the
    # prompt, so a long prompt (e.g. one carrying conversation history) could
    # consume the whole budget and leave no room for an answer.
    max_new_tokens=256,
    # Sampling has to be switched on explicitly; without it decoding is greedy
    # and `temperature` / `top_p` are ignored.
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2,
)

# LangChain wrapper so the pipeline can be used as an LLM inside chains.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain import PromptTemplate, LLMChain

# Stanford Alpaca "no input" prompt. The response marker is "### Response:" and
# there is no trailing whitespace after "### Instruction:", matching the format
# published in the Stanford Alpaca repository.
# NOTE(review): assumes chavinlo/alpaca-native was fine-tuned on that format;
# confirm on the model card.
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:"""

# `instruction` is the only placeholder the template exposes.
prompt = PromptTemplate(template=template, input_variables=["instruction"])

In [None]:
# Chain that fills the instruction prompt and sends it to the local model.
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

# First smoke test: a simple factual question.
question = "What is the capital of England?"
answer = llm_chain.run(question)

print(answer)

In [None]:
# Second query through the same chain: a two-part, open-ended question.
question = "What are alpacas? and how are they different from llamas?"
answer = llm_chain.run(question)

print(answer)

In [None]:
### With Memory

In [None]:
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

In [None]:
# Sliding-window memory: the history is limited to the last 4 turns.
turns_to_remember = 4
window_memory = ConversationBufferWindowMemory(k=turns_to_remember)

In [None]:
# Conversation chain over the local model, using window_memory for history.
# No prompt is passed here, so the chain starts with its own default template.
conversation = ConversationChain(
    memory=window_memory,
    llm=local_llm,
    verbose=True,
)

In [None]:
# Show the chain's current prompt template before it is replaced.
default_template = conversation.prompt.template
default_template

In [None]:
# Replacement prompt that names the assistant "Alpaca". {history} and {input}
# are the two placeholders the template exposes.
alpaca_chat_template = '''The following is a friendly conversation between a human and an AI called Alpaca. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. 

Current conversation:
{history}
Human: {input}
AI:'''

conversation.prompt.template = alpaca_chat_template

In [None]:
# Opening turn of the conversation; the reply is displayed as the cell output.
greeting = "Hi there! I am Tapan"
conversation.predict(input=greeting)