# Running an LLM on your own laptop
In this notebook, we'll learn how to run an LLM on our own laptop using Hugging Face.

In [3]:
import os
# hf_hub_download is the helper the download cell below calls to fetch individual repo files.
from huggingface_hub import hf_hub_download

ModuleNotFoundError: No module named 'huggingface_hub'

In [2]:
# Access token for the Hugging Face Hub, taken from the environment (None when the variable is unset).
HUGGING_FACE_API_KEY = os.getenv("HUGGING_FACE_API_KEY")

In [72]:
# Hub repository to download from; change this id to try a different model.
model_id = "lmsys/fastchat-t5-3b-v1.0"

# Files requested from the repository, one hf_hub_download call each.
filenames = [
    "pytorch_model.bin",
    "added_tokens.json",
    "config.json",
    "generation_config.json",
    "special_tokens_map.json",
    "spiece.model",
    "tokenizer_config.json",
]

for filename in filenames:
    # The call hands back the location of the file, which is printed so the
    # cell output lists where every file ended up.
    downloaded_model_path = hf_hub_download(
        token=HUGGING_FACE_API_KEY,
        repo_id=model_id,
        filename=filename,
    )
    print(downloaded_model_path)

/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/pytorch_model.bin
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/added_tokens.json
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/config.json
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/generation_config.json
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/special_tokens_map.json
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/spiece.model
/Users/markhneedham/.cache/huggingface/hub/models--lmsys--fastchat-t5-3b-v1.0/snapshots/0b1da230a891854102d749b93f7ddf1f18a81024/tokenizer_config.json

In [4]:
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

In [6]:
import warnings
warnings.filterwarnings('always')

In [17]:
# Create the LLM with HuggingFacePipeline.from_model_id for the
# text2text-generation task; the generation settings are supplied through
# `model_kwargs` in this cell.
# NOTE(review): the next cell repeats this call with `pipeline_kwargs`
# instead and rebinds `llm` — confirm which variant is meant to be kept.
model_id = "lmsys/fastchat-t5-3b-v1.0"
llm = HuggingFacePipeline.from_model_id(
    task="text2text-generation",
    model_id=model_id,
    model_kwargs=dict(temperature=0, max_length=1000),
)

In [6]:
# Create the LLM with HuggingFacePipeline.from_model_id for the
# text2text-generation task; here the generation settings are supplied
# through `pipeline_kwargs`. This rebinds `llm`, which the cells below call.
model_id = "lmsys/fastchat-t5-3b-v1.0"
llm = HuggingFacePipeline.from_model_id(
    task="text2text-generation",
    model_id=model_id,
    pipeline_kwargs=dict(temperature=0, max_length=1000),
)

In [7]:
# Quick factual check of the model; the bare call makes the notebook display the returned string.
# Note that the recorded answer below is incorrect: the capital of Scotland is Edinburgh.
llm("What is the capital of Scotland?")

'<pad> glasgow'

In [8]:
# Open-ended question to see how much detail the model volunteers.
# Prompt wording corrected from "Whos is" to "Who is".
llm("Who is Cristiano Ronaldo? Tell me about him.")

'<pad> Cristiano  Ronaldo is  a  Portuguese footballer who  plays  as  a striker for  Real  Madrid.'

In [13]:
# Instruction text wrapped around every question; `{question}` is the only
# placeholder and is filled in when the chain is called.
template = """
You are a friendly chatbot assistant that responds conversationally to users' questions.
Keep the answers short, unless specifically asked by the user to elaborate on something.

Question: {question}

Answer:"""

prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

# Tie the prompt to the model created earlier so a question can be passed in directly.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [15]:
import pprint

# Calling the chain returns a dict holding the question and the generated
# text (see the output below); pretty-print it.
england_result = llm_chain("What is the capital of England?")
pprint.pprint(england_result)

{'question': 'What is the capital of England?', 'text': '<pad> London.\n'}


In [16]:
# Two-part question sent through the same prompt chain; the dict it returns is pretty-printed.
messi_result = llm_chain("Who is Lionel Messi? Is he better than Cristiano Ronaldo?")
pprint.pprint(messi_result)

{'question': 'Who is Lionel Messi? Is he better than Cristiano Ronaldo?',
 'text': '<pad> Lionel  Messi  is  a  Spanish  professional  footballer  who  '
         'plays  for  Barcelona  and  is  considered  one  of  the  best  '
         'players  in  the  world.  Cristiano  Ronaldo  is  a  Portuguese  '
         'professional  footballer  who  plays  for  Real  Madrid  and  is  '
         'considered  one  of  the  best  players  in  the  world.  Both  '
         'players  have  won  numerous  awards  and  accolades  for  their  '
         'performances  on  the  field.\n'}


In [74]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

# Load the tokenizer and the seq2seq model for `model_id` (set in an earlier cell).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Bind the pipeline object to its own name so the imported `pipeline` factory
# function is not overwritten by its return value.
text2text_pipeline = pipeline(
    "text2text-generation",
    model=model,
    device=-1,  # -1 runs on the CPU (see the transformers pipeline docs)
    tokenizer=tokenizer,
    max_length=1000,
)

# Wrap the transformers pipeline so it can be called like the LangChain LLM above.
local_llm = HuggingFacePipeline(pipeline=text2text_pipeline)

In [75]:
# Ask the explicitly constructed pipeline for a longer answer; the bare call displays the returned string.
local_llm("Who is Cristiano Ronaldo? Tell me a few paragraphs about him")

'<pad> Cristiano  Ronaldo  is  a  Portuguese  professional  footballer  who  plays  as  a  forward  for  Real  Madrid  in  the  Spanish  Primera  Liga.  He  is  the  current  record  holder  for  most  goals  scored  in  a  single  season  in  the  Premier  League,  with  a  total  of  81.\n'

In [34]:
# Technical-domain question to probe what the model knows beyond general trivia.
local_llm("What are competitors to Apache Kafka?")

'<pad> Apache Kafka is a popular open source message broker that is used for streaming and aggregating data from multiple sources. Some of its competitors include Apache Spark, Apache Storm, Apache Flume, and Apache Flink.'

In [49]:
import utils
import importlib

# Re-execute the local `utils` module so the names imported below come from
# its current source rather than a previously loaded copy.
importlib.reload(utils)
from utils import check_connectivity, toggle_wifi

# Print the connectivity status message returned by the helper.
print(check_connectivity())


You have Network Connectivity (IP Address: 192.168.86.249)


In [55]:
# NOTE(review): the recorded output below reads "WiFi disabled." although the
# argument is "on" — confirm the intended argument and re-run the cell.
toggle_wifi("on")

WiFi disabled.


In [63]:
# Display the status string returned by check_connectivity().
check_connectivity()

'You have Network Connectivity (IP Address: 192.168.86.249)'

In [79]:
# In-context question: the answer is stated directly in the facts supplied in the prompt.
local_llm("""
My name is Mark
My brothers are Will and Paul.
My cousins are David and James.

Using only the context above. Who are my brothers? 

""")

'<pad> Will  and  Paul  are  my  brothers.\n'

In [81]:
# In-context question whose answer is NOT in the supplied facts (no sister is mentioned).
local_llm("""
My name is Mark
My brothers are Will and Paul.

Using only the context above. Do you know if I have a sister?
""")

'<pad> No,  I  do  not  have  a  sister.\n'

In [80]:
# Another question the supplied facts cannot answer (the father is never named).
local_llm("""
My name is Mark
My brothers are Will and Paul.

Using only the context above. Can you name my father?
""")

'<pad> No,  you  cannot  name  my  father  based  on  the  information  given.\n'

In [73]:
# English-to-SQL prompt; the bare call displays the generated query string.
local_llm("""
Convert the following English to SQL:

Count how many locations we have in the webtraffic table
""")

'<pad> SELECT COUNT(*) FROM webtraffic'