# Local

In [14]:
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFaceHub, PromptTemplate, LLMChain

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).cuda()

pipe = pipeline(
    "text-generation",
    model=model, 
    tokenizer=tokenizer, 
    max_length=1024
)

local_llm = HuggingFacePipeline(pipeline=pipe)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Downloading shards: 100%|██████████| 4/4 [00:00<00:00, 8886.24it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.49it/s]
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


## Boilerplate

In [3]:
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

llm_chain = LLMChain(prompt=prompt, 
                     llm=local_llm
                     )

question = "What is the capital of England?"

print(llm_chain.run(question))


## ZeroShot with Pipe

In [90]:
from langchain import PromptTemplate, LLMChain

# Define the prompt template with detailed instructions
instruction = """
You are an information extraction model which extracts attributes given an input text.
Extract the following music attributes from the given Reddit post:
- Work of Art (WoA): The title of the song or album mentioned in the text.
- Performer: Performer(s) of the song or album mentioned in the text.
- Additional Performers: Performers who are not explicitly mentioned in the source text but are relevant.
- Title Contextual Cue: Text from the source that indicates the song or album title (e.g. "the song" in the phrase "the song X").
- Performer Contextual Cue: Text from the source that indicates the performer(s) (e.g. "is a song by" in the phrase "is a song by A").

Provide a structured output in JSON format with the following keys:
- title: (string) representing the WoA or song titles or album titles mentioned in the text.
- performer: (string) performer(s) of the song or album mentioned in the text.
- performer_unmentioned: (string) additional performers not contained in the source text.
- title_cue: (string) text from the source indicating the song title/album title.
- performer_cue: (string) text from the source indicating the performer.

Your output should be a JSON object structured as described above.
"""

suffix = "Here is the source text: {source_text}"
template = instruction + suffix


pipe = pipeline(
    "text-generation",
    model=model, 
    tokenizer=tokenizer,
    max_length=1024,
    device="cuda"
)

local_llm = HuggingFacePipeline(pipeline=pipe)

prompt = PromptTemplate(
    input_variables=["source_text"], 
    template=template
)

llm_chain = LLMChain(llm=local_llm, prompt=prompt)

# Example usage
source_text = "Check out Blinding Lights by The Weeknd. It is so good!"
result = llm_chain.run({"source_text": source_text})
print(result)


TypeError: Could not infer framework from class <class 'langchain_community.chat_models.ollama.ChatOllama'>.

In [61]:
# LangChain supports many other chat models. Here, we're using Ollama
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# supports many more optional parameters. Hover on your `ChatOllama(...)`
# class to view the latest available supported parameters
llm = ChatOllama(model="llama3")
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

# using LangChain Expressive Language chain syntax
# learn more about the LCEL on
# /docs/concepts/#langchain-expression-language-lcel
chain = prompt | llm | StrOutputParser()

# for brevity, response is printed in terminal
# You can use LangServe to deploy your application for
# production
print(chain.invoke({"topic": "Space travel"}))


Why did the astronaut break up with his girlfriend before going to Mars?

Because he needed space! (get it?)


# OpenAI

In [None]:
import os
from langchain_openai import ChatOpenAI

with open("../keys/openai.txt", "r") as f:
    key = f.read()
    
os.environ["OPENAI_API_KEY"] = key

model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

class Joke(BaseModel):
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")

structured_llm = model.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")


## Structured Output

In [74]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_community.chat_models import ChatOllama

# Define your desired data structure.
class WorkOfArt(BaseModel):
    title: str = Field(description="The title of the song or album mentioned in the text.")
    title_cue: str = Field(description="Text from the source indicating the song title/album title")
    performer: str = Field(description="Performer(s) of the song or album mentioned in the text.")
    performer_unmentioned: str = Field(description="Performers who are not explicitly mentioned in the source text but are relevant.")
    performer_cue: str = Field(description="Text from the source indicating the performer.")


model = ChatOllama(model="llama3")


In [87]:
from langchain_core.output_parsers import PydanticOutputParser

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=WorkOfArt)


In [84]:

prompt = PromptTemplate(
    template="""Extract the following music attributes from the given Reddit post.
    Here are the formatting instructions:{format_instructions}
    Here is the source text:\n{source_text}""",
    input_variables=["source_text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser

result = chain.invoke({"source_text": source_text})
result


WorkOfArt(title='Blinding Lights', title_cue='by The Weeknd. It is so good!', performer='The Weeknd', performer_unmentioned='', performer_cue='')

PydanticOutputParser(pydantic_object=<class '__main__.WorkOfArt'>)

In [33]:
chain = LLMChain(llm=local_llm, prompt=prompt)
generated_output = chain({"source_text": source_text})

# Parse the output using the parser
result = parser.parse(generated_output)



  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

In [25]:
result

{'properties': {'title': {'title': 'Title',
   'description': 'The title of the song or album mentioned in the text.',
   'type': 'string'},
  'title_cue': {'title': 'Title Cue',
   'description': 'Text from the source indicating the song title/album title',
   'type': 'string'},
  'performer': {'title': 'Performer',
   'description': 'Performer(s) of the song or album mentioned in the text.',
   'type': 'string'},
  'performer_unmentioned': {'title': 'Performer Unmentioned',
   'description': 'Performers who are not explicitly mentioned in the source text but are relevant.',
   'type': 'string'},
  'performer_cue': {'title': 'Performer Cue',
   'description': 'Text from the source indicating the performer.',
   'type': 'string'}},
 'required': ['title',
  'title_cue',
  'performer',
  'performer_unmentioned',
  'performer_cue']}

In [18]:
prompt.pretty_print()

Extract the following music attributes from the given Reddit post. Here is the source text:
[33;1m[1;3m{source_text}[0m
Here are the formatting instructions:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"title": {"title": "Title", "description": "The title of the song or album mentioned in the text.", "type": "string"}, "title_indicator": {"title": "Title Indicator", "description": "Text from the source indicating the song title/album title", "type": "string"}, "performer": {"title": "Performer", "description": "Performer(s) of the song or album mentioned in the text."

## Few Shot

In [19]:
from langchain import FewShotPromptTemplate

# Few-shot examples (optional)
examples = [
{
    "source_text": "I just listened to Shape of You by Ed Sheeran. It's amazing!",
    "title": "Shape of You",
    "performer": "Ed Sheeran",
    "performer_unmentioned": "",
    "title_cue": "listened to",
    "performer_cue": "by"
},
{
    "source_text": "The album 'Abbey Road' by The Beatles is a classic.",
    "title": "Abbey Road",
    "performer": "The Beatles",
    "performer_unmentioned": "",
    "title_cue": "The album",
    "performer_cue": "by"
}
]

example_prompt = PromptTemplate(
    input_variables=["source_text", "title", 
                     "performer", "performer_unmentioned",
                     "title_indicator", "performer_indicator"], 
                     template="""Source text: {source_text}; Output: 
                            'title': {title}, 'performer': {performer}, 
                            'performer_unmentioned': {performer_unmentioned},
                            'title_indicator': {title_indicator},
                            'performer_indicator': {performer_indicator}"
                            """
)


fewshot_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=instruction + "\nHere are some examples: ",
    suffix=suffix,
    input_variables=["source_text"],
)


In [20]:
prompt.pretty_print()

Extract the following music attributes from the given Reddit post. Here is the source text:
[33;1m[1;3m{source_text}[0m
Here are the formatting instructions:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"title": {"title": "Title", "description": "The title of the song or album mentioned in the text.", "type": "string"}, "title_indicator": {"title": "Title Indicator", "description": "Text from the source indicating the song title/album title", "type": "string"}, "performer": {"title": "Performer", "description": "Performer(s) of the song or album mentioned in the text."

# Hub

In [8]:
import os

with open("../keys/huggingface.txt", "r") as f:
    api_token = f.read()

os.environ['HUGGINGFACEHUB_API_TOKEN'] = api_token


In [27]:

llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-70B-Instruct",
    model_kwargs={"temperature":0, "max_length":180}
)



In [22]:
llm_chain = LLMChain(prompt=prompt, 
                     llm=HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B-Instruct", 
                                        model_kwargs={"temperature":0.001, 
                                                      "max_length":64}))


In [23]:
question = "What is the capital of France?"

print(llm_chain.run(question))


Question: What is the capital of France?

Answer: Let's think step by step. France is a country located in Western Europe. The capital of France is... Paris! That's right! The City of Light, famous for its iconic landmarks like the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum. Voilà! 🇫🇷👍
#### 1.5/1.5 points
#### 100% accuracy
#### 1.5/1.5 points
#### 100% accuracy
#### 1
