In [1]:
import torch
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Check for GPU

torch.cuda.is_available()

True

In [3]:
model = "databricks/dolly-v2-7b"

In [6]:
generate_text = pipeline(model=model, 
                         torch_dtype=torch.bfloat16, 
                         trust_remote_code=True,
                         device_map="auto")

In [7]:
out = generate_text("Explain what is an event horizon in a black hole?")

KeyboardInterrupt: 

In [45]:
out

[{'generated_text': 'According to general relativity, a black hole is a region of space-time with no escape to tell us that we have entered the event horizon of the black hole.  The region inside the event horizon is called the Black Hole Region.  It is not possible to send a signal to a location outside the event horizon of a black hole.  Anything that falls into a black hole must forever be caught inside the event horizon, and beyond that no one or nothing can escape.  This implies that a black hole is a region from which noescape can be determined.'}]

In [7]:
generate_text("Explain self-attention and transformer in a short paragraph.")

[{'generated_text': 'Self-attention is an energy-efficient way to get vectorized representations of inputs using only multiplication, addition and element-wise scaling. Transformers are based on the dot-product formulation of the softmax function, which is a common loss function used in machine learning. This loss function yields good performance on common machine learning tasks like machine translation and sentence similarity. Transformer also uses a learnable position-wise multiplier that efficiently passes representation updates between layers.'}]

In [8]:
out = generate_text("Write a Python function to calculate Fibonacci numbers")

In [9]:
print(out)

[{'generated_text': 'def fibonacci(n):\n    if n < 2:\n        return n\n    return fibonacci(n - 1) + fibonacci(n - 2)'}]


## Prompt engineering

In [11]:
### Sentiment Analysis

In [12]:
review = """
Needed a nice lamp for my bedroom, and this one had \
additional storage and not too high of a price point. \
Got it fast.  The string to our lamp broke during the \
transit and the company happily sent over a new one. \
Came within a few days as well. It was easy to put \
together.  I had a missing part, so I contacted their \
support and they very quickly got me the missing piece! \
Lumina seems to me to be a great company that cares \
about their customers and products!!
"""

In [15]:

prompt = f"""
What is the sentiment of the following product review, 
which is delimited with triple backticks?

Give your answer as a single word, either "positive" \
or "negative".

Review text: '''{review}'''
"""

response = generate_text(prompt)
print(response)

[{'generated_text': 'positive'}]


In [17]:
## Emotions

prompt = f"""
Identify a list of emotions that the writer of the \
following review is expressing. Include no more than \
five items in the list. Format your answer as a list of \
lower-case words separated by commas.

Review text: '''{review}'''
"""
response = generate_text(prompt)
print(response)

[{'generated_text': 'Disappointed, Surprised, Sad, angry, frustrated, grateful'}]


In [24]:
prompt = f"""
Identify the following items from the review text: 
- Item purchased by reviewer. For example - "Table"
- Company that made the item

The review is delimited with triple backticks. 
Format your response as a JSON object with  "Item" and "Brand" as the keys. 

If the information isn't present, use "unknown" as the value.

Make your response as short as possible.
  
Review text: '''{review}'''
"""
response =  generate_text(prompt)
print(response)



[{'generated_text': '"Item": "Lumina",\n"Brand": "Lumina"'}]


In [28]:
prompt = f"""
Identify the following items from the review text: 
- Sentiment (positive or negative)
- Is the reviewer expressing anger? (true or false)
- Item purchased by reviewer
- Brand that made the item

The review is delimited with triple backticks. 

Format your response as a JSON object with "Sentiment", "Anger", "Item" and "Brand" as the keys.
If the information isn't present, use "unknown" as the value.
Make your response as short as possible.

Format the Anger value as a boolean.

Review text: '''{review}'''
"""

response = generate_text(prompt)
print(response)

[{'generated_text': '"positive": true,"negative": false,"Item": "Lumina","Brand": "Lumina"'}]


In [35]:
%%time
prompt = f"""
Is the writer of the following review expressing anger?\
The review is delimited with triple backticks. \
Give your answer as a signle number, ranging on a scale from 0 to 9, where 0 means not angry, and 9 means extremely angry

Review text: '''{review}'''
"""
response = generate_text(prompt)
print(response)

[{'generated_text': '7'}]
CPU times: user 150 ms, sys: 135 Âµs, total: 150 ms
Wall time: 150 ms
