In [56]:
import os
import requests
from bs4 import BeautifulSoup
from pypdf import PdfReader

import openai
from IPython.display import HTML
from tenacity import (
    retry,
    RetryError,
    stop_after_attempt,
    wait_fixed,
    wait_random
)

# Credentials come from environment variables so they never appear in the notebook.
# os.getenv returns None when a variable is not set.
openai.organization = os.getenv('OPENAI_API_ORG')
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [57]:
@retry(wait=wait_fixed(60) + wait_random(0, 2), stop=stop_after_attempt(2))
def get_completion(prompt, model="gpt-3.5-turbo-16k"):
    """Send ``prompt`` as a single user message and return the reply text.

    The call is wrapped by tenacity: at most two attempts are made, with a
    pause of 60 seconds plus 0-2 seconds of random jitter between them. When
    the last attempt also fails, tenacity raises ``RetryError``.

    Args:
        prompt: Full text of the user message.
        model: Name of the chat model to query.

    Returns:
        The ``content`` of the first choice in the response.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

## Text to summarize

In [20]:
# Sample product review used by the prompts below.
# Each backslash must be the very last character on its line: a backslash
# followed by a space is not a line continuation, so it would leave a literal
# backslash and a newline in the text (and trigger an invalid-escape warning).
prod_review = """
Got this panda plush toy for my daughter's birthday, \
who loves it and takes it everywhere. It's soft and \
super cute, and its face has a friendly look. It's \
a bit small for what I paid though. I think there \
might be other options that are bigger for the \
same price. It arrived a day earlier than expected, \
so I got to play with it myself before I gave it \
to her.
"""

In [21]:
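# Quick manual check for when things go wrong: uncomment to call the API directly
# (bypassing get_completion and its retries) and print the raw response.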

# messages = [{"role": "user", "content": prod_review}]
# response = openai.ChatCompletion.create(
#        model="gpt-3.5-turbo-16k",
#        messages=messages,
#        temperature=0, # this is the degree of randomness of the model's output
#)
# print(response)

### Summarize with a word/sentence/character limit

In [22]:
# Build the summarization prompt from adjacent literals. Python concatenates
# them, so the text sent to the model is the same as a single block string.
prompt = (
    "\n"
    "Your task is to generate a short summary of a product "
    "review from an ecommerce site. \n"
    "\n"
    "Summarize the review below, delimited by triple \n"
    "backticks, in at most 30 words. \n"
    "\n"
    f"Review: ```{prod_review}```\n"
)

# Ask the model for the summary and show it.
response = get_completion(prompt)
print(response)

This panda plush toy is loved by the reviewer's daughter, but they feel it is a bit small for the price.


### Summarize with a focus on shipping and delivery

In [23]:
# Same summarization task, but aimed at the Shipping department: the prompt
# asks the model to focus on shipping and delivery aspects of the review.
# (Fixes the "deparmtment" typo that was being sent to the model.)
prompt = f"""
Your task is to generate a short summary of a product \
review from an ecommerce site to give feedback to the \
Shipping department. 

Summarize the review below, delimited by triple 
backticks, in at most 30 words, and focusing on any aspects \
that mention shipping and delivery of the product. 

Review: ```{prod_review}```
"""

response = get_completion(prompt)
print(response)


The customer is happy with the product but suggests offering larger options for the same price. They were pleased with the early delivery.


### Summarize with a focus on price and value

In [24]:
# Same summarization task, but aimed at the pricing department: the prompt
# asks the model to focus on price and perceived value.
# (Fixes the "deparmtment" typo that was being sent to the model.)
prompt = f"""
Your task is to generate a short summary of a product \
review from an ecommerce site to give feedback to the \
pricing department, responsible for determining the \
price of the product.  

Summarize the review below, delimited by triple 
backticks, in at most 30 words, and focusing on any aspects \
that are relevant to the price and perceived value. 

Review: ```{prod_review}```
"""

response = get_completion(prompt)
print(response)


The customer loves the panda plush toy for its softness and cuteness, but feels it is overpriced compared to other options available.


### Try to extract instead of summarize

In [25]:
# Ask the model to extract (rather than summarize) the shipping-related facts.
# Two fixes compared with the earlier version of this prompt:
#   * every continuation backslash is now the last character on its line, so
#     no literal backslash or stray newline ends up in the prompt;
#   * the instructions name the delimiter actually used around the review
#     (triple backticks, not triple quotes).
prompt = f"""
Your task is to extract relevant information from \
a product review from an ecommerce site to give \
feedback to the Shipping department. 

From the review below, delimited by triple backticks, \
extract the information relevant to shipping and \
delivery. Limit to 30 words. 

Review: ```{prod_review}```
"""

response = get_completion(prompt)
print(response)

The relevant information for the Shipping department is that the product arrived a day earlier than expected.


### Summarize multiple product reviews

In [26]:
# First sample: reuse the panda plush review defined earlier in the notebook.
review_1 = prod_review 

# Review for a standing lamp. Written as adjacent literals, which Python
# joins into one string: a leading newline, one long line, a trailing newline.
review_2 = (
    "\n"
    "Needed a nice lamp for my bedroom, and this one "
    "had additional storage and not too high of a price "
    "point. Got it fast - arrived in 2 days. The string "
    "to the lamp broke during the transit and the company "
    "happily sent over a new one. Came within a few days "
    "as well. It was easy to put together. Then I had a "
    "missing part, so I contacted their support and they "
    "very quickly got me the missing piece! Seems to me "
    "to be a great company that cares about their customers "
    "and products. \n"
)

# Review for an electric toothbrush, built the same way.
review_3 = (
    "\n"
    "My dental hygienist recommended an electric toothbrush, "
    "which is why I got this. The battery life seems to be "
    "pretty impressive so far. After initial charging and "
    "leaving the charger plugged in for the first week to "
    "condition the battery, I've unplugged the charger and "
    "been using it for twice daily brushing for the last "
    "3 weeks all on the same charge. But the toothbrush head "
    "is too small. I’ve seen baby toothbrushes bigger than "
    "this one. I wish the head was bigger with different "
    "length bristles to get between teeth better because "
    "this one doesn’t.  Overall if you can get this one "
    "around the $50 mark, it's a good deal. The manufactuer's "
    "replacements heads are pretty expensive, but you can "
    "get generic ones that're more reasonably priced. This "
    "toothbrush makes me feel like I've been to the dentist "
    "every day. My teeth feel sparkly clean! \n"
)

# review for a blender
# Every continuation backslash is the last character on its line. A backslash
# followed by a space is not a continuation and would leave a literal
# backslash plus a newline inside the review text.
review_4 = """
So, they still had the 17 piece system on seasonal \
sale for around $49 in the month of November, about \
half off, but for some reason (call it price gouging) \
around the second week of December the prices all went \
up to about anywhere from between $70-$89 for the same \
system. And the 11 piece system went up around $10 or \
so in price also from the earlier sale price of $29. \
So it looks okay, but if you look at the base, the part \
where the blade locks into place doesn’t look as good \
as in previous editions from a few years ago, but I \
plan to be very gentle with it (example, I crush \
very hard items like beans, ice, rice, etc. in the \
blender first then pulverize them in the serving size \
I want in the blender then switch to the whipping \
blade for a finer flour, and use the cross cutting blade \
first when making smoothies, then use the flat blade \
if I need them finer/less pulpy). Special tip when making \
smoothies, finely cut and freeze the fruits and \
vegetables (if using spinach-lightly stew soften the \
spinach then freeze until ready for use-and if making \
sorbet, use a small to medium sized food processor) \
that you plan to use that way you can avoid adding so \
much ice if at all-when making your smoothie. \
After about a year, the motor was making a funny noise. \
I called customer service but the warranty expired \
already, so I had to buy another one. FYI: The overall \
quality has gone done in these types of products, so \
they are kind of counting on brand recognition and \
consumer loyalty to maintain sales. Got it in about \
two days.
"""

# Gather the four sample reviews into one list so they can be processed in a loop.
reviews = [review_1, review_2, review_3, review_4]

In [27]:
# Summarize every review and render the model's HTML answer inline.
# Changes compared with the index-based version of this loop:
#   * iterate with enumerate() instead of range(len(...));
#   * pass the 1-based review number in the prompt, since the instructions
#     ask the model to print it above the table;
#   * the first continuation backslash is now the last character on its line
#     (a trailing space after it put a literal backslash in the prompt).
for review_number, review in enumerate(reviews, start=1):
    prompt = f"""
    Your task is to generate a short summary of a product \
    review from an ecommerce site. 

    Summarize the review below, delimited by triple \
    backticks in at most 20 words. 

    Then, format it as HTML so we can display it in a \
    website as a table. The table would be preceded by the review number, \
    then the table will have two columns, \
    the header will have a positive emoji in the left column \
    and a negative emoji in the right column. \
    Review lines will be separated by positive and negative sentiments \
    Review lines with positive sentiment will be written in cells  \
    belonging to the positive column, and review lines with negative \
    sentiment will be written in cells belonging to the negative column.

    Review number: {review_number}
    Review: ```{review}```
    """

    response = get_completion(prompt)
    # NOTE(review): the HTML comes straight from the model and is rendered as-is.
    display(HTML(response))
    


😊,😞
"Got this panda plush toy for my daughter's birthday, who loves it and takes it everywhere. It's soft and super cute, and its face has a friendly look.",It's a bit small for what I paid though. I think there might be other options that are bigger for the same price.
"It arrived a day earlier than expected, so I got to play with it myself before I gave it to her.",


😊,😞
"Needed a nice lamp for my bedroom, and this one had additional storage and not too high of a price point.",
Got it fast - arrived in 2 days.,
The string to the lamp broke during the transit and the company happily sent over a new one.,
Came within a few days as well.,
It was easy to put together.,
"Then I had a missing part, so I contacted their support and they very quickly got me the missing piece!",
Seems to me to be a great company that cares about their customers and products.,


😃 Positive,😞 Negative
Battery life is impressive.,Toothbrush head is too small.
Good deal if around $50.,Manufacturer's replacement heads are expensive.
Makes teeth feel sparkly clean.,


😃 Positive,😞 Negative
Still had the 17 piece system on sale for $49 in November.,Prices increased to $70-$89 in December.
11 piece system also increased in price.,Base doesn't look as good as previous editions.
Plan to be gentle with it and use different blades for different purposes.,Motor started making a funny noise after a year.
Customer service couldn't help due to expired warranty.,Overall quality of the product has declined.
Received the product in about two days.,


## Scrape and prompt

In [28]:
url = "https://redeem-tomorrow.com/leviathan-wakes-the-case-for-apples-vision-pro"

# A timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() prevents summarizing the body of an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# The post text is read from <div class="e-content">. soup.find returns None
# when no such element exists, so fail with a clear message instead of an
# AttributeError on the next line.
div_tag = soup.find("div", class_="e-content")
if div_tag is None:
    raise ValueError(f"No <div class='e-content'> element found at {url}")

post_content = div_tag.get_text()


In [29]:
# Ask for a three-paragraph HTML summary of the scraped post and render it.
# (Fixes the "we ca display" typo in the instructions sent to the model.)
prompt = f"""
Your task is to generate a short summary of the post below, \
delimited by triple backticks in at most 200 words and three paragraphs.

Please format it in HTML so we can display it as output of a jupyter notebook.

Post: ```{post_content}```
"""

response = get_completion(prompt)
display(HTML(response))

In [3]:
url = "https://educatedguesswork.org/posts/wei/"

# A timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() prevents summarizing the body of an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# The post text is read from <div class="grid-main">. soup.find returns None
# when no such element exists, so fail with a clear message instead of an
# AttributeError on the next line.
div_tag = soup.find("div", class_="grid-main")
if div_tag is None:
    raise ValueError(f"No <div class='grid-main'> element found at {url}")

post_content = div_tag.get_text()

In [5]:
# Ask for a three-section HTML summary of the scraped post and render it.
# Fixes in the prompt text: the "threee" and "ca" typos, a stray "\n" escape
# that broke the section list in the middle of a sentence, and a trailing
# backslash that swallowed the blank line before the formatting instruction.
prompt = f"""
Your task is to generate a short summary of the post below delimited by triple backticks, \
It should have three sections, \
one describing the history of communications and internet, and the keys of its success, in less than 200 words \
then a section discussing the impact of mobile applications in closed platforms in less than 100 words \
and another one summarizing the opinions of the author regarding WEI proposal.

Please format it in HTML so we can display it as output of a jupyter notebook.

Post: ```{post_content}```
"""

response = get_completion(prompt)
display(HTML(response))

## Summarize from long PDF file by chapters

In [3]:
# Open the PDF to summarize; the path is a placeholder and must point to a real file.
reader = PdfReader("../../data/some_pdf_here.pdf") # you need to put something here
# Total number of pages in the document.
number_of_pages = len(reader.pages)




In [41]:
def get_chapter_ranges(reader, num_pages):
    """Pair each chapter's first page with the first page of the next one.

    A page opens a chapter when the first line of its extracted text
    contains the word "Chapter". Pages with no extracted lines never match.

    Args:
        reader: Object whose ``pages`` yields pages offering
            ``extract_text()`` and ``page_number``.
        num_pages: Value used as the end of the last chapter.

    Returns:
        A ``zip`` iterator of ``(start, end)`` tuples, where ``start`` is the
        opening page's ``page_number`` and ``end`` is the next chapter's
        start (``num_pages`` for the final chapter).
    """
    def opens_chapter(pg):
        # Only the first extracted line is inspected.
        head = pg.extract_text().splitlines()[:1]
        return bool(head) and "Chapter" in head[0]

    begins = [pg.page_number for pg in reader.pages if opens_chapter(pg)]
    ends = begins[1:]
    ends.append(num_pages)
    return zip(begins, ends)


In [25]:
# Show the (start, end) pairs of page numbers found for each chapter.
print(list(get_chapter_ranges(reader, number_of_pages)))

[(37, 49), (49, 65), (65, 79), (79, 103), (103, 113), (113, 135), (135, 171), (171, 177), (177, 187), (187, 193), (193, 223), (223, 247), (247, 259), (259, 267), (267, 295), (295, 307), (307, 327), (327, 369)]


In [42]:
def get_chapters(reader):
    """Yield the pages of each detected chapter as a list.

    Chapter boundaries come from ``get_chapter_ranges``. Each ``(start, end)``
    pair is used as a half-open index range into ``reader.pages``, so pages
    before the first detected chapter are not yielded.

    Args:
        reader: Object exposing an indexable ``pages`` sequence.

    Yields:
        list: The page objects of one chapter, in order.
    """
    page_count = len(reader.pages)
    for first, stop in get_chapter_ranges(reader, page_count):
        yield [reader.pages[idx] for idx in range(first, stop)]

In [46]:
def get_chapters_text(chapters):
    """Yield the extracted text of each chapter as a single string.

    Args:
        chapters: Iterable of chapters, each an iterable of page objects
            exposing ``extract_text()``.

    Yields:
        str: The page texts of one chapter concatenated in page order, with
        no separator inserted between pages. An empty chapter yields ``""``.
    """
    for chapter in chapters:
        # join() assembles the text in one pass instead of growing a string
        # with repeated += inside a loop.
        yield "".join(page.extract_text() for page in chapter)

In [58]:
# HTML summaries, one per chapter that was summarized successfully.
summaries = []

for chapter, text in enumerate(get_chapters_text(get_chapters(reader))):
    # Count the words before calling the API so the figure is available to
    # both the success and the failure message.
    num_words = len(text.split())
    prompt = f"""
        Your task is to generate a summary for the chapter text delimited by triple backticks of less than 300 words, \


        Please format it in HTML utilizing a title header with the chapter name \
        so we can display it as output of a jupyter notebook, with a title per summarized chapter.

        Post: ```{text}```
        """
    try:
        summary = get_completion(prompt)
    except RetryError as exc:
        # tenacity wraps the real failure in RetryError; report the underlying
        # exception so the reason a chapter was skipped is visible.
        cause = exc.last_attempt.exception()
        print(f"Error processing chapter {chapter} with {num_words} words: {cause!r}")
    else:
        summaries.append(summary)
        print(f"Processed chapter {chapter} with {num_words} words")


Procesed chapter 0 with 4114 words
Procesed chapter 1 with 4185 words
Procesed chapter 2 with 4132 words
Procesed chapter 3 with 6441 words
Procesed chapter 4 with 2620 words
Procesed chapter 5 with 4730 words
Procesed chapter 6 with 9977 words
Procesed chapter 7 with 1685 words
Procesed chapter 8 with 2444 words
Procesed chapter 9 with 1018 words
Procesed chapter 10 with 7712 words
Procesed chapter 11 with 5831 words
Procesed chapter 12 with 3568 words
Procesed chapter 13 with 2283 words
Procesed chapter 14 with 8453 words
Procesed chapter 15 with 3852 words
Procesed chapter 16 with 5939 words
Error processing chapter 17 with 12783 words


In [48]:
# Concatenate the collected HTML summaries and render them as one document.
display(HTML("".join(summaries)))
