In [28]:
import os
import openai
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
import warnings
import re

# Load settings via python-dotenv, then hand the OpenAI key to the client
# library. The key is None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

def url_to_string(url, timeout=30, verify=False):
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.
        verify: Forwarded to ``requests.get``. Defaults to ``False`` (TLS
            certificate checks disabled) to keep the original behaviour.
            NOTE(review): pass ``True`` for anything beyond throwaway
            experiments -- unverified HTTPS is open to interception.

    Returns:
        The text extracted from the parsed HTML by ``BeautifulSoup.get_text()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    with warnings.catch_warnings():
        if not verify:
            # Silence the unverified-HTTPS warning for this request only,
            # instead of permanently altering the process-wide filters.
            warnings.filterwarnings('ignore', message='Unverified HTTPS request')
        response = requests.get(url, verify=verify, timeout=timeout)
    # Fail loudly rather than scraping the body of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text()

def split_text(text, chunk_size=1000):
    """Split ``text`` into chunks that end on a sentence terminator.

    Each chunk covers at least ``chunk_size`` characters and is then extended
    up to and including the next ``.``, ``?`` or ``!`` (or to the end of the
    text when no terminator follows). Surrounding whitespace is stripped from
    every chunk, and chunks that are empty after stripping are dropped so
    callers never receive blank strings.

    Args:
        text: The string to split.
        chunk_size: Minimum number of characters to take before looking for
            the next sentence terminator. Must be non-negative.

    Returns:
        A list of non-empty, stripped chunk strings in document order.

    Raises:
        ValueError: If ``chunk_size`` is negative (a negative size could
            otherwise stop the cursor from advancing).
    """
    if chunk_size < 0:
        raise ValueError("chunk_size must be non-negative")

    chunks = []
    text_len = len(text)
    start = 0
    while start < text_len:
        end = start + chunk_size
        # Walk forward to the first terminator at or after the size limit.
        while end < text_len and text[end] not in '.?!':
            end += 1
        # end + 1 keeps the terminator itself inside the chunk.
        chunk = text[start:end + 1].strip()
        if chunk:
            chunks.append(chunk)
        start = end + 1
    return chunks

def openai_edit(text, i = "Rewrite it in narrative style") :
    """Rewrite ``text`` through ``openai.Edit.create``.

    Args:
        text: Passage sent as the edit ``input``.
        i: Instruction given to the model.

    Returns:
        The ``text`` field of the first choice in the API response.
    """
    request = {
        "model": "text-davinci-edit-001",
        "input": text,
        "instruction": i,
    }
    first_choice = openai.Edit.create(**request)['choices'][0]
    return first_choice['text']

def openai_complete(text, i = "in a narrative style rewrite text below") :
    """Rewrite ``text`` through ``openai.Completion.create``.

    The prompt is the instruction ``i``, a newline, then ``text``.

    Args:
        text: Passage to rewrite; appended to the prompt after the instruction.
        i: Instruction placed on the first line of the prompt.

    Returns:
        The ``text`` field of the first choice in the API response, returned
        as-is (not stripped).
    """
    response = openai.Completion.create(
        model = "text-davinci-003",
        prompt = i + "\n" + text,
        temperature = 0.5,
        max_tokens = 2048,
        top_p = 1,
        frequency_penalty = 0.2,
    )
    # Only one completion is requested, so the first choice is the only one;
    # index 0 matches how openai_edit reads its response.
    return response['choices'][0]['text']

def rewrite_text(chunks, func):
    """Apply ``func`` to each chunk and concatenate the results.

    Args:
        chunks: Iterable of text chunks, processed in order.
        func: Callable taking one chunk and returning its rewritten string.

    Returns:
        All rewritten chunks joined back-to-back with no separator; an empty
        string when ``chunks`` is empty.
    """
    return "".join(func(piece) for piece in chunks)
    
# Fetch the full article and cut it into sentence-aligned chunks.
ARTICLE_URL = 'http://socalnews.com/la-story-3.html'
article_text = url_to_string(ARTICLE_URL)
text_chunks = split_text(article_text)

# Short excerpt used by the model comparison cells below.
small_text_example = (
    "The Burbank-based Walt Disney Co. will slash 7,000 jobs as part of a "
    "$5.5 billion cost-cutting effort, CEO Bob Iger announced Wednesday."
    "The possibility of layoffs at Disney has been rumored for weeks, with "
    "Iger regaining his footing in the CEO’s office following the surprise "
    "ouster of CEO Bob Chapek in November. "
    "Iger broke the news during an earnings call Wednesday afternoon, "
    "following the release of the company’s first-quarter earnings report. "
    "Iger said he is targeting $5.5 billion in cost savings “across the company.”"
)
print(small_text_example)


The Burbank-based Walt Disney Co. will slash 7,000 jobs as part of a $5.5 billion cost-cutting effort, CEO Bob Iger announced Wednesday.The possibility of layoffs at Disney has been rumored for weeks, with Iger regaining his footing in the CEO’s office following the surprise ouster of CEO Bob Chapek in November. Iger broke the news during an earnings call Wednesday afternoon, following the release of the company’s first-quarter earnings report. Iger said he is targeting $5.5 billion in cost savings “across the company.”


In [47]:
# Full-article variant, left disabled:
# new_text = rewrite_text(text_chunks, openai_edit)

# First run of the edit model on the short excerpt.
edit_model = openai_edit(small_text_example)
print(f"Example #1\nModel: Edit\n\n{edit_model}")

Example #1
Model: Edit

The Burbank-based Walt Disney Co. announced Wednesday it will slash 7,000 jobs as part of a $5.5 billion cost-cutting effort.
The possibility of layoffs had been rumored for weeks, with CEO Bob Iger regaining his footing in the CEO’s office following the surprise ouster of former CEO Bob Chapek in November.
Iger announced this during an earnings call Wednesday, following the release of the company’s first-quarter earnings report.
Iger said he is targeting $5.5 billion in cost savings “across the company.”



In [48]:
# Second run of the edit model on the same excerpt, to compare against the first run.
edit_model = openai_edit(small_text_example)
print(f"Example #2\nModel: Edit\n\n{edit_model}")

Example #2
Model: Edit

Earlier today, the Burbank-based Walt Disney Co. announced a plan to slash 7,000 jobs as part of a $5.5 billion cost-cutting effort.
CEO Bob Iger announced the plan during the company’s first-quarter earnings call on Wednesday.
The possibility of layoffs at Disney has been rumored for weeks, with Iger regaining his footing in the CEO’s office following the surprise ouster of CEO Bob Chapek in November.
Iger said he is targeting $5.5 billion in cost savings “across the company.”



In [56]:
# First run of the completion model on the short excerpt. The result is
# printed directly after the header with no separator added here.
complete_model = openai_complete(small_text_example)
print(f"Example #1\nModel: Complete{complete_model}")

Example #1
Model: Complete

On Wednesday, Bob Iger, CEO of the Burbank-based Walt Disney Company, confirmed weeks of speculation that the company would be cutting 7,000 jobs as part of a $5.5 billion cost-cutting effort. Iger made the announcement during an earnings call following the release of the company’s first-quarter earnings report. He stated that these reductions will be spread “across the company” and are necessary in order to reach their goal of $5.5 billion in cost savings. This news comes shortly after Iger took back his role as CEO following the unexpected departure of Bob Chapek in November.


In [55]:
# Second run of the completion model on the same excerpt, for comparison with the first run.
complete_model = openai_complete(small_text_example)
print(f"Example #2\nModel: Complete{complete_model}")

Example #2
Model: Complete

Bob Iger, CEO of the Burbank-based Walt Disney Co., announced on Wednesday that the company will lay off 7,000 employees in a $5.5 billion cost-cutting effort. This news follows weeks of speculation and rumors after Bob Chapek was unexpectedly removed from the CEO's office in November. Iger made the announcement during an earnings call after the release of the company's first-quarter earnings report. He stated that the company is aiming for $5.5 billion in savings "across the company."


In [39]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
# import spacy
import en_core_web_sm
from pprint import pprint

# Load the en_core_web_sm pipeline and run it over the short excerpt.
nlp = en_core_web_sm.load()
doc = nlp(small_text_example)

# Show each detected entity's text together with its label.
pprint([
    (ent.text, ent.label_)
    for ent in doc.ents
])

[('Burbank', 'ORG'),
 ('Walt Disney Co.', 'ORG'),
 ('7,000', 'CARDINAL'),
 ('$5.5 billion', 'MONEY'),
 ('Bob Iger', 'PERSON'),
 ('Wednesday', 'DATE'),
 ('Disney', 'ORG'),
 ('weeks', 'DATE'),
 ('Iger', 'ORG'),
 ('Bob Chapek', 'PERSON'),
 ('November', 'DATE'),
 ('Wednesday', 'DATE'),
 ('afternoon', 'TIME'),
 ('first-quarter', 'DATE'),
 ('$5.5 billion', 'MONEY')]


In [31]:
from collections import Counter
# Tally how many entities were found for each label.
labels = [ent.label_ for ent in doc.ents]
Counter(labels)

Counter({'ORG': 4,
         'CARDINAL': 1,
         'MONEY': 2,
         'PERSON': 2,
         'DATE': 5,
         'TIME': 1})

In [30]:
# The three entity strings that occur most often in the excerpt.
items = [ent.text for ent in doc.ents]
Counter(items).most_common(3)

[('$5.5 billion', 2), ('Wednesday', 2), ('Burbank', 1)]