In [1]:
!pip install newspaper3k transformers gradio --quiet 

[K     |████████████████████████████████| 211 kB 7.4 MB/s 
[K     |████████████████████████████████| 2.6 MB 46.6 MB/s 
[K     |████████████████████████████████| 2.2 MB 11.0 MB/s 
[K     |████████████████████████████████| 81 kB 6.9 MB/s 
[K     |████████████████████████████████| 7.4 MB 47.2 MB/s 
[K     |████████████████████████████████| 87 kB 6.7 MB/s 
[K     |████████████████████████████████| 3.3 MB 60.2 MB/s 
[K     |████████████████████████████████| 895 kB 62.9 MB/s 
[K     |████████████████████████████████| 636 kB 58.5 MB/s 
[K     |████████████████████████████████| 206 kB 56.1 MB/s 
[K     |████████████████████████████████| 1.9 MB 45.3 MB/s 
[K     |████████████████████████████████| 961 kB 36.7 MB/s 
[K     |████████████████████████████████| 3.2 MB 29.6 MB/s 
[K     |████████████████████████████████| 63 kB 2.7 MB/s 
[?25h  Building wheel for tinysegmenter (setup.py) ... [?25l[?25hdone
  Building wheel for feedfinder2 (setup.py) ... [?25l[?25hdone
  Building whee

## Load libraries

In [2]:
# Article scraping.
import nltk
from newspaper import Article, Config

# Summarization models and the demo UI.
import gradio as gr
from gradio.mix import Parallel, Series
from transformers import pipeline

# Fetch the 'punkt' tokenizer data from NLTK.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
# Article used throughout the walkthrough below.
url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'

# Browser-like User-Agent string handed to newspaper through its Config object.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

config = Config()
config.request_timeout = 10
config.browser_user_agent = USER_AGENT

article = Article(url, config=config)

## Download the article

In [4]:
# Fetch the page for `url`; the next cell inspects the result via `article.html`.
article.download() 

In [5]:
# Inspect the raw HTML held by the article after the download step.
article.html

'<!doctype html>\n<html lang="en" lang="en" style="\n            overflow: ;\n          " >\n<head>\n    <meta charset="utf-8">\n    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">\n    <title data-react-helmet="true">AI voice actors sound more human than ever—and are ready to hire | MIT Technology Review</title>\n    <meta data-react-helmet="true" name="description" content="A new wave of startups are using deep learning to build synthetic voice actors for digital assistants, video-game characters, and corporate videos."/><meta data-react-helmet="true" name="keywords" content=""/><meta data-react-helmet="true" property="og:url" content="https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/"/><meta data-react-helmet="true" property="og:type" content="article"/><meta data-react-helmet="true" property="og:title" content="AI voice actors sound more human than ever—and they’re ready to hire"/><meta data-react-helmet="true" p

## Parse information from the article

In [6]:
# Parse the downloaded page, then pull out the fields reported in the next cell.
article.parse()

title, text = article.title, article.text
authors = ", ".join(article.authors)
date = article.publish_date
image, videos = article.top_image, article.movies
url = article.url

In [7]:
# Assemble the report lines first, then emit them with a single print call
# (one line per entry, same as printing each separately).
report_lines = (
    "Information about the article",
    "=" * 30,
    f"Title: {title}",
    f"Author(s): {authors}",
    f"Publish date: {date}",
    f"Image: {image}",
    f"Videos: {videos}",
    f"Article link: {url}",
    f"Content: {text[:100] + '...'}",
)
print(*report_lines, sep="\n")

Information about the article
Title: AI voice actors sound more human than ever—and they’re ready to hire
Author(s): Karen Hao
Publish date: 2021-07-09 00:00:00
Image: https://wp.technologyreview.com/wp-content/uploads/2021/07/AIAudioActor-2.jpg?resize=1200,600
Videos: []
Article link: https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/
Content: The company blog post drips with the enthusiasm of a ’90s US infomercial. WellSaid Labs describes wh...


## Run NLP on the article

In [8]:
# Run newspaper's NLP step; the following cells read `article.keywords` and `article.summary`.
# NOTE(review): presumably this is what needs the NLTK 'punkt' data downloaded above — confirm.
article.nlp()

In [9]:
# sorted() builds a new list, so the article's own `keywords` list is left untouched
# (calling .sort() on the alias would reorder article.keywords in place).
keywords = sorted(article.keywords)
print(keywords)

['actors', 'ai', 'audio', 'certainly', 'clients', 'companies', 'different', 'everand', 'hire', 'human', 'ready', 'sound', 'theyre', 'voice', 'voices']


In [10]:
# Build the display string from the article's keyword list rather than from `keywords` itself:
# re-running this cell then gives the same result instead of joining the characters
# of an already-joined string.
keywords = "\n".join(sorted(article.keywords))

In [11]:
# `keywords` is the newline-joined string from the previous cell, so each keyword prints on its own line.
print(f"Article Keywords: \n{keywords}")

Article Keywords: 
actors
ai
audio
certainly
clients
companies
different
everand
hire
human
ready
sound
theyre
voice
voices


### Newspaper library summary

In [12]:
# Show the summary stored on the article (presumably populated by `article.nlp()` above — confirm).
print(f"Summary: \n{article.summary}")

Summary: 
Unlike a recording of a human voice actor, synthetic voices can also update their script in real time, opening up new opportunities to personalize advertising.
Human voice actors, in particular, have been left to wonder what this means for their livelihoods.
Part of what makes a human voice so human is its inconsistency, expressiveness, and ability to deliver the same lines in completely different styles, depending on the context.
Capturing these nuances involves finding the right voice actors to supply the appropriate training data and fine-tune the deep-learning models.
Many of its clients use the synthesized voices only in pre-production and switch to real voice actors for the final production.


In [13]:
# Display the full article text captured from `article.text` in the parse cell.
text

'The company blog post drips with the enthusiasm of a ’90s US infomercial. WellSaid Labs describes what clients can expect from its “eight new digital voice actors!” Tobin is “energetic and insightful.” Paige is “poised and expressive.” Ava is “polished, self-assured, and professional.”\n\nEach one is based on a real voice actor, whose likeness (with consent) has been preserved using AI. Companies can now license these voices to say whatever they need. They simply feed some text into the voice engine, and out will spool a crisp audio clip of a natural-sounding performance.\n\nWellSaid Labs, a Seattle-based startup that spun out of the research nonprofit Allen Institute of Artificial Intelligence, is the latest firm offering AI voices to clients. For now, it specializes in voices for corporate e-learning videos. Other startups make voices for digital assistants, call center operators, and even video-game characters.\n\nNot too long ago, such deepfake voices had something of a lousy repu

## Summarize with Hugging Face and Gradio

In [14]:
# Hugging Face summarization models to compare side by side on the same input.
summarizer_ids = [
    'huggingface/sshleifer/distilbart-cnn-12-6',
    "huggingface/facebook/bart-large-cnn",
    "huggingface/google/pegasus-xsum",
    "huggingface/sshleifer/distilbart-cnn-6-6",
]
io1, io2, io3, io4 = map(gr.Interface.load, summarizer_ids)

# A single shared text box feeds every model.
text_input = gr.inputs.Textbox(lines=10, label="Text")
iface = Parallel(io1, io2, io3, io4, inputs=text_input, theme='huggingface')

iface.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
This share link will expire in 24 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted (NEW!)
Running on External URL: https://38417.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7860/',
 'https://38417.gradio.app')

In [15]:
def extract_article_text(url):
  """Download the article at `url` and return its parsed body text.

  Args:
    url: Address of the article page. Surrounding whitespace (for example a
      trailing newline from a multi-line text box) is stripped before use.

  Returns:
    The article text as exposed by newspaper's `Article.text`.

  Raises:
    ValueError: If `url` is not a string or is empty after stripping.
    Errors raised by newspaper while downloading or parsing propagate unchanged.
  """
  # Reject unusable input up front so the caller gets a clear message
  # instead of a failure from deep inside the download/parse steps.
  cleaned_url = url.strip() if isinstance(url, str) else ''
  if not cleaned_url:
    raise ValueError('A non-empty article URL is required.')

  # Same browser-like User-Agent and timeout as the walkthrough cells above.
  USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
  config = Config()
  config.browser_user_agent = USER_AGENT
  config.request_timeout = 10

  article = Article(cleaned_url, config=config)
  article.download()
  article.parse()
  return article.text

In [16]:
# Example article URLs offered in the UI.
sample_url = [
    ['https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/'],
    ['https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/'],
    ['https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'],
]

# Description text shown with the app.
desc =  '''
        Let Hugging Face models summarize articles for you. 
        Note: Shorter articles generate faster summaries.
        This summarizer uses bart-large-cnn model by Facebook
        '''

# Stage 1 turns a URL into article text; stage 2 summarizes that text.
extractor = gr.Interface(extract_article_text, 'text', 'text')
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

url_box = gr.inputs.Textbox(lines=2, label='URL')
iface = Series(
    extractor,
    summarizer,
    inputs=url_box,
    outputs='text',
    title='News Summarizer',
    theme='huggingface',
    description=desc,
    examples=sample_url,
)

iface.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
This share link will expire in 24 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted (NEW!)
Running on External URL: https://47893.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7861/',
 'https://47893.gradio.app')