# Introduction

In this notebook, we explore the processed text and build some exploratory visualizations.

# Imports

In [50]:
from random import randint

import boto3

from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

from covid.utils.viz import plotly_wordcloud

# Setup

In [51]:
# S3 bucket from which the processed articles file is downloaded.
bucket = 'mleila-covid'

# Region of the default boto3 session.
# NOTE(review): no cell visible here reads `region_name`; the Comprehend client
# is created with an explicit 'us-east-1' instead -- confirm whether that is intended.
session = boto3.Session()
region_name = session.region_name

# Load Data

In [52]:
# Download the processed articles file from S3 into the working directory.
s3 = boto3.resource('s3')
s3.Bucket(bucket).download_file('processed/covid/articles.txt', 'clean_articles.txt')

# Read the file into a list of lines (presumably one article per line -- the
# sampling cell below treats a single line as a document).
#  * The encoding is passed explicitly so decoding does not depend on the
#    platform's locale default (assumes the file is UTF-8 -- TODO confirm
#    against the preprocessing step that wrote it).
#  * Whitespace-only lines are skipped so that random sampling can never pick
#    an empty document. Kept lines still carry their trailing newline.
with open('clean_articles.txt', 'r', encoding='utf-8') as f:
    data = [line for line in f if line.strip()]

# Document Word Cloud

In [56]:
# Choose one line of `data` at random. randint() includes both endpoints,
# so the upper bound is the last valid index.
sample_index = randint(0, len(data) - 1)
sample_text = data[sample_index]

# Build the word cloud for the sampled text and render it inline.
wordcloud_figure = plotly_wordcloud(sample_text)
iplot(wordcloud_figure)

# Sentiment Analysis using Comprehend

In [57]:
# Display the sampled document; the Comprehend calls below use it as their input.
sample_text

"People worried about germs might be tempted to trade their reusable mugs and bags for single-use plastics. Environmental advocates say that's not any safer. \n"

In [70]:
# Amazon Comprehend client, created with an explicit us-east-1 region.
comprehend = boto3.client('comprehend', region_name='us-east-1')

# The three analyses below all receive the same text and language code.
comprehend_request = {'Text': sample_text, 'LanguageCode': 'en'}

# Key phrases
phrases = comprehend.detect_key_phrases(**comprehend_request)

# Entities
entities = comprehend.detect_entities(**comprehend_request)

# Sentiment
sentiments = comprehend.detect_sentiment(**comprehend_request)

In [78]:
# Keep only the text of each detected key phrase; the rest of each entry is dropped.
key_phrases = [phrase['Text'] for phrase in phrases['KeyPhrases']]

# Report sentiment, entities and key phrases, one per line.
print(
    f"Comprehend detected a {sentiments['Sentiment']}",
    f"Comprehend detected the following entities {entities['Entities']}",
    f"Comprehend detected the following key phrases {key_phrases}",
    sep="\n",
)

Comprehend detected a NEGATIVE
Comprehend detected the following entities []
Comprehend detected the following key phrases ['People', 'germs', 'their reusable mugs and bags', 'single-use plastics', 'Environmental advocates']
