In [5]:
 # Initial imports
import os
import pandas as pd
from newsapi import NewsApiClient
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
load_dotenv()
%matplotlib inline

In [6]:
 # Read your api key environment variable

api_key = os.getenv('NEWS_API_KEY')

In [7]:
# Create a newsapi client
newsapi = NewsApiClient(api_key=api_key)

In [8]:
 # Fetch the Bitcoin news articles
btc_headlines = newsapi.get_everything(
    q='bitcoin',
    language='en'
)

# Show sample article
btc_headlines["articles"][0]

{'source': {'id': 'engadget', 'name': 'Engadget'},
 'author': 'Jon Fingas',
 'title': 'Tesla buys $1.5 in Bitcoin, will soon accept it as payment',
 'description': 'Elon Musk’s cryptocurrency hype was more than just idle talk. CNBC reports that Tesla not only bought $1.5 billion worth of Bitcoin to help “diversify and maximize” its returns, but will start taking payments using the digital asset sometime in the “near futu…',
 'url': 'https://www.engadget.com/tesla-to-take-bitcoin-payments-140109988.html',
 'urlToImage': 'https://s.yimg.com/os/creatr-uploaded-images/2021-02/389f89e0-6a11-11eb-b5c5-309f2241e56a',
 'publishedAt': '2021-02-08T14:01:09Z',
 'content': 'Elon Musks cryptocurrency hype was more than just idle talk. CNBCreports that Tesla not only bought $1.5 billion worth of Bitcoin to help diversify and maximize its returns, but will start taking pay… [+1182 chars]'}

In [9]:
# Fetch the Ethereum news articles
eth_headlines = newsapi.get_everything(
    q='ethereum',
    language='en'
)

# Show sample article
eth_headlines["articles"][0]

{'source': {'id': 'engadget', 'name': 'Engadget'},
 'author': 'Jon Fingas',
 'title': 'NVIDIA limits RTX 3060 crypto speeds as it introduces mining cards',
 'description': "Worried that the GeForce RTX 3060 will be sold out as cryptocurrency miners snap up every GPU in sight? NVIDIA thinks it has a simple way to help: make the new card unattractive to the crypto crowd. The company has revealed that it's cutting the hash rate (mi…",
 'url': 'https://www.engadget.com/nvidia-limits-rtx-3060-for-crypto-mining-161657361.html',
 'urlToImage': 'https://s.yimg.com/uu/api/res/1.2/VLoRobjl1FcFEgV_0XjzqQ--~B/aD0xMDY3O3c9MTYwMDthcHBpZD15dGFjaHlvbg--/https://s.yimg.com/os/creatr-uploaded-images/2021-02/cad5ae60-7200-11eb-bbbc-25bd3507e406.cf.jpg',
 'publishedAt': '2021-02-18T16:16:57Z',
 'content': 'Worried that the GeForce RTX 3060 will be sold out as cryptocurrency miners snap up every GPU in sight? NVIDIA thinks it has a simple way to help: make the new card unattractive to the crypto crowd. … [

In [10]:
# Initialize the sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

In [11]:
# Create the Bitcoin sentiment scores DataFrame
btc_sentiments = []

for article in btc_headlines["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        btc_sentiments.append({
            "text": text,
            "date": date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
# Create DataFrame
btc_df = pd.DataFrame(btc_sentiments)

# Reorder DataFrame columns
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
btc_df = btc_df[cols]

btc_df.head()


Unnamed: 0,date,text,compound,positive,negative,neutral
0,2021-02-08,Elon Musks cryptocurrency hype was more than j...,0.3182,0.091,0.0,0.909
1,2021-02-09,"Tesla, led by Elon Musk, confirmed that it pur...",0.3818,0.071,0.0,0.929
2,2021-02-09,"For a brief moment on Sunday, before Tesla sai...",-0.1027,0.0,0.038,0.962
3,2021-02-12,Twitter and Square CEO Jack Dorsey and rapper ...,0.25,0.056,0.0,0.944
4,2021-02-08,"Today in an SEC filing, Tesla disclosed that i...",0.6597,0.137,0.0,0.863


In [12]:
# Create the ethereum sentiment scores DataFrame
eth_sentiments = []

for article in eth_headlines["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        eth_sentiments.append({
            "text": text,
            "date": date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
# Create DataFrame
eth_df = pd.DataFrame(eth_sentiments)

# Reorder DataFrame columns
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
eth_df = eth_df[cols]

eth_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
0,2021-02-18,Worried that the GeForce RTX 3060 will be sold...,-0.34,0.063,0.119,0.818
1,2021-01-28,Coinbase plans to go public by way of a direct...,0.0,0.0,0.0,1.0
2,2021-02-04,FILE PHOTO: Representation of the Ethereum vir...,0.0,0.0,0.0,1.0
3,2021-02-03,By Reuters Staff\r\nFILE PHOTO: Representation...,0.0,0.0,0.0,1.0
4,2021-02-20,While the first Christie's auction of digital ...,0.0258,0.03,0.0,0.97


In [13]:
 # Describe the Bitcoin Sentiment
btc_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,18.0,18.0,18.0,18.0
mean,0.199928,0.072111,0.019056,0.908778
std,0.37634,0.086707,0.03563,0.081887
min,-0.5037,0.0,0.0,0.647
25%,0.0,0.0,0.0,0.86875
50%,0.1779,0.0535,0.0,0.9195
75%,0.411575,0.0985,0.024,0.95725
max,0.9217,0.353,0.114,1.0


In [14]:
# Describe the Ethereum Sentiment
eth_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,20.0,20.0,20.0,20.0
mean,0.189495,0.0513,0.00595,0.94275
std,0.274088,0.054785,0.026609,0.062097
min,-0.34,0.0,0.0,0.818
25%,0.0,0.0,0.0,0.9
50%,0.0129,0.0465,0.0,0.9495
75%,0.3651,0.082,0.0,1.0
max,0.7096,0.144,0.119,1.0


In [15]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [16]:
# Expand the default stopwords list if necessary

In [17]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text."""
    
    # Create a list of the words
    sw = set(stopwords.words('english'))
    # Convert the words to lowercase
    regex = re.compile("[^a-zA-Z ]")
    #custom stopwords
    sw_addons = {'said', 'sent', 'found', 'including', 'today', 'announced', 'week', 'basically', 'also'}
    # Remove the punctuation
    re_clean = regex.sub('', text)
    # Remove the stop words
    words = word_tokenize(re_clean)
    # Lemmatize Words into root words
    lemmatizer = WordNetLemmatizer()
    lem=[lemmatizer.lemmatize(word) for word in words]
    
    tokens = [word.lower() for word in words if word.lower() not in sw.union(sw_addons)]
    
    return tokens

In [18]:
btc_df['tokens']=btc_df['text'].apply(tokenizer)
btc_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral,tokens
0,2021-02-08,Elon Musks cryptocurrency hype was more than j...,0.3182,0.091,0.0,0.909,"[elon, musks, cryptocurrency, hype, idle, talk..."
1,2021-02-09,"Tesla, led by Elon Musk, confirmed that it pur...",0.3818,0.071,0.0,0.929,"[tesla, led, elon, musk, confirmed, purchased,..."
2,2021-02-09,"For a brief moment on Sunday, before Tesla sai...",-0.1027,0.0,0.038,0.962,"[brief, moment, sunday, tesla, invested, billi..."
3,2021-02-12,Twitter and Square CEO Jack Dorsey and rapper ...,0.25,0.056,0.0,0.944,"[twitter, square, ceo, jack, dorsey, rapper, j..."
4,2021-02-08,"Today in an SEC filing, Tesla disclosed that i...",0.6597,0.137,0.0,0.863,"[sec, filing, tesla, disclosed, acquired, bill..."


In [19]:
# Create a new tokens column for ethereum
eth_df['tokens']=eth_df['text'].apply(tokenizer)
eth_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral,tokens
0,2021-02-18,Worried that the GeForce RTX 3060 will be sold...,-0.34,0.063,0.119,0.818,"[worried, geforce, rtx, sold, cryptocurrency, ..."
1,2021-01-28,Coinbase plans to go public by way of a direct...,0.0,0.0,0.0,1.0,"[coinbase, plans, go, public, way, direct, lis..."
2,2021-02-04,FILE PHOTO: Representation of the Ethereum vir...,0.0,0.0,0.0,1.0,"[file, photo, representation, ethereum, virtua..."
3,2021-02-03,By Reuters Staff\r\nFILE PHOTO: Representation...,0.0,0.0,0.0,1.0,"[reuters, stafffile, photo, representation, et..."
4,2021-02-20,While the first Christie's auction of digital ...,0.0258,0.03,0.0,0.97,"[first, christies, auction, digital, artwork, ..."


In [20]:
#NGrams and Frequency Analysis

In [21]:
from collections import Counter
from nltk import ngrams

In [46]:
 # Generate the Bitcoin N-grams where N=2
processed_btc = btc_df['tokens']
print(processed_btc)

0     [elon, musks, cryptocurrency, hype, idle, talk...
1     [tesla, led, elon, musk, confirmed, purchased,...
2     [brief, moment, sunday, tesla, invested, billi...
3     [twitter, square, ceo, jack, dorsey, rapper, j...
4     [sec, filing, tesla, disclosed, acquired, bill...
5     [bitcoin, first, appeared, digital, thin, air,...
6     [tldr, bitcoin, cryptocurrency, mastery, bundl...
7     [liveupdatedfeb, etfeb, etheres, need, knowbit...
8     [bitcoin, future, morning, hit, alltime, high,...
9     [jack, dorsey, jay, z, create, bitcoin, endowm...
10    [case, havent, noticed, bitcoins, bit, moment,...
11    [miamis, mayor, francis, suarez, champing, bit...
12    [man, inevitableon, friday, tesla, ceo, elon, ...
13    [new, york, cnn, businessbitcoins, roller, coa...
14    [cryptocurrencies, things, valuable, shared, a...
15    [coinbase, plans, go, public, way, direct, lis...
16    [alabamas, longestserving, senator, richard, s...
17    [blockchaincom, raised, million, funding, 

In [48]:
 # Generate the Ethereum N-grams where N=2
processed_eth = eth_df['tokens']
print(processed_eth)
    

0     [worried, geforce, rtx, sold, cryptocurrency, ...
1     [coinbase, plans, go, public, way, direct, lis...
2     [file, photo, representation, ethereum, virtua...
3     [reuters, stafffile, photo, representation, et...
4     [first, christies, auction, digital, artwork, ...
5     [attempting, get, ahead, cryptocurrency, miner...
6     [reuters, stafffile, photo, logo, bitcoin, dig...
7     [reuters, staffsingapore, feb, reuters, crypto...
8     [new, york, reuters, ethereum, second, largest...
9     [new, york, feb, reuters, ethereum, second, la...
10    [file, photo, representation, ethereum, virtua...
11    [ethereumrose, ending, st, february, following...
12    [dollar, index, little, changed, friday, payro...
13    [company, newsthyagaraju, adinarayananna, irre...
14    [new, york, reuters, investment, inflows, cryp...
15    [reuters, staffsingapore, reuters, bitcoin, hi...
16    [new, york, feb, reuters, investment, inflows,...
17    [dollar, continues, take, cues, economy, i

In [28]:
 # Use the token_count function to generate the top 10 words from each coin
def token_count(tokens, N=10):
    """Returns the top N tokens from the frequency count"""
    return Counter(tokens).most_common(N)

In [29]:
# Get the top 10 words for Bitcoin
# YOUR CODE HERE!

In [30]:
# Get the top 10 words for Ethereum
# YOUR CODE HERE!

In [31]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = [20.0, 10.0]

In [None]:
 # Generate the Bitcoin word cloud
# YOUR CODE HERE!

In [32]:
# Generate the Ethereum word cloud
# YOUR CODE HERE! 

In [33]:
 import spacy
from spacy import displacy

In [34]:
!python -m spacy download en_core_web_sm


[93m    Error: Couldn't link model to 'en_core_web_sm'[0m
    Creating a symlink in spacy/data failed. Make sure you have the required
    permissions and try re-running the command as admin, or use a
    virtualenv. You can still import the model as a module and call its
    load() method, or create the symlink manually.

    C:\Users\maxla\anaconda3\envs\dev\lib\site-packages\en_core_web_sm -->
    C:\Users\maxla\anaconda3\envs\dev\lib\site-packages\spacy\data\en_core_web_sm


[93m    Creating a shortcut link for 'en' didn't work (maybe you don't have
    admin permissions?), but you can still load the model via its full
    package name: nlp = spacy.load('{name}')[0m
    Download successful but linking failed



In [35]:
 # Load the spaCy model
nlp = spacy.load('en_core_web_sm')

In [36]:
 # Concatenate all of the bitcoin text together
# YOUR CODE HERE!

In [37]:
# Run the NER processor on all of the text
# YOUR CODE HERE!

# Add a title to the document
# YOUR CODE HERE!

In [40]:
# Render the visualization
# YOUR CODE HERE!

In [41]:
# List all Entities
# YOUR CODE HERE!

In [42]:
# Concatenate all of the eth text together
# YOUR CODE HERE!

In [43]:
 # Run the NER processor on all of the text
# YOUR CODE HERE!

# Add a title to the document
# YOUR CODE HERE!

In [44]:
# Render the visualization
# YOUR CODE HERE!

In [45]:
 # List all Entities
# YOUR CODE HERE!