In [1]:
 # Initial imports
import os
import pandas as pd
from newsapi import NewsApiClient
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
load_dotenv()
%matplotlib inline

In [2]:
 # Read your api key environment variable

api_key = os.getenv('NEWS_API_KEY')

In [3]:
# Create a newsapi client
newsapi = NewsApiClient(api_key=api_key)

In [4]:
 # Fetch the Bitcoin news articles
btc_headlines = newsapi.get_everything(
    q='bitcoin',
    language='en'
)

# Show sample article
btc_headlines["articles"][0]

{'source': {'id': 'engadget', 'name': 'Engadget'},
 'author': 'Jon Fingas',
 'title': 'Tesla buys $1.5 in Bitcoin, will soon accept it as payment',
 'description': 'Elon Musk’s cryptocurrency hype was more than just idle talk. CNBC reports that Tesla not only bought $1.5 billion worth of Bitcoin to help “diversify and maximize” its returns, but will start taking payments using the digital asset sometime in the “near futu…',
 'url': 'https://www.engadget.com/tesla-to-take-bitcoin-payments-140109988.html',
 'urlToImage': 'https://s.yimg.com/os/creatr-uploaded-images/2021-02/389f89e0-6a11-11eb-b5c5-309f2241e56a',
 'publishedAt': '2021-02-08T14:01:09Z',
 'content': 'Elon Musks cryptocurrency hype was more than just idle talk. CNBCreports that Tesla not only bought $1.5 billion worth of Bitcoin to help diversify and maximize its returns, but will start taking pay… [+1182 chars]'}

In [5]:
# Fetch the Ethereum news articles
eth_headlines = newsapi.get_everything(
    q='ethereum',
    language='en'
)

# Show sample article
eth_headlines["articles"][0]

{'source': {'id': 'engadget', 'name': 'Engadget'},
 'author': 'Jon Fingas',
 'title': 'NVIDIA limits RTX 3060 crypto speeds as it introduces mining cards',
 'description': "Worried that the GeForce RTX 3060 will be sold out as cryptocurrency miners snap up every GPU in sight? NVIDIA thinks it has a simple way to help: make the new card unattractive to the crypto crowd. The company has revealed that it's cutting the hash rate (mi…",
 'url': 'https://www.engadget.com/nvidia-limits-rtx-3060-for-crypto-mining-161657361.html',
 'urlToImage': 'https://s.yimg.com/uu/api/res/1.2/VLoRobjl1FcFEgV_0XjzqQ--~B/aD0xMDY3O3c9MTYwMDthcHBpZD15dGFjaHlvbg--/https://s.yimg.com/os/creatr-uploaded-images/2021-02/cad5ae60-7200-11eb-bbbc-25bd3507e406.cf.jpg',
 'publishedAt': '2021-02-18T16:16:57Z',
 'content': 'Worried that the GeForce RTX 3060 will be sold out as cryptocurrency miners snap up every GPU in sight? NVIDIA thinks it has a simple way to help: make the new card unattractive to the crypto crowd. … [

In [6]:
# Initialize the sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

In [8]:
# Create the Bitcoin sentiment scores DataFrame
btc_sentiments = []

for article in btc_headlines["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        btc_sentiments.append({
            "text": text,
            "date": date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
# Create DataFrame
btc_df = pd.DataFrame(btc_sentiments)

# Reorder DataFrame columns
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
btc_df = btc_df[cols]

btc_df.head()


Unnamed: 0,date,text,compound,positive,negative,neutral
0,2021-02-08,Elon Musks cryptocurrency hype was more than j...,0.3182,0.091,0.0,0.909
1,2021-02-09,"Tesla, led by Elon Musk, confirmed that it pur...",0.3818,0.071,0.0,0.929
2,2021-02-09,"For a brief moment on Sunday, before Tesla sai...",-0.1027,0.0,0.038,0.962
3,2021-02-12,Twitter and Square CEO Jack Dorsey and rapper ...,0.25,0.056,0.0,0.944
4,2021-02-08,"Today in an SEC filing, Tesla disclosed that i...",0.6597,0.137,0.0,0.863


In [10]:
# Create the ethereum sentiment scores DataFrame
eth_sentiments = []

for article in eth_headlines["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        eth_sentiments.append({
            "text": text,
            "date": date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
# Create DataFrame
eth_df = pd.DataFrame(eth_sentiments)

# Reorder DataFrame columns
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
eth_df = eth_df[cols]

eth_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
0,2021-02-18,Worried that the GeForce RTX 3060 will be sold...,-0.34,0.063,0.119,0.818
1,2021-01-28,Coinbase plans to go public by way of a direct...,0.0,0.0,0.0,1.0
2,2021-02-04,FILE PHOTO: Representation of the Ethereum vir...,0.0,0.0,0.0,1.0
3,2021-02-03,By Reuters Staff\r\nFILE PHOTO: Representation...,0.0,0.0,0.0,1.0
4,2021-02-20,While the first Christie's auction of digital ...,0.0258,0.03,0.0,0.97


In [11]:
 # Describe the Bitcoin Sentiment
btc_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,19.0,19.0,19.0,19.0
mean,0.164989,0.071789,0.025895,0.902316
std,0.389439,0.084187,0.039145,0.07991
min,-0.5037,0.0,0.0,0.647
25%,-0.05135,0.0,0.0,0.8615
50%,0.1779,0.056,0.0,0.909
75%,0.40165,0.096,0.0445,0.9495
max,0.9217,0.353,0.114,1.0


In [12]:
# Describe the Ethereum Sentiment
eth_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,20.0,20.0,20.0,20.0
mean,0.189495,0.0513,0.00595,0.94275
std,0.274088,0.054785,0.026609,0.062097
min,-0.34,0.0,0.0,0.818
25%,0.0,0.0,0.0,0.9
50%,0.0129,0.0465,0.0,0.9495
75%,0.3651,0.082,0.0,1.0
max,0.7096,0.144,0.119,1.0


In [13]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [None]:
# Expand the default stopwords list if necessary

In [22]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text."""
    
    # Create a list of the words
    sw = set(stopwords.words('english'))
    # Convert the words to lowercase
    regex = re.compile("[^a-zA-Z ]")
    #custom stopwords
    sw_addons = {'said', 'sent', 'found', 'including', 'today', 'announced', 'week', 'basically', 'also'}
    # Remove the punctuation
    re_clean = regex.sub('', text)
    # Remove the stop words
    words = word_tokenize(re_clean)
    # Lemmatize Words into root words
    lemmatizer = WordNetLemmatizer()
    lem=[lemmatizer.lemmatize(word) for word in words]
    
    tokens = [word.lower() for word in words if word.lower() not in sw.union(sw_addons)]
    
    return tokens

In [23]:
btc_df['tokens']=btc_df['text'].apply(tokenizer)
btc_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral,tokens
0,2021-02-08,Elon Musks cryptocurrency hype was more than j...,0.3182,0.091,0.0,0.909,"[elon, musks, cryptocurrency, hype, idle, talk..."
1,2021-02-09,"Tesla, led by Elon Musk, confirmed that it pur...",0.3818,0.071,0.0,0.929,"[tesla, led, elon, musk, confirmed, purchased,..."
2,2021-02-09,"For a brief moment on Sunday, before Tesla sai...",-0.1027,0.0,0.038,0.962,"[brief, moment, sunday, tesla, invested, billi..."
3,2021-02-12,Twitter and Square CEO Jack Dorsey and rapper ...,0.25,0.056,0.0,0.944,"[twitter, square, ceo, jack, dorsey, rapper, j..."
4,2021-02-08,"Today in an SEC filing, Tesla disclosed that i...",0.6597,0.137,0.0,0.863,"[sec, filing, tesla, disclosed, acquired, bill..."


In [None]:
# Create a new tokens column for ethereum
# YOUR CODE HERE!