# Project Title: The Impact of News on the Market
### •	Team Members:
##### 	Rachel Torres, Christian Attard, Jess Alcalde, Nitin Khade
### •	Project Description/Outline:
##### -	We will look at news data and stock data to determine the effects of the news on how the market behaves.
### •	Research Questions to Answer:
##### -	How do news headlines affect the stock market?
##### -	Is there any correlation between certain types of headlines and effects on the market?
##### -	Does negative news affect stocks more than positive or neutral news?
##### -	Can we assign a factor (weighting) to that effect?

In [3]:
# import dependencies
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pprint import pprint

from news_api import api_key
from x_api import x_api_key

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

# pip install vaderSentiment


### We'll query the news api to gather news headlines from the web

In [4]:
# parameters for news api
# NOTE: these are plain scalars on purpose. A trailing comma (e.g. `q='politics',`)
# builds a one-element tuple, which the f-string below would render as
# "('politics',)" inside the URL instead of the intended value.
q = 'politics'
from_param = '2019-05-24'
language = 'en'
sort_by = 'relevancy'
page = 4
pageSize = 50

# First query: articles matching `q`, ordered by `sort_by`.
url = f'https://newsapi.org/v2/everything?q={q}&from={from_param}&language={language}&sortBy={sort_by}&pageSize={pageSize}&page={page}&apiKey={api_key}'
response = requests.get(url)

# Second query: "apple" articles for a fixed date window, ordered by popularity.
# It rebinds `response`, so only this second result is available afterwards.
url_2 = f'https://newsapi.org/v2/everything?q=apple&from=2019-05-24&to=2019-06-23&language=en&sortBy=popularity&pageSize={pageSize}&page=3&apiKey={api_key}'
response = requests.get(url_2)
# Generate response into json 

In [5]:
pprint(response.json())

{'code': 'parameterInvalid',
 'message': 'You are trying to request results too far in the past. Your plan '
            'permits you to request articles as far back as 2019-05-26, but '
            'you have requested 2019-05-24. To extend this please upgrade to a '
            'paid plan.',
 'status': 'error'}


### We'll use a different API to gather news data, since our News API plan limits how far back we can request articles

In [22]:
# Query contextual web search API
# Replace the following string value with your valid X-RapidAPI-Key.
Your_X_RapidAPI_Key = x_api_key

# The query parameters: (update according to your search query)
q = "Wayfair"  # the search query
pageNumber = 1  # the number of requested page
pageSize = 50  # the size of a page
autoCorrect = True  # autoCorrectspelling
safeSearch = False  # filter results for adult content


# test response to query and get count of total items and pages
response_test = requests.get(
    "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?q={}&pageNumber={}&pageSize={}&autocorrect={}&safeSearch={}".format(q, pageNumber, pageSize, autoCorrect, safeSearch),
    headers={"X-RapidAPI-Key": Your_X_RapidAPI_Key},
).json()

# Get the number of items returned
totalCount = response_test["totalCount"]

# Ceiling division: a partially filled last page still has to be requested.
# round() would drop that page whenever it is less than half full
# (e.g. 1410 articles at 50 per page -> 28 pages instead of 29).
totalPages = -(-totalCount // pageSize)


In [8]:
# pprint(response_test)

In [23]:
# generate sample response to look at headers
pprint((response_test['value'][2]))

{'datePublished': '2019-06-26T21:49:10',
 'description': 'BOSTON (AP) - Employees at online home furnishings retailer '
                "<b>Wayfair</b> walked out Wednesday to protest the company's "
                'decision to sell $200,000 worth of furniture to a government '
                'contractor that runs a detention center for migrant children '
                'in Texas. \n'
                ' The protest triggered a broader backlash against the '
                'company, with some customers calling for a boycott. Several '
                'hundred people joined the protest at a plaza near the '
                "company's Boston headquarters, a mix of employees and people "
                'from outside the company. \n'
                " More than 500 employees at the company's Boston headquarters "
                'signed a protest letter to executives when they found out '
                'about the contract. <b>Wayfair</b> refused to back out of the '
                'c

In [24]:
print(f' There are {totalPages} pages, with {totalCount} total articles.')

 There are 30 pages, with 1475 total articles.


In [25]:
# create empty lists to hold variable results
url_list = []
title_list = []
description_list = []
keywords_list = []
provider_list = []
date_list = []

# Load news data into pandas dataframe
# Request every result page in turn and collect one entry per article in the
# parallel lists above (all six lists always grow together).
for page in range(1, totalPages + 1):
    response = requests.get(
        "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?q={}&pageNumber={}&pageSize={}&autocorrect={}&safeSearch={}".format(q, page, pageSize, autoCorrect, safeSearch),
        headers={"X-RapidAPI-Key": Your_X_RapidAPI_Key},
    ).json()

    try:
        articles = response["value"]
    except (KeyError, IndexError):
        # This page came back without a result list; move on to the next page.
        print('Not found, skipping')
        continue

    # Go over each resulting item
    for webPage in articles:
        # Each article is handled on its own, so a single record with a
        # missing field no longer discards the remaining articles of its page.
        try:
            # Get the web page metadata (read everything before appending, so
            # a failure can never leave the lists with different lengths).
            article_url = webPage["url"]
            article_title = webPage["title"]
            article_description = webPage["description"]
            article_keywords = webPage["keywords"]
            article_provider = webPage["provider"]["name"]
            article_date = webPage["datePublished"]
        except (KeyError, IndexError):
            print('Not found, skipping')
            continue

        url_list.append(article_url)
        title_list.append(article_title)
        description_list.append(article_description)
        keywords_list.append(article_keywords)
        provider_list.append(article_provider)
        date_list.append(article_date)


In [50]:
# assign list to dataframe
# One column per collected list; the column order follows the order given here.
newsFrame = pd.DataFrame(
    dict(
        datePublished=date_list,
        description=description_list,
        keywords=keywords_list,
        provider=provider_list,
        title=title_list,
        url=url_list,
    )
)


In [51]:
newsFrame.head(200)

Unnamed: 0,datePublished,description,keywords,provider,title,url
0,2019-06-26T22:09:15,Some Wayfairemployees walked off the job to pr...,"wayfair employees stage walkout,border camps,p...",adage,<b>Wayfair</b> employees stage walkout to prot...,https://adage.com/article/news/wayfair-employe...
1,2019-06-26T22:04:39,Catch up on the most important news from today...,"facebook data value,wayfair walkout,boston,news",wired,"<b>Wayfair</b> Walkout, Facebook Data Value, a...",https://www.wired.com/story/wayfair-walkout-fa...
2,2019-06-26T21:49:10,BOSTON (AP) - Employees at online home furnish...,"wayfair employees,detention center,boston",mysanantonio,<b>Wayfair</b> workers protest furniture sale ...,https://www.mysanantonio.com/news/texas/articl...
3,2019-06-26T21:37:39,The protest triggered a broader backlash again...,"wayfair workers protest sale,lakeland,ledger",theledger,<b>Wayfair</b> workers protest sale to detenti...,https://www.theledger.com/news/20190626/wayfai...
4,2019-06-26T21:25:52,The Latest on a protest by <b>Wayfair</b> empl...,"wayfair,latest,texas",startribune,The Latest: Red Cross thanks <b>Wayfair</b> fo...,http://www.startribune.com/the-latest-wayfair-...
5,2019-06-26T21:20:22,Employees want the company to stop selling fur...,"getty images wayfair,wayfair employees,border",google,"<b>Wayfair</b> Employees Protest, Stage Walkou...",http://feedproxy.google.com/~r/inmannews/~3/K1...
6,2019-06-26T21:01:50,Hundreds of <b>Wayfair</b> employees filled Co...,"wayfair protest,maine public,brunswick,boston",mainepublic,<b>Wayfair</b> Protest In Brunswick Sees Small...,https://www.mainepublic.org/post/wayfair-prote...
7,2019-06-26T20:57:54,BOSTON The Latest on a protest by <b>Wayfair<...,"protest sale,wayfair,latest,texas",citynews1130,The Latest: <b>Wayfair</b> workers protest sal...,https://www.citynews1130.com/2019/06/26/the-la...
8,2019-06-26T20:34:52,"Several hundred people, including employees of...","protest furniture sale,wayfair,boston",reuters,"<b>Wayfair</b> workers, supporters protest fur...",http://feeds.reuters.com/~r/Reuters/domesticNe...
9,2019-06-26T20:30:29,A discussion about the walkout and whether <b>...,"copley square,radio boston,wayfair",wbur,<b>Wayfair</b> Walkout Raises Questions About ...,http://www.wbur.org/radioboston/2019/06/26/way...


In [52]:
newsFrame.dtypes

datePublished    object
description      object
keywords         object
provider         object
title            object
url              object
dtype: object

In [53]:
# clean description and title columns
# Strip the opening and closing bold tags, in that order, from both text columns.
newsFrame.description = (
    newsFrame.description
    .str.replace('<b>', '')
    .str.replace('</b>', '')
)
newsFrame.title = (
    newsFrame.title
    .str.replace('<b>', '')
    .str.replace('</b>', '')
)

In [54]:
newsFrame.head()

Unnamed: 0,datePublished,description,keywords,provider,title,url
0,2019-06-26T22:09:15,Some Wayfairemployees walked off the job to pr...,"wayfair employees stage walkout,border camps,p...",adage,Wayfair employees stage walkout to protest bed...,https://adage.com/article/news/wayfair-employe...
1,2019-06-26T22:04:39,Catch up on the most important news from today...,"facebook data value,wayfair walkout,boston,news",wired,"Wayfair Walkout, Facebook Data Value, and More...",https://www.wired.com/story/wayfair-walkout-fa...
2,2019-06-26T21:49:10,BOSTON (AP) - Employees at online home furnish...,"wayfair employees,detention center,boston",mysanantonio,Wayfair workers protest furniture sale to dete...,https://www.mysanantonio.com/news/texas/articl...
3,2019-06-26T21:37:39,The protest triggered a broader backlash again...,"wayfair workers protest sale,lakeland,ledger",theledger,Wayfair workers protest sale to detention center,https://www.theledger.com/news/20190626/wayfai...
4,2019-06-26T21:25:52,The Latest on a protest by Wayfair employees o...,"wayfair,latest,texas",startribune,The Latest: Red Cross thanks Wayfair for donation,http://www.startribune.com/the-latest-wayfair-...


In [55]:
print(totalPages)

30


In [56]:
newsFrame.count()

datePublished    846
description      846
keywords         846
provider         846
title            846
url              846
dtype: int64

In [57]:
# Query sample news description
# Select the column by label and the row by position. An integer key on the
# label-indexed row Series (`iloc[7][1]`) relies on column order and on a
# deprecated positional fallback.
newsFrame['description'].iloc[7]

'BOSTON  The Latest on a protest by Wayfair employees over the companys furniture sale to a contractor that runs detention centres for migrant children in Texas. (all times local): 4:45 p.m. Several hundred people joined a protest staged by Wayfair employees to protest the online retailers decision to sell $200,000 worth of furniture to'

In [None]:
# import stock data 

# Load stock data into notebook as dataframe

In [None]:
# Merge 2 DataFrames

### About the Scoring (taken from vaderSentiment docs)
The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive). This is the most useful metric if you want a single unidimensional measure of sentiment for a given sentence. Calling it a 'normalized, weighted composite score' is accurate.

It is also useful for researchers who would like to set standardized thresholds for classifying sentences as either positive, neutral, or negative. Typical threshold values (used in the literature cited on this page) are:

    positive sentiment: compound score >= 0.05
    neutral sentiment: (compound score > -0.05) and (compound score < 0.05)
    negative sentiment: compound score <= -0.05
The pos, neu, and neg scores are ratios for proportions of text that fall in each category (so these should all add up to be 1... or close to it with float operation). These are the most useful metrics if you want multidimensional measures of sentiment for a given sentence.

In [162]:
# Do sentiment analysis of news data

# Define function to test 
def sentiment_scores_print(sentence):
    """Score ``sentence`` with the module-level ``analyser``, print it, and return the score.

    The printed text is the sentence left-aligned and padded with '-' to at
    least 40 characters, followed by the string form of the score.

    Returns the value produced by ``analyser.polarity_scores(sentence)``.
    """
    polarity = analyser.polarity_scores(sentence)
    report = "{:-<40} \n \n{}".format(sentence, str(polarity))
    print(report)
    return polarity

def sentiment_scores(sentence):
    """Return the score the module-level ``analyser`` assigns to ``sentence``.

    This is a silent counterpart to ``sentiment_scores_print``: nothing is
    printed, the result of ``analyser.polarity_scores`` is returned as is.
    """
    return analyser.polarity_scores(sentence)

# Do test of single string
# Pick the first article's description by column label rather than by the
# column's position within the row.
sentiment_scores_print(newsFrame['description'].iloc[0])

Some Wayfairemployees walked off the job to protest the online retailers sale of beds to contractors furnishing border camps for asylum seekers. 
 
{'neg': 0.087, 'neu': 0.913, 'pos': 0.0, 'compound': -0.25}


{'neg': 0.087, 'neu': 0.913, 'pos': 0.0, 'compound': -0.25}

In [163]:
# add column in newsFrame for sentiment score
# Placeholder only: every row starts out as an empty string here; no score is
# computed in this cell.
newsFrame['sentiment_score'] = ''

In [164]:
# check dataframe
newsFrame.head()

Unnamed: 0,datePublished,description,keywords,provider,title,url,sentiment_score
0,2019-06-26T22:09:15,Some Wayfairemployees walked off the job to pr...,"wayfair employees stage walkout,border camps,p...",adage,Wayfair employees stage walkout to protest bed...,https://adage.com/article/news/wayfair-employe...,
1,2019-06-26T22:04:39,Catch up on the most important news from today...,"facebook data value,wayfair walkout,boston,news",wired,"Wayfair Walkout, Facebook Data Value, and More...",https://www.wired.com/story/wayfair-walkout-fa...,
2,2019-06-26T21:49:10,BOSTON (AP) - Employees at online home furnish...,"wayfair employees,detention center,boston",mysanantonio,Wayfair workers protest furniture sale to dete...,https://www.mysanantonio.com/news/texas/articl...,
3,2019-06-26T21:37:39,The protest triggered a broader backlash again...,"wayfair workers protest sale,lakeland,ledger",theledger,Wayfair workers protest sale to detention center,https://www.theledger.com/news/20190626/wayfai...,
4,2019-06-26T21:25:52,The Latest on a protest by Wayfair employees o...,"wayfair,latest,texas",startribune,The Latest: Red Cross thanks Wayfair for donation,http://www.startribune.com/the-latest-wayfair-...,


In [169]:
# add in sentiment analysis to data frame

# Take the compound sentiment of each article's description and assign the
# whole column in one step. Writing into the `row` objects yielded by
# DataFrame.iterrows() is not guaranteed to reach the frame (rows can be
# copies), and `row[1]` depended on the description being the second column.
newsFrame['sentiment_score'] = newsFrame['description'].apply(
    lambda text: sentiment_scores(text)['compound']
)

In [170]:
# check dataframe to see if sentiment score was added in
newsFrame.head()

Unnamed: 0,datePublished,description,keywords,provider,title,url,sentiment_score
0,2019-06-26T22:09:15,Some Wayfairemployees walked off the job to pr...,"wayfair employees stage walkout,border camps,p...",adage,Wayfair employees stage walkout to protest bed...,https://adage.com/article/news/wayfair-employe...,-0.25
1,2019-06-26T22:04:39,Catch up on the most important news from today...,"facebook data value,wayfair walkout,boston,news",wired,"Wayfair Walkout, Facebook Data Value, and More...",https://www.wired.com/story/wayfair-walkout-fa...,0.2716
2,2019-06-26T21:49:10,BOSTON (AP) - Employees at online home furnish...,"wayfair employees,detention center,boston",mysanantonio,Wayfair workers protest furniture sale to dete...,https://www.mysanantonio.com/news/texas/articl...,-0.8022
3,2019-06-26T21:37:39,The protest triggered a broader backlash again...,"wayfair workers protest sale,lakeland,ledger",theledger,Wayfair workers protest sale to detention center,https://www.theledger.com/news/20190626/wayfai...,-0.5106
4,2019-06-26T21:25:52,The Latest on a protest by Wayfair employees o...,"wayfair,latest,texas",startribune,The Latest: Red Cross thanks Wayfair for donation,http://www.startribune.com/the-latest-wayfair-...,-0.5423


In [None]:
# Use Matplotlib and stats to generate graphs and look for relationships