# Project Title: The Impact of News on the Market
### •	Team Members:
##### 	Rachel Torres, Christian Attard, Jess Alcalde, Nitin Khade
### •	Project Description/Outline:
##### -	We will look at news data and stock data to determine the effects of the news on how the market behaves.
### •	Research Questions to Answer:
##### -	How do news headlines affect the stock market?
##### -	Is there any correlation between certain types of headlines and effects on the market?
##### -	Does negative news affect stocks greater than positive or neutral?
##### -	Can we assign a factor(weighting) to it?

In [1]:
# import dependencies
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pprint import pprint

from news_api import api_key
from x_api import x_api_key

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

# pip install vaderSentiment


In [3]:
# Connect to News API
# Init
# newsapi = NewsApiClient(api_key)

# parameters
q='politics',
from_param='2019-05-24',
language='en',
sort_by='relevancy',
page = 4,
pageSize = 50

url = f'https://newsapi.org/v2/everything?q={q}&from={from_param}&language={language}&sortBy={sort_by}&pageSize={pageSize}&page={page}&apiKey={api_key}'
response = requests.get(url)

url_2 = f'https://newsapi.org/v2/everything?q=apple&from=2019-05-24&to=2019-06-23&language=en&sortBy=popularity&pageSize={pageSize}&page=3&apiKey={api_key}'
response = requests.get(url_2)
# Generate response into json 

In [4]:
pprint(response.json())

{'code': 'maximumResultsReached',
 'message': 'You have requested too many results. Developer accounts are '
            'limited to a max of 100 results. You are trying to request '
            'results 100 to 150. Please upgrade to a paid plan if you need '
            'more results.',
 'status': 'error'}


In [5]:
#Replace the following string value with your valid X-RapidAPI-Key.
Your_X_RapidAPI_Key = x_api_key;

#The query parameters: (update according to your search query)
q = "Donald%20Trump%20News" #the search query
pageNumber = 1 #the number of requested page
pageSize = 50 #the size of a page
autoCorrect = True #autoCorrectspelling
safeSearch = False #filter results for adult content


# test response to query and get count of total items and pages
response_test=requests.get("https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?q={}&pageNumber={}&pageSize={}&autocorrect={}&safeSearch={}".format(q, pageNumber, pageSize, autoCorrect,safeSearch),
headers={
"X-RapidAPI-Key": Your_X_RapidAPI_Key
}
).json()

#Get the numer of items returned
totalCount = response_test["totalCount"];
totalPages = round(totalCount/pageSize)

#Get the list of most frequent searches related to the input search query
# relatedSearch = response_test["relatedSearch"]

#Go over each resulting item
# for webPage in response_test["value"]:

# #Get the web page metadata
#     url = webPage["url"]
#     title = webPage["title"]
#     description = webPage["description"]
#     keywords = webPage["keywords"]
#     provider = webPage["provider"]["name"]
#     datePublished = webPage["datePublished"]

#     #Get the web page image (if exists)
#     imageUrl = webPage["image"]["url"]
#     imageHeight = webPage["image"]["height"]
#     imageWidth = webPage["image"]["width"]

#     thumbnail = webPage["image"]["thumbnail"]
#     thumbnailHeight = webPage["image"]["thumbnailHeight"]
#     thumbnailWidth = webPage["image"]["thumbnailWidth"]

#     #An example: Output the webpage url, title and published date:
#     print("Url: %s. Title: %s. Published Date:%s." % (url, title, datePublished))

In [6]:
pprint(response_test)

{'_type': 'news',
 'didUMean': '',
 'relatedSearch': ['president <b>donald trump</b>',
                   'world <b>news</b>',
                   'washington',
                   'iran',
                   'uk',
                   'itv',
                   'fox <b>news</b>',
                   'read',
                   'one  page',
                   'us <b>news</b>',
                   'white house',
                   'favorite fox <b>news</b>'],
 'totalCount': 2527,
 'value': [{'datePublished': '2019-06-12T21:19:00',
            'description': 'President <b>Donald Trump</b> granted ABC '
                           '<b>News</b> an exclusive, wide-ranging interview '
                           'over the course of two days.',
            'image': {'base64Encoding': None,
                      'height': 558,
                      'thumbnail': 'https://contextualwebsearch.com/api/thumbnail/get?value=137322115715991574',
                      'thumbnailHeight': 139,
                     

In [7]:
# generate sample response to look at headers
pprint((response_test['value'][2]))

{'datePublished': '2019-04-16T08:20:00',
 'description': 'An overview of the latest <b>news</b> stories about <b>Donald '
                'Trump</b>, posted by <b>news</b> outlets in Wisconsin. We '
                'collect <b>news</b> articles from tens of thousands of '
                "relevant <b>news</b> outlets, so you'll never miss a "
                '<b>news</b> beat.',
 'image': {'base64Encoding': None,
           'height': 400,
           'thumbnail': 'https://contextualwebsearch.com/api/thumbnail/get?value=6618748489747719006',
           'thumbnailHeight': 200,
           'thumbnailWidth': 300,
           'url': 'https://spotonwisconsin.com/images/cards/wi.png',
           'width': 600},
 'isSafe': True,
 'keywords': 'donald trump news,wisconsin',
 'language': 'en',
 'provider': {'name': 'spotonwisconsin'},
 'title': '<b>Donald Trump News</b>',
 'url': 'https://spotonwisconsin.com/tag/donald-trump/'}


In [8]:
print(f' There are {totalPages} pages, with {totalCount} total articles.')

 There are 51 pages, with 2527 total articles.


In [9]:
# create dataframe to hold news data
newsFrame = pd.DataFrame
newsFrame = pd.DataFrame([['','','','','','']], columns=['datePublished','description','keywords','provider','title','url'])
newsFrame

Unnamed: 0,datePublished,description,keywords,provider,title,url
0,,,,,,


In [135]:
# Load news data into pandas dataframe

for page in range(1,totalPages+1):
    response=requests.get("https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?q={}&pageNumber={}&pageSize={}&autocorrect={}&safeSearch={}".format(q, page, pageSize, autoCorrect,safeSearch),
headers={"X-RapidAPI-Key": Your_X_RapidAPI_Key}).json()


#Get the list of most frequent searches related to the input search query
# relatedSearch = response["relatedSearch"]

#Go over each resulting item
    for webPage in response["value"]:

    #Get the web page metadata
        url = webPage["url"]
        title = webPage["title"]
        description = webPage["description"]
        keywords = webPage["keywords"]
        provider = webPage["provider"]["name"]
        datePublished = webPage["datePublished"]

#     #Get the web page image (if exists)
#     imageUrl = webPage["image"]["url"]
#     imageHeight = webPage["image"]["height"]
#     imageWidth = webPage["image"]["width"]

#     thumbnail = webPage["image"]["thumbnail"]
#     thumbnailHeight = webPage["image"]["thumbnailHeight"]
#     thumbnailWidth = webPage["image"]["thumbnailWidth"]

    #An example: Output the webpage url, title and published date:
#     print("Url: %s. Title: %s. Published Date:%s." % (url, title, datePublished))

In [5]:
# import stock data 

# Load stock data into notebook as dataframe

In [6]:
# Merge 2 DataFrames

In [7]:
# Do sentiment analysis of news data

In [None]:
# Use Matplotlib and stats to generate graphs and look for relationships