In [1]:
# Dependencies
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import pandas as pd
import requests
from scipy import stats

from config import (rapidapi_key, rapidapi_host)

### Retrieve all articles with keyword 'cryptocurrency' from the NewsSearchAPI

In [2]:
# Sample request configuration: a small query used to look at the response layout
url_ws = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"

# Query-string arguments for the sample request (every value is passed as a string)
params = dict(
    q="taylor swift",
    pageNumber="1",
    pageSize="10",
    autoCorrect="true",
    fromPublishedDate="2021-01-16T05:50:06",
    toPublishedDate="2021-01-20T05:50:06",
)

# Credentials are imported from the local config module rather than written here
headers = {'x-rapidapi-key': rapidapi_key, 'x-rapidapi-host': rapidapi_host}

In [3]:
# Get the example response.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error status here instead of letting an
# error payload be treated as search results in the following cells.
ws_http_response = requests.get(url_ws, headers=headers, params=params, timeout=30)
ws_http_response.raise_for_status()
ws_response = ws_http_response.json()

In [4]:
# Pretty-print the whole parsed JSON response to inspect which fields it contains
pprint(ws_response)

{'_type': 'news',
 'didUMean': '',
 'relatedSearch': [],
 'totalCount': 38,
 'value': [{'body': 'WhatsApp\n'
                    'Taylor Swift surprised fans again by releasing her ninth '
                    'studio album evermore on Dec. 11. evermore comes less '
                    'than five months after her release of the current '
                    'Grammy-nominated for Album of the Year, folklore. Now, '
                    'returning to the same fantastical realm that she '
                    'introduced to the world a mere five months ago, Swift '
                    'ventures deeper into the folklorian woodsand touches on '
                    'stories that folklore didnt finish to tell.\n'
                    'evermore, which acts as a sister album to folklore, is '
                    'more sonically influenced by Swifts country days and 1989 '
                    'days, seen evident in the respective songs cowboy like me '
                    'and long story short. Exte

In [5]:
# Find the relevant response field for the main request: 'totalCount' is the value
# that the request loop later stores for each month as 'Num_of_articles'
print(ws_response['totalCount'])

38


In [8]:
# Building blocks for the dates passed into the requests:
# zero-padded month numbers and four-digit years, all kept as strings
month = [f"{number:02d}" for number in range(1, 13)]
year = [str(number) for number in range(2013, 2022)]
months = []

In [9]:
# Repeat the month list once per year so every year can be paired with all 12 months
months = [month] * len(year)
months

[['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']]

In [10]:
# One row per year at this stage; the 'Month' cell of each row holds the full month list
df1 = pd.DataFrame(dict(Month=months, Year=year))
df1

Unnamed: 0,Month,Year
0,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2013
1,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2014
2,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2015
3,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2016
4,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2017
5,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2018
6,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2019
7,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2020
8,"[01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12]",2021


In [11]:
# Expand each month list into one row per (Month, Year) pair,
# then renumber the rows from 0 so the index is unique again
exploded = df1.explode('Month')
df1 = exploded.reset_index(drop=True)
df1

Unnamed: 0,Month,Year
0,01,2013
1,02,2013
2,03,2013
3,04,2013
4,05,2013
...,...,...
103,08,2021
104,09,2021
105,10,2021
106,11,2021


In [12]:
# Remove the dates which are out of scope.
# Only 2013-04 through 2021-03 is kept. The rows to drop are derived from the dates
# themselves rather than from hard-coded row positions, so the filter stays correct
# if the month/year lists above are changed.
first_period = "2013-04"
last_period = "2021-03"

# Zero-padded "YYYY-MM" strings sort chronologically, so string comparison is enough
period = df1["Year"] + "-" + df1["Month"]
indexes_to_drop = df1.index[~period.between(first_period, last_period)].tolist()

new_df = df1.drop(indexes_to_drop)
new_df

Unnamed: 0,Month,Year
3,04,2013
4,05,2013
5,06,2013
6,07,2013
7,08,2013
...,...,...
94,11,2020
95,12,2020
96,01,2021
97,02,2021


In [13]:
# Convert the date table into a list of {'Month': ..., 'Year': ...} records,
# which is easier to iterate over during the API requests
dict_copy = new_df.to_dict(orient='records')
dict_copy

[{'Month': '04', 'Year': '2013'},
 {'Month': '05', 'Year': '2013'},
 {'Month': '06', 'Year': '2013'},
 {'Month': '07', 'Year': '2013'},
 {'Month': '08', 'Year': '2013'},
 {'Month': '09', 'Year': '2013'},
 {'Month': '10', 'Year': '2013'},
 {'Month': '11', 'Year': '2013'},
 {'Month': '12', 'Year': '2013'},
 {'Month': '01', 'Year': '2014'},
 {'Month': '02', 'Year': '2014'},
 {'Month': '03', 'Year': '2014'},
 {'Month': '04', 'Year': '2014'},
 {'Month': '05', 'Year': '2014'},
 {'Month': '06', 'Year': '2014'},
 {'Month': '07', 'Year': '2014'},
 {'Month': '08', 'Year': '2014'},
 {'Month': '09', 'Year': '2014'},
 {'Month': '10', 'Year': '2014'},
 {'Month': '11', 'Year': '2014'},
 {'Month': '12', 'Year': '2014'},
 {'Month': '01', 'Year': '2015'},
 {'Month': '02', 'Year': '2015'},
 {'Month': '03', 'Year': '2015'},
 {'Month': '04', 'Year': '2015'},
 {'Month': '05', 'Year': '2015'},
 {'Month': '06', 'Year': '2015'},
 {'Month': '07', 'Year': '2015'},
 {'Month': '08', 'Year': '2015'},
 {'Month': '09

In [14]:
# Configuration for the main request (keyword: cryptocurrency)
url_ws = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"

# The two date fields start out empty; the request loop overwrites them for every month
params = dict(
    q="cryptocurrency",
    pageNumber="1",
    pageSize="1",
    autoCorrect="true",
    fromPublishedDate="",
    toPublishedDate="",
)

# Credentials are imported from the local config module rather than written here
headers = {'x-rapidapi-key': rapidapi_key, 'x-rapidapi-host': rapidapi_host}


In [15]:
# Loop through the date records and request the article count for each month.
# Records are walked in consecutive pairs: the current record gives the start of the
# date window and the next record gives its end. The last record therefore only acts
# as an end bound and never receives a 'Num_of_articles' value of its own.
for i, (rec, next_rec) in enumerate(zip(dict_copy, dict_copy[1:])):

    params["fromPublishedDate"] = f"{rec['Year']}-{rec['Month']}-01T00:00:00"
    params["toPublishedDate"] = f"{next_rec['Year']}-{next_rec['Month']}-01T00:00:00"

    try:
        print(f"Processing Record {i}")
        # timeout: do not hang the whole run on one stalled connection
        response = requests.get(url_ws, headers=headers, params=params, timeout=30)
        # turn HTTP error statuses into exceptions instead of parsing an error payload
        response.raise_for_status()
        results = response.json()
        rec['Num_of_articles'] = results['totalCount']

    # Catch only failures of the request or of reading its payload, so that
    # KeyboardInterrupt and genuine programming errors are no longer swallowed.
    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError) as err:
        print(f"Request failed. Skipping... ({type(err).__name__}: {err})")
    

Processing Record 0
Processing Record 1
Processing Record 2
Processing Record 3
Processing Record 4
Processing Record 5
Processing Record 6
Processing Record 7
Processing Record 8
Processing Record 9
Processing Record 10
Processing Record 11
Processing Record 12
Processing Record 13
Processing Record 14
Processing Record 15
Processing Record 16
Processing Record 17
Processing Record 18
Processing Record 19
Processing Record 20
Processing Record 21
Processing Record 22
Processing Record 23
Processing Record 24
Processing Record 25
Processing Record 26
Processing Record 27
Processing Record 28
Processing Record 29
Processing Record 30
Processing Record 31
Processing Record 32
Processing Record 33
Processing Record 34
Processing Record 35
Processing Record 36
Processing Record 37
Processing Record 38
Processing Record 39
Processing Record 40
Processing Record 41
Processing Record 42
Processing Record 43
Processing Record 44
Processing Record 45
Processing Record 46
Processing Record 47
Pr

In [17]:
# Load the list of per-month records into a DataFrame;
# records without a 'Num_of_articles' key show up as missing values in that column
data_df = pd.DataFrame(data=dict_copy)
data_df

Unnamed: 0,Month,Year,Num_of_articles
0,04,2013,3.0
1,05,2013,1.0
2,06,2013,0.0
3,07,2013,0.0
4,08,2013,0.0
...,...,...,...
91,11,2020,61.0
92,12,2020,161.0
93,01,2021,239.0
94,02,2021,386.0


In [1]:
# Export results into a csv file.
# data_df is built by the cells above, so on a fresh kernel those must be run first.
# The output folder is created up front because to_csv does not create missing
# parent directories.
output_path = Path("Output/data_news_api.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
data_df.to_csv(output_path, encoding="utf-8", index=False, header=True)

NameError: name 'data_df' is not defined