From 7be6ca67bd7bf200c3fd648cce913aad3a52736b Mon Sep 17 00:00:00 2001 From: Isaiah Banta Date: Mon, 2 Apr 2018 18:16:37 -0700 Subject: [PATCH 1/5] added *~ to .gitignore, simple edit to const.py variable names, finished Type and Value Errors on newsapi_client.py --- .gitignore | 1 + newsapi/const.py | 13 ++ newsapi/newsapi_client.py | 288 +++++++++++++++++++++++++++++++------- 3 files changed, 254 insertions(+), 48 deletions(-) create mode 100644 newsapi/const.py diff --git a/.gitignore b/.gitignore index 97891c8..f0d48ba 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist/ *.rst venv/ *log.txt +*~ diff --git a/newsapi/const.py b/newsapi/const.py new file mode 100644 index 0000000..093058b --- /dev/null +++ b/newsapi/const.py @@ -0,0 +1,13 @@ +TOP_HEADLINES_URL = 'https://newsapi.org/v2/top-headlines' +EVERYTHING_URL = 'https://newsapi.org/v2/everything' +SOURCES_URL = 'https://newsapi.org/v2/sources' + +countries = {'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr','hk', + 'hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz','ph','pl', + 'pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us','ve','za'} + +languages = {'ar','en','cn','de','es','fr','he','it','nl','no','pt','ru','sv','ud'} + +categories = {'business', 'entertainment', 'general', 'health', 'science', 'sports', 'technology'} + +sort_method = {'relevancy','popularity','publishedAt'} diff --git a/newsapi/newsapi_client.py b/newsapi/newsapi_client.py index 6df17d2..3d4f938 100644 --- a/newsapi/newsapi_client.py +++ b/newsapi/newsapi_client.py @@ -1,64 +1,130 @@ import requests from newsapi.newsapi_auth import NewsApiAuth +from newsapi import const class NewsApiClient(object): - def __init__(self, api_key, api_url='https://newsapi.org/v2/'): - self.url = api_url.rstrip('/') + def __init__(self, api_key): self.auth = NewsApiAuth(api_key=api_key) def get_top_headlines(self, q=None, sources=None, language=None, country=None, category=None, page_size=None, page=None): """ - Returns live top and breaking headlines for a country, specific category in a country, single source, or multiple sources.. + Returns live top and breaking headlines for a country, specific category in a country, single source, or multiple sources.. Optional parameters: - (str) q - return headlines w/ specified coin! Valid values are: - 'bitcoin', 'etheremum', 'ripple', 'bitcoin cash', etc. + (str) q - return headlines w/ specific keyword or phrase. For example: + 'bitcoin', 'trump', 'tesla', 'ethereum', etc. (str) sources - return headlines of news sources! some Valid values are: - 'bbc-news', 'the-verge', 'abc-news', 'crypto coins news', - 'ary news','associated press','wired','aftenposten','australian financial review','axios', - 'bbc news','bild','blasting news','bloomberg','business insider','engadget','google news', - 'hacker news','info money,'recode','techcrunch','techradar','the next web','the verge' etc. + 'bbc-news', 'the-verge', 'abc-news', 'crypto coins news', + 'ary news','associated press','wired','aftenposten','australian financial review','axios', + 'bbc news','bild','blasting news','bloomberg','business insider','engadget','google news', + 'hacker news','info money,'recode','techcrunch','techradar','the next web','the verge' etc. - (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for. Valid values are: - 'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh' + (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for. Valid values are: + 'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh' (str) country - The 2-letter ISO 3166-1 code of the country you want to get headlines! Valid values are: - 'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr', 'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz','ph' 'pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us' + 'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr', + 'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz', + 'ph','pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us' - (str) category - The category you want to get headlines for! Valid values are: - 'business','entertainment','general','health','science','sports','technology' + (str) category - The category you want to get headlines for! Valid values are: + 'business','entertainment','general','health','science','sports','technology' - (int) page_size - The number of results to return per page (request). 20 is the default, 100 is the maximum. + (int) page_size - The number of results to return per page (request). 20 is the default, 100 is the maximum. - (int) page - Use this to page through the results if the total results found is greater than the page size. + (int) page - Use this to page through the results if the total results found is greater than the page size. """ # Define Payload payload = {} - payload['q'] = q - payload['sources'] = sources - payload['language'] = language - payload['country'] = country - payload['category'] = category - payload['pageSize'] = page_size - payload['page'] = page + + # Keyword/Phrase + if q is not None: + if type(q) == str: + payload['q'] = q + else: + raise TypeError('keyword/phrase q param should be a str') + + # Sources + if (country is not None) or (category is not None): + raise ValueError('cannot mix country/category param with sources param.') + else: + if type(sources) == str: + payload['sources'] = sources + else: + raise TypeError('sources param should be a str') + + # Language + if language is not None: + if type(language) == str: + if language in const.languages: + payload['language'] = language + else: + raise ValueError('invalid language') + else: + raise TypeError('language param should be a string') + + # Country + if country is not None: + if type(country) == str: + if country in const.countries: + payload['country'] = country + else: + raise ValueError('invalid country') + else: + raise TypeError('country param should be a string') + + # Category + if category is not None: + if type(category) == str: + if category in const.categories: + payload['category'] = category + else: + raise ValueError('invalid category') + else: + raise TypeError('category param should be a string') + + # Page Size + if page_size is not None: + if type(page_size) == int: + if page_size >= 0 and page_size <= 100: + payload['pageSize'] = page_size + else: + raise ValueError('page_size param should be an int between 1 and 100') + else: + raise TypeError('page_size param should be an int') + + # Page + if page is not None: + if type(page) == int: + if page > 0: + payload['page'] = page + else: + raise ValueError('page param should be an int greater than 0') + else: + raise TypeError('page param should be an int') # Send Request - r = requests.get(self.url + '/top-headlines', auth=self.auth, timeout=30, params=payload) + r = requests.get(const.TOP_HEADLINES_URL, auth=self.auth, timeout=30, params=payload) + + # Check Status of Request + if r.status_code != requests.codes.ok: + raise NewsAPIException(r.json()) + return r.json() - def get_everything(self, q=None, sources=None, domains=None, from_parameter=None, to=None, language=None, + def get_everything(self, q=None, sources=None, domains=None, from_param=None, to_param=None, language=None, sort_by=None, page=None, page_size=None): - """ + """ Search through millions of articles from over 5,000 large and small news sources and blogs. Optional parameters: (str) q - return headlines w/ specified coin! Valid values are: - 'bitcoin', 'etheremum', 'ripple', 'bitcoin cash', etc. + 'bitcoin', 'trump', 'tesla', 'ethereum', etc (str) sources - return headlines of news sources! some Valid values are: 'bbc-news', 'the-verge', 'abc-news', 'crypto coins news', @@ -66,8 +132,10 @@ def get_everything(self, q=None, sources=None, domains=None, from_parameter=None 'bbc news','bild','blasting news','bloomberg','business insider','engadget','google news', 'hacker news','info money,'recode','techcrunch','techradar','the next web','the verge' etc. - (str) domains - A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to. - (str) from_parameter - A date and optional time for the oldest article allowed.(e.g. 2018-03-05 or 2018-03-05T03:46:15) + (str) domains - A comma-seperated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to. + + (str) from_parameter - A date and optional time for the oldest article allowed. + (e.g. 2018-03-05 or 2018-03-05T03:46:15) (str) to - A date and optional time for the newest article allowed. @@ -78,37 +146,131 @@ def get_everything(self, q=None, sources=None, domains=None, from_parameter=None 'relevancy' (int) page_size - The number of results to return per page (request). 20 is the default, 100 is the maximum. - (int) page - Use this to page through the results if the total results found is greater than the page size. + + (int) page - Use this to page through the results if the total results found is greater than the page size. """ # Define Payload payload = {} - payload['q'] = q - payload['sources'] = sources - payload['domains'] = domains - payload['from'] = from_parameter - payload['to'] = to - payload['language'] = language - payload['sortBy'] = sort_by - payload['page'] = page - payload['pageSize'] = page_size + + # Keyword/Phrase + if q is not None: + if type(q) == str: + payload['q'] = q + else: + raise TypeError('keyword/phrase q param should be a str') + + # Sources + if (country is not None) or (category is not None): + raise ValueError('cannot mix country or category param with sources param.') + else: + if type(sources) == str: + payload['sources'] = sources + else: + raise TypeError('sources param should be a str') + + # Domains To Search + if domains is not None: + if type(domains) == str: + payload['domains'] = domains + else: + raise TypeError('domains param should be a string') + + # Search From This Date ... + if from_param is not None: + if type(from_param) == str: + if (len(from_param)) >= 10: + for i in range(len(from_param)): + if (i == 4 and from_param[i] != '-') or (i == 7 and from_param[i] != '-'): + raise ValueError('from_param should be in the format of YYYY-MM-DD') + else: + payload['from'] = from_param + else: + raise ValueError('from_param should be in the format of YYYY-MM-DD') + else: + raist TypeError('from_param should be a string') + + # ... To This Date + if to_param is not None: + if type(to_param) == str: + if (len(to_param)) >= 10: + for i in range(len(to_param)): + if (i == 4 and to_param[i] != '-') or (i == 7 and to_param[i] != '-'): + raise ValueError('to_param should be in the format of YYYY-MM-DD') + else: + payload['to'] = to_param + else: + raise ValueError('to_param should be in the format of YYYY-MM-DD') + else: + raist TypeError('to_param should be a string') + + + # Language + if language is not None: + if type(language) == str: + if language is not in const.languages: + raise ValueError('invalid language') + else: + payload['language'] = language + else: + raise TypeError('language param should be a string') + + + # Sort Method + if sort_by is not None: + if type(sort_by) == str: + if sort_by in const.sort_method: + payload['sortBy'] = sort_by + else: + raise ValueError('invalid sort') + else: + raise TypeError('sort_by param should be a string') + + # Page Size + if page_size is not None: + if type(page_size) == int: + if page_size >= 0 and page_size <= 100: + payload['pageSize'] = page_size + else: + raise ValueError('page_size param should be an int between 1 and 100') + else: + raise TypeError('page_size param should be an int') + + # Page + if page is not None: + if type(page) == int: + if page > 0: + payload['page'] = page + else: + raise ValueError('page param should be an int greater than 0') + else: + raise TypeError('page param should be an int') + # Send Request - r = requests.get(self.url + '/everything', auth=self.auth, timeout=30, params=payload) + r = requests.get(const.EVERYTHING_URL, auth=self.auth, timeout=30, params=payload) + + #Check Status of Request + if r.status_code != requests.codes.ok: + raise NewsAPIException(r.json()) + return r.json() def get_sources(self, category=None, language=None, country=None): - - """ + """ Returns the subset of news publishers that top headlines... Optional parameters: + (str) category - The category you want to get headlines for! Valid values are: + 'business','entertainment','general','health','science','sports','technology' (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for. Valid values are: 'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh' (str) country - The 2-letter ISO 3166-1 code of the country you want to get headlines! Valid values are: - 'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr', 'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz','ph' 'pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us' + 'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr', + 'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz', + 'ph','pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us' (str) category - The category you want to get headlines for! Valid values are: 'business','entertainment','general','health','science','sports','technology' @@ -117,13 +279,43 @@ def get_sources(self, category=None, language=None, country=None): # Define Payload payload = {} - payload['category'] = category - payload['language'] = language - payload['country'] = country + + # Language + if language is not None: + if type(language) == str: + if language in const.languages: + payload['language'] = language + else: + raise ValueError('invalid language') + else: + raise TypeError('language param should be a string') + + # Country + if country is not None: + if type(country) == str: + if country in const.countries: + payload['country'] = country + else: + raise ValueError('invalid country') + else: + raise TypeError('country param should be a string') + + # Category + if category is not None: + if type(category) == str: + if category in const.categories: + payload['category'] = category + else: + raise ValueError('invalid category') + else: + raise TypeError('category param should be a string') # Send Request - r = requests.get(self.url + '/sources', auth=self.auth, timeout=30, params=payload) - return r.json() + r = requests.get(const.SOURCES_URL, auth=self.auth, timeout=30, params=payload) + # Check Status of Request + if r.status_code != requests.codes.ok: + raise NewsAPIException(r.json()) + return r.json() From aa72b837693f225624199ea3a899bbc10b1fac76 Mon Sep 17 00:00:00 2001 From: Isaiah Banta Date: Mon, 2 Apr 2018 18:34:43 -0700 Subject: [PATCH 2/5] note to window users printing to command line --- README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f87b4e..ddb4575 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,24 @@ api.get_everything(q='bitcoin') api.get_sources() ``` +#### For Windows users printing to _cmd_ or _powershell_ +You will encounter an error if you attempt to print the .json() object to the command line. This is because the '{', '}' curly braces to be printed to the console. +This becomes especially annoying if developers wish to get 'under the hood'. + +Here is the error: + UnicodeEncodeError: 'charmap' codec can't encode character '\u2019' in position 1444: character maps to + +This can be fixed by: + - installing 'win-unicode-console' + `py -mpip install win-unicode-console` + - then running it while calling your python script... + `py -mrun myPythonScript.py` + +Another option is hardcoding your console to only print in utf-8. This is a bad idea, as it could ruin many other scripts and/or make errors MUCH more difficult to track. +[More information](https://stackoverflow.com/questions/5419/python-unicode-and-the-windows-console/32176732#32176732 + + ## Support Feel free to make suggestions or provide feedback regarding the library. Thanks. -Reach out at [lisivickmatt@gmail.com]('mailto:lisivickmatt@gmail.com') \ No newline at end of file +Reach out at [lisivickmatt@gmail.com]('mailto:lisivickmatt@gmail.com') From f835e367f3dcef3eb3644dc3ae4a4c7ff37286eb Mon Sep 17 00:00:00 2001 From: Isaiah Banta Date: Mon, 2 Apr 2018 18:54:30 -0700 Subject: [PATCH 3/5] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ddb4575..3f6595e 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,8 @@ api.get_everything(q='bitcoin') api.get_sources() ``` -#### For Windows users printing to _cmd_ or _powershell_ +## For Windows users printing to _cmd_ or _powershell_ + You will encounter an error if you attempt to print the .json() object to the command line. This is because the '{', '}' curly braces to be printed to the console. This becomes especially annoying if developers wish to get 'under the hood'. From d4197338953b714e336357d8ec2979f28abcd744 Mon Sep 17 00:00:00 2001 From: Isaiah Banta Date: Tue, 3 Apr 2018 09:07:07 -0700 Subject: [PATCH 4/5] changed some typos to 'raise' statements fixed line 211 to read `if language not in const.language` changed parameter `to_param` to `to` --- newsapi/newsapi_client.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/newsapi/newsapi_client.py b/newsapi/newsapi_client.py index 3d4f938..324190d 100644 --- a/newsapi/newsapi_client.py +++ b/newsapi/newsapi_client.py @@ -188,27 +188,27 @@ def get_everything(self, q=None, sources=None, domains=None, from_param=None, to else: raise ValueError('from_param should be in the format of YYYY-MM-DD') else: - raist TypeError('from_param should be a string') + raise TypeError('from_param should be a string') # ... To This Date - if to_param is not None: - if type(to_param) == str: - if (len(to_param)) >= 10: - for i in range(len(to_param)): - if (i == 4 and to_param[i] != '-') or (i == 7 and to_param[i] != '-'): - raise ValueError('to_param should be in the format of YYYY-MM-DD') + if to is not None: + if type(to) == str: + if (len(to)) >= 10: + for i in range(len(to)): + if (i == 4 and to[i] != '-') or (i == 7 and to[i] != '-'): + raise ValueError('to should be in the format of YYYY-MM-DD') else: - payload['to'] = to_param + payload['to'] = to else: - raise ValueError('to_param should be in the format of YYYY-MM-DD') + raise ValueError('to param should be in the format of YYYY-MM-DD') else: - raist TypeError('to_param should be a string') + raise TypeError('to param should be a string') # Language if language is not None: if type(language) == str: - if language is not in const.languages: + if language not in const.languages: raise ValueError('invalid language') else: payload['language'] = language From 38074cfe3d1027bccd88f6076766cad0db3d4dc0 Mon Sep 17 00:00:00 2001 From: Isaiah Banta Date: Wed, 4 Apr 2018 09:06:03 -0700 Subject: [PATCH 5/5] changed 'to_param' to 'to' within function --- newsapi/newsapi_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newsapi/newsapi_client.py b/newsapi/newsapi_client.py index 324190d..9842c31 100644 --- a/newsapi/newsapi_client.py +++ b/newsapi/newsapi_client.py @@ -117,7 +117,7 @@ def get_top_headlines(self, q=None, sources=None, language=None, country=None, c return r.json() - def get_everything(self, q=None, sources=None, domains=None, from_param=None, to_param=None, language=None, + def get_everything(self, q=None, sources=None, domains=None, from_param=None, to=None, language=None, sort_by=None, page=None, page_size=None): """ Search through millions of articles from over 5,000 large and small news sources and blogs.