In [1]:
import requests
import datetime
import re

In [2]:
"""This class converts a dict to nested objects"""

class Struct(object):
    """
    Attributes will depend on the structure of object. 
    If we keep calling the 'everything' newsapi, then the attributes will be:
    
                    articles: A list of articles, each with their own objects
                    status: Status of request, should be 'ok'
                    totalResults: The total number of results available for the request, will need
                                  to use the &page= parameter to get these as only 20 articles are
                                  returned per request.
                                  
    Resource: https://stackoverflow.com/questions/1305532/convert-python-dict-to-object
    """
    def __init__(self, data):
        for name, value in data.items():
            setattr(self, name, self._wrap(value))

    def _wrap(self, value):
        if isinstance(value, (tuple, list, set, frozenset)): 
            return type(value)([self._wrap(v) for v in value])
        else:
            return Struct(value) if isinstance(value, dict) else value


"""
General class to aggregate all useful objects. 
Could customize, e.g. change structure of get_raw_data to affect data object
"""
class myclass(object):
    """
    Attributes:
                call: The url sent to newsapi
                raw_data: The dictionary returned when requesting call
    
    """
    
    def get_raw_data(self,call):
        r = requests.get(call).json()
        
        for i in r['articles']:
            del i['author']
            del i['urlToImage']
            t = datetime.datetime.strptime(i['publishedAt'], "%Y-%m-%dT%H:%M:%S%fZ")
            nt = t.replace(hour=0, minute=0, second=0, microsecond=0)
            i['publishedAt'] = str(nt)
            i['source'] = i['source']['name']
        
        return r
    
    def __init__(self,call):
        self.call = call
        self.data = Struct(self.get_raw_data(call))
        self.data.n_pages = self.data.totalResults/20
        
"""Takes call and paginates over user input number of pages to provide a list of lists made up of articles"""
        
    def paginate(self,n):
        #If page argument already exists in call, remove it
        fp = self.call.find('&page=')
        if fp > 0:
            l = [x for x, v in enumerate(self.call) if v == '&']
            l.append(len(self.call))
            nxt = l[next(x[0] for x in enumerate(l) if x[1] > fp)]
            base_call = self.call[:fp] + self.call[nxt:]
        else:
            base_call = self.call
            
        #print(base_call)
        articles_list = []
        
        for i in range(1,n+1):
            new_call = base_call + "&page=" + str(i)
            #print(new_call)
            d = Struct(self.get_raw_data(new_call))
            #print(d.articles[0].__dict__)
            articles_list.append(d.articles)
            
        return articles_list


In [3]:
n =  myclass('https://newsapi.org/v2/everything?q=(BTC OR bitcoin)&from=2018-01-11&to=2018-01-13&language=en&sortBy=popularity&apiKey=6d00cdefd3bc4ee38f8a7af69ac5bec4')

In [4]:
n.data.__dict__

{'articles': [<__main__.Struct at 0x7f66d4aa7d30>,
  <__main__.Struct at 0x7f66bed8e588>,
  <__main__.Struct at 0x7f66bed8e048>,
  <__main__.Struct at 0x7f66bed8e0f0>,
  <__main__.Struct at 0x7f66bed74ba8>,
  <__main__.Struct at 0x7f66bed74e10>,
  <__main__.Struct at 0x7f66bed74be0>,
  <__main__.Struct at 0x7f66bed74cf8>,
  <__main__.Struct at 0x7f66bed74da0>,
  <__main__.Struct at 0x7f66bed74e48>,
  <__main__.Struct at 0x7f66bed74dd8>,
  <__main__.Struct at 0x7f66bed74cc0>,
  <__main__.Struct at 0x7f66bed74fd0>,
  <__main__.Struct at 0x7f66bed74e80>,
  <__main__.Struct at 0x7f66bed74eb8>,
  <__main__.Struct at 0x7f66bed74f98>,
  <__main__.Struct at 0x7f66bed74c50>,
  <__main__.Struct at 0x7f66bed74c88>,
  <__main__.Struct at 0x7f66bed74f60>,
  <__main__.Struct at 0x7f66bed74f28>],
 'n_pages': 89.2,
 'status': 'ok',
 'totalResults': 1784}

In [5]:
n.data.articles[0].__dict__

{'description': 'So you’re ready to buy some cryptocurrency. Maybe you’ve been reading up on blockchain technology and you’re convinced it really is the future. Or maybe you watched a friend get rich off Bitcoin and you’re still kicking yourself for not doing the same. Read m…',
 'publishedAt': '2018-01-11 00:00:00',
 'source': 'Lifehacker.com',
 'title': 'How to Buy Cryptocurrency',
 'url': 'https://lifehacker.com/how-to-buy-cryptocurrency-1821935329'}

In [6]:
l = n.paginate(3)
l

https://newsapi.org/v2/everything?q=(BTC OR bitcoin)&from=2018-01-11&to=2018-01-13&language=en&sortBy=popularity&apiKey=6d00cdefd3bc4ee38f8a7af69ac5bec4
https://newsapi.org/v2/everything?q=(BTC OR bitcoin)&from=2018-01-11&to=2018-01-13&language=en&sortBy=popularity&apiKey=6d00cdefd3bc4ee38f8a7af69ac5bec4&page=1
{'title': 'How to Buy Cryptocurrency', 'source': 'Lifehacker.com', 'url': 'https://lifehacker.com/how-to-buy-cryptocurrency-1821935329', 'description': 'So you’re ready to buy some cryptocurrency. Maybe you’ve been reading up on blockchain technology and you’re convinced it really is the future. Or maybe you watched a friend get rich off Bitcoin and you’re still kicking yourself for not doing the same. Read m…', 'publishedAt': '2018-01-11 00:00:00'}
https://newsapi.org/v2/everything?q=(BTC OR bitcoin)&from=2018-01-11&to=2018-01-13&language=en&sortBy=popularity&apiKey=6d00cdefd3bc4ee38f8a7af69ac5bec4&page=2
{'title': 'Bitcoin and the pumpkin spice latte problem', 'source': 'The N

[[<__main__.Struct at 0x7f66bed8da58>,
  <__main__.Struct at 0x7f66bed8d780>,
  <__main__.Struct at 0x7f66bed8d1d0>,
  <__main__.Struct at 0x7f66bed8d2b0>,
  <__main__.Struct at 0x7f66bed8dbe0>,
  <__main__.Struct at 0x7f66bed8d438>,
  <__main__.Struct at 0x7f66bed8da20>,
  <__main__.Struct at 0x7f66bed8d9b0>,
  <__main__.Struct at 0x7f66bed8d978>,
  <__main__.Struct at 0x7f66bed8d4e0>,
  <__main__.Struct at 0x7f66bed8dcf8>,
  <__main__.Struct at 0x7f66bed8dfd0>,
  <__main__.Struct at 0x7f66bed8df28>,
  <__main__.Struct at 0x7f66bed8d828>,
  <__main__.Struct at 0x7f66bed8d7b8>,
  <__main__.Struct at 0x7f66bed8d128>,
  <__main__.Struct at 0x7f66bed8d6a0>,
  <__main__.Struct at 0x7f66bed8dc50>,
  <__main__.Struct at 0x7f66bed8dac8>,
  <__main__.Struct at 0x7f66bed8d390>],
 [<__main__.Struct at 0x7f66bed8d588>,
  <__main__.Struct at 0x7f66bed8d9e8>,
  <__main__.Struct at 0x7f66bed8d400>,
  <__main__.Struct at 0x7f66bed8dba8>,
  <__main__.Struct at 0x7f66bed8d940>,
  <__main__.Struct at 0x

In [7]:
l[0][0].__dict__


{'description': 'So you’re ready to buy some cryptocurrency. Maybe you’ve been reading up on blockchain technology and you’re convinced it really is the future. Or maybe you watched a friend get rich off Bitcoin and you’re still kicking yourself for not doing the same. Read m…',
 'publishedAt': '2018-01-11 00:00:00',
 'source': 'Lifehacker.com',
 'title': 'How to Buy Cryptocurrency',
 'url': 'https://lifehacker.com/how-to-buy-cryptocurrency-1821935329'}

In [8]:
l[1][0].__dict__

{'description': 'As Bitcoin continues to test new lows investors have more to worry about than an impending bottom. As the saying goes, “death and taxes” are the only certainties. For cryptocurrency investors, the latter will undoubtedly be responsible for more anxiety than t…',
 'publishedAt': '2018-01-11 00:00:00',
 'source': 'The Next Web',
 'title': 'Bitcoin and the pumpkin spice latte problem',
 'url': 'https://thenextweb.com/?p=1100927'}

In [9]:
l[2][0].__dict__

{'description': '(This is a sponsored post.) The newly released Q3 2017 Global DDoS Threat Landscape Report features insights on attacks and mitigation. These are some of the key findings: Bitcoin was one of the most targeted industries High packet rate attacks grew more comm…',
 'publishedAt': '2018-01-11 00:00:00',
 'source': 'Incapsula.com',
 'title': '\u200bIncapsula’s Global DDoS Threat Landscape Report',
 'url': 'http://lp.incapsula.com/ddos-threat-landscape-report.html?irgwc=1&clickid=WFeyEMTH4SmMxMs0UWSh5V6WUkmVfPWeqyRf2s0&utm_medium=ir&utm_source=aff&utm_campaign=253469'}

In [13]:
print(l[0][0].title)
print(l[0][0].description)
l[0][0].polarity = input('What is the polarity of this article? ')

How to Buy Cryptocurrency
So you’re ready to buy some cryptocurrency. Maybe you’ve been reading up on blockchain technology and you’re convinced it really is the future. Or maybe you watched a friend get rich off Bitcoin and you’re still kicking yourself for not doing the same. Read m…
What is the polarity of this article? 0.5


In [15]:
l[0][0].__dict__

{'description': 'So you’re ready to buy some cryptocurrency. Maybe you’ve been reading up on blockchain technology and you’re convinced it really is the future. Or maybe you watched a friend get rich off Bitcoin and you’re still kicking yourself for not doing the same. Read m…',
 'polarity': '0.5',
 'publishedAt': '2018-01-11 00:00:00',
 'source': 'Lifehacker.com',
 'title': 'How to Buy Cryptocurrency',
 'url': 'https://lifehacker.com/how-to-buy-cryptocurrency-1821935329'}

In [28]:
def manually_classify(l):
    print('Enter the polarity for the diplayed title and description, 1.0 for postive, 0.0, for negative, 0.5 for neutral.')
    print('Enter "stop" if you wish to stop. \n')
    for i in range(0,len(l)):
        for j in range(0,len(l[i])):
            print(l[i][j].title)
            print(l[i][j].description)
            print("\n")
            p = input('What is the polarity of this article? ')
            if p == 'stop':
                break
            else:
                l[i][j].polarity = p
                
        else:
            continue  # executed if the j loop ended normally (no break)
        break  # executed if 'continue' was skipped (break)