In [31]:
"""This class converts a dict to nested objects"""

class Struct(object):
    """
    Recursively wraps a dict so that its keys become attributes.

    Nested dicts are wrapped as Struct instances too, and the members of
    tuples, lists, sets and frozensets are wrapped element by element.

    Attributes will depend on the structure of object.
    If we keep calling the 'everything' newsapi, then the attributes will be:

                    articles: A list of articles, each with their own objects
                    status: Status of request, should be 'ok'
                    totalResults: The total number of results available for the request, will need
                                  to use the &page= parameter to get these as only 20 articles are
                                  returned per request.

    Resource: https://stackoverflow.com/questions/1305532/convert-python-dict-to-object
    """
    def __init__(self, data):
        """Set one attribute per key of `data`, wrapping nested containers."""
        for name, value in data.items():
            setattr(self, name, self._wrap(value))

    def _wrap(self, value):
        """Return `value` with every nested dict converted to a Struct."""
        if isinstance(value, (tuple, list, set, frozenset)):
            # Rebuild the same container type from the wrapped members.
            return type(value)([self._wrap(v) for v in value])
        else:
            return Struct(value) if isinstance(value, dict) else value

    def __repr__(self):
        """
        Show the title and description for article-like objects.

        Objects with neither attribute (e.g. the top-level response) get a
        generic summary of their attribute names instead of raising KeyError.
        A missing or None title/description is rendered as an empty string.
        """
        attrs = self.__dict__
        if 'title' not in attrs and 'description' not in attrs:
            return "<Struct with attributes: " + ", ".join(sorted(attrs)) + ">"

        title = attrs.get('title')
        description = attrs.get('description')
        title_text = "" if title is None else str(title)
        description_text = "" if description is None else str(description)
        return "Title: " + title_text + "\n" "Description: " + description_text

    def updateSentiment(self, sent_value):
        """Store `sent_value` as this object's `sentiment` attribute."""
        self.sentiment = sent_value

    def __add__(self, other):
        """
        Add sentiment values.

        `other` may be another object carrying a `sentiment` attribute, or a
        plain number (which is added to this object's sentiment directly).
        """
        return self.sentiment + getattr(other, 'sentiment', other)

    def __radd__(self, other):
        """Support `number + struct`, which is what makes `sum(structs)` work."""
        return other + self.sentiment


"""
General class to aggregate all useful objects. 
Could customize, e.g. change structure of get_raw_data to affect data object
"""

class myclass(object):
    """
    Requests a newsapi url and exposes the response as nested objects.

    Attributes:
                call: The url sent to newsapi
                data: Struct built from the dictionary returned when requesting call,
                      with an added n_pages attribute (number of result pages)
    """

    # Number of articles assumed per response page when computing n_pages.
    PAGE_SIZE = 20

    def get_raw_data(self,call):
        """
        Request `call` and return the decoded JSON dict with each article tidied up.

        For every article: 'author' and 'urlToImage' are dropped when present,
        'publishedAt' is truncated to midnight of its date and stored as a
        string ("YYYY-MM-DD 00:00:00"), and 'source' is replaced by its name.

        NOTE(review): relies on `requests` and `datetime` already being imported
        in the notebook -- confirm an earlier cell does so.
        """
        r = requests.get(call).json()

        for i in r['articles']:
            # pop() rather than del: the key may be absent from a response.
            i.pop('author', None)
            i.pop('urlToImage', None)
            # Only the date is kept, so parse just the leading YYYY-MM-DD part.
            # This also accepts timestamps that carry fractional seconds.
            day = datetime.datetime.strptime(i['publishedAt'][:10], "%Y-%m-%d")
            i['publishedAt'] = str(day)
            i['source'] = i['source']['name']

        return r

    def __init__(self,call):
        """Fetch `call` once and record how many pages of results exist."""
        self.call = call
        self.data = Struct(self.get_raw_data(call))
        # Ceiling division: a partially filled last page still counts as a page.
        self.data.n_pages = -(-self.data.totalResults // self.PAGE_SIZE)

    def paginate(self,n):
        """
        Takes call and paginates over user input number of pages to provide a
        flat list of the articles from pages 1..n.

        Any '&page=' argument already present in call is removed first.
        """
        fp = self.call.find('&page=')
        if fp >= 0:
            # The existing page argument ends at the next '&' or at end of string.
            nxt = self.call.find('&', fp + 1)
            if nxt == -1:
                nxt = len(self.call)
            base_call = self.call[:fp] + self.call[nxt:]
        else:
            base_call = self.call

        articles_list = []

        for i in range(1,n+1):
            new_call = base_call + "&page=" + str(i)
            d = Struct(self.get_raw_data(new_call))
            articles_list.extend(d.articles)

        return articles_list


In [32]:
import pickle
# Pickle file holding the previously collected articles that this notebook labels.
PIK = "2018-01-01_2018-01-10.dat"

# NOTE: pickle.load can execute arbitrary code while unpickling -- only open
# files you created yourself or otherwise trust.
# NOTE(review): the stored objects appear to be __main__.Struct instances, so the
# class-definition cell presumably has to run before this one -- confirm.
with open(PIK, "rb") as f:
    data = pickle.load(f)

In [3]:
len(data)

5300

In [39]:
data[0]

Title: Find All the Mac and iOS Apps That Support Dark Mode
Description: Photo by Vince Reinhart Dark Mode, a feature that replaces your usual black-on-white display with white-on-black, is easier on the eyes and makes you feel like you own bitcoin. The site Dark Mode List lists macOS apps, iOS apps, and websites that support the …

In [37]:
# Attach example sentiment values to the first two articles.
data[0].updateSentiment(1)
data[1].updateSentiment(0.5)

In [38]:
# Adding two articles yields the sum of their sentiment values.
data[0] + data[1]


1.5

In [12]:
def manually_classify(l, skip_labeled=False):
    """
    Interactively label the polarity of each item in `l`.

    For every item the title and description are printed and the user is asked
    for a polarity: 'p' (positive), 'n' (negative) or 'nn' (neutral). The answer
    is stored on the item as its `polarity` attribute. Entering 'stop' ends the
    session and leaves the remaining items untouched.

    Args:
        l: Iterable of objects exposing `title` and `description` attributes.
        skip_labeled: When True, items that already have a `polarity` attribute
            are skipped, so an interrupted session can be resumed. Defaults to
            False, which re-prompts for every item.

    Returns:
        None. Items are modified in place.
    """
    valid_answers = ('p', 'n', 'nn', 'stop')

    print('Enter the polarity for the displayed title and description: p for positive, n for negative, nn for neutral.')
    print('Enter "stop" if you wish to stop. \n')
    for item in l:
        if skip_labeled and hasattr(item, 'polarity'):
            continue

        print("\n")
        print(item.title)
        print(item.description)
        print("\n")

        # Keep asking until a recognised answer is given. Surrounding whitespace
        # and letter case are ignored so that e.g. " P" is accepted as 'p'.
        while True:
            p = input('What is the polarity of this article? ').strip().lower()
            if p in valid_answers:
                break
            print("\n")
            print("Not a valid response, please try again.")

        if p == 'stop':
            break
        item.polarity = p


In [13]:
manually_classify(data)

Enter the polarity for the diplayed title and description, p for postive, n, for negative, nn for neutral.
Enter "stop" if you wish to stop. 



Find All the Mac and iOS Apps That Support Dark Mode
Photo by Vince Reinhart Dark Mode, a feature that replaces your usual black-on-white display with white-on-black, is easier on the eyes and makes you feel like you own bitcoin. The site Dark Mode List lists macOS apps, iOS apps, and websites that support the …


What is the polarity of this article? p


Why Kodak's Bitcoin Scheme Is a Scam You Should Avoid
Just like your friend from college who won’t stop posting about it on Facebook, Kodak is really into cryptocurrencies. During CES 2018, the company announced a two-pronged blockchain strategy: 1) its own cryptocurrency called KodakCoin designed to improve ima…


What is the polarity of this article? n


What to Know About the Major Cryptocurrencies Besides Bitcoin
2017 was the year that cryptocurrency finally went mainstream as Bitcoin ’s 

In [19]:
# One boolean per article: True when it already carries a manual `polarity` label.
fil = [hasattr(article, 'polarity') for article in data]

In [20]:
sum(fil)

7

In [21]:
from itertools import compress
# Keep only the articles whose matching flag in `fil` is True.
labeled_data = list(compress(data, fil))

In [24]:
# Drop the articles labelled neutral ('nn'), keeping the other labels.
t = [x for x in labeled_data if x.polarity != 'nn']

In [25]:
t

[<__main__.Struct at 0x7f05b41ea630>,
 <__main__.Struct at 0x7f05ad607908>,
 <__main__.Struct at 0x7f05ad6079e8>]