In [1]:
import os
import pickle
import sqlite3
import traceback
from collections import defaultdict as dd
from contextlib import closing
from time import sleep

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException
from tqdm import tqdm

In [2]:
# source: https://realpython.com/python-web-scraping-practical-introduction/
def simple_get(url, timeout=10):
    """
    Attempts to get the content at `url` by making an HTTP GET request.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float or tuple, optional
        Forwarded to `requests.get`. Without a timeout a stalled server
        would block the caller indefinitely; a timeout surfaces as a
        `RequestException` and is handled like any other request failure.

    Returns
    -------
    bytes or None
        The raw response body (`resp.content`) when `is_good_response`
        accepts the response. None when the response is rejected, or when
        the request raises a `RequestException` (which is reported through
        `log_error`).
    """
    try:
        # closing() guarantees the streamed connection is released.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (case-insensitive). A response without a
    Content-Type header is treated as not HTML instead of raising KeyError.
    """
    # .get() so a missing header yields None rather than an exception;
    # lowercase only after the None check.
    content_type = resp.headers.get('Content-Type')
    return (resp.status_code == 200
            and content_type is not None
            and 'html' in content_type.lower())


def log_error(e):
    """
    Report an error by printing it to standard output.

    `e` may be any printable object (a message string or an exception).
    Swap the body for real logging if something more durable is needed.
    """
    message = str(e)
    print(message)

In [3]:
# database connection
conn = sqlite3.connect('stocks_data/sp500.db')
c = conn.cursor()

# Build ticker_df: one row per distinct symbol in fact_table, with its
# name/sector from dimension_table and the first/last date it appears on.
symbols = pd.read_sql("SELECT DISTINCT(symbol) FROM fact_table", con=conn)
dates = []
names = []
for symbol in symbols['symbol']:
    # Bind the symbol as a query parameter instead of formatting it into the
    # SQL text, so symbols containing quotes cannot break or alter the query.
    date_range = pd.read_sql(
        "SELECT min(date), max(date) FROM fact_table WHERE symbol = ?",
        con=conn, params=(symbol,))
    company = pd.read_sql(
        "SELECT name, sector FROM dimension_table WHERE symbol = ?",
        con=conn, params=(symbol,))

    # The aggregate query always returns exactly one row.
    dates.append(np.array(date_range)[0])
    # A symbol without a dimension_table row gets empty name/sector instead of
    # raising IndexError; when several rows exist the first one is used.
    if len(company):
        names.append(np.array(company)[0])
    else:
        names.append(np.array([None, None], dtype=object))

# Column-wise concat relies on all three frames sharing the default 0..n-1 index.
ticker_df = pd.concat(
    (
        symbols,
        pd.DataFrame(names, columns=['name', 'sector']),
        pd.DataFrame(dates, columns=['startdate', 'enddate']),
    ),
    axis=1,
)

In [4]:
# One-off setup for Tesla: start from row 3 of ticker_df, load the Tesla CSV,
# then force the search name to 'tesla' and reset the scrape state.
# NOTE(review): the next cell re-assigns ticker/name/textd/visited, so running
# both cells in order discards the name set here.
ticker, name, industry, sd, ed = np.array(ticker_df.iloc[3])

tesla_df = pd.read_csv('stocks_data/tesla.csv')
tesla_df['date']  # bare column lookup; the result is discarded

# Override the name unpacked from ticker_df.
name = 'tesla'

# textd: nested mapping filled by the scraping cell; visited: URLs already seen.
textd, visited = dd(dict), set()

In [5]:
# Setup for companies other than Tesla: unpack row 3 of ticker_df, reset the
# scrape state, then set the search name by hand.
ticker, name, industry, sd, ed = np.array(ticker_df.iloc[3])

# Fresh containers for each run: scraped text and the set of visited URLs.
textd, visited = dd(dict), set()

# Disabled alternative: derive the search name from ticker_df by stripping
# punctuation.
# name = name.replace('.', '')
# name = name.replace(',', '')

# Search name matched against NYT keyword values by the scraping cell.
# Values listed by the original author:
# 'amazon.com inc', 'apple inc', 'disney', 'goldman sachs', 'tesla'
name = 'amazon.com inc'

In [97]:
# NYT API key: read from the environment so the secret is not stored in the
# notebook. A key that was previously committed in this cell should be treated
# as leaked and rotated.
key = os.environ.get('NYT_API_KEY')
if not key:
    raise RuntimeError('Set the NYT_API_KEY environment variable before running this cell')

# <p> attribute filters tried in order, each paired with the message printed
# when that filter finds nothing.
_PARAGRAPH_FILTERS = (
    ({'itemprop': 'articleBody'}, 'no articlebody'),
    ({'class': 'story-body-text story-content'}, 'no story body text'),
    ({'class': 'css-1ygdjhk evys1bk0'}, 'no css tag'),
)


def _find_article_paragraphs(doc):
    """Return the <p> tags of the first filter that matches, or [] if none do."""
    for attrs, miss_message in _PARAGRAPH_FILTERS:
        found = doc.find_all('p', attrs=attrs)
        if found:
            return found
        print(miss_message)
    return []


# Walk the monthly archive; for every article with a keyword containing `name`
# that has not been visited yet, fetch the page and store its paragraphs
# (lowercased) in textd[publication date].
# NOTE(review): there is no pause between API calls - confirm the NYT rate
# limits before running the full 2010-2018 range.
name_lc = name.lower()
for year in np.arange(2010, 2019):
    for month in np.arange(1, 13):
        print(year, month)
        link = 'https://api.nytimes.com/svc/archive/v1/{}/{}.json?api-key={}'.format(year, month, key)
        metadata = pd.read_json(link)

        # iterate over each article
        for article in metadata['response']['docs']:
            url = article['web_url']
            if url in visited:
                continue
            if not any(name_lc in line['value'].lower() for line in article['keywords']):
                continue

            visited.add(url)
            date = article['pub_date'].split('T')[0]

            print(url)
            print(article['keywords'])

            # scrape text
            try:
                html = simple_get(url)
                if html is None:
                    # simple_get returns None for non-HTML responses and
                    # failed requests; there is nothing to parse.
                    print('no html content')
                    continue

                doc = BeautifulSoup(html, 'html.parser')
                paragraphs = _find_article_paragraphs(doc)

                # Several articles can share a publication date: continue the
                # paragraph numbering after what is already stored so a later
                # article does not overwrite an earlier one.
                offset = len(textd[date])
                for i, paragraph in enumerate(paragraphs):
                    textd[date][offset + i] = paragraph.get_text().lower()

                # debug: did this article yield any text?
                print(bool(paragraphs))
            except Exception:
                # Best effort: report the full traceback and move on to the
                # next article.
                print(traceback.format_exc())

2010 1
https://gadgetwise.blogs.nytimes.com/2010/01/06/vudu-expands-its-service-to-more-tv-makers/
[{'rank': '11', 'name': 'organizations', 'value': 'Vudu Inc'}, {'rank': '4', 'name': 'organizations', 'value': 'LG Electronics'}, {'rank': '10', 'name': 'organizations', 'value': 'Vizio Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Boxee Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'name': 'organizations', 'value': 'Netflix Inc'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'organizations', 'value': 'Roku'}, {'rank': '7', 'name': 'organizations', 'value': 'Roxio Inc'}, {'rank': '13', 'name': 'unknown', 'value': 'Netflix Inc|NFLX|NASDAQ'}, {'rank': '8', 'name': 'organizations', 'value': 'Sharp Corp'}, {'rank': '9', 'name': 'organizations', 'value': 'Toshiba Corporation'}, {'rank': '12', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}]
True
https://gadgetwise.blogs.nytimes.com/2010/01/07/pla

[{'rank': '11', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '4', 'name': 'subject', 'value': 'Book Trade'}, {'rank': '10', 'name': 'organizations', 'value': 'Macmillan Publishers'}, {'rank': '3', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '2', 'name': 'persons', 'value': 'Mantel, Hilary'}, {'rank': '5', 'name': 'subject', 'value': 'Books and Literature'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'subject', 'value': 'Electronic Books and Readers'}, {'rank': '7', 'name': 'subject', 'value': 'Royalties'}, {'rank': '8', 'name': 'subject', 'value': 'Writing and Writers'}, {'rank': '9', 'name': 'organizations', 'value': 'Amazon.com Inc'}]
True
https://www.nytimes.com/2010/02/04/technology/04amazon.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '3', 'is_major': 'N', 'name': 'subject',

[{'rank': '11', 'name': 'unknown', 'value': 'Apple Inc|AAPL|NASDAQ'}, {'rank': '4', 'name': 'subject', 'value': 'Books and Literature'}, {'rank': '10', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '3', 'name': 'subject', 'value': 'Book Trade'}, {'rank': '2', 'name': 'subject', 'value': 'Blogs and Blogging (Internet)'}, {'rank': '5', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'subject', 'value': 'iPad'}, {'rank': '7', 'name': 'subject', 'value': 'iPhone'}, {'rank': '8', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '9', 'name': 'organizations', 'value': 'Apple Inc'}]
True
https://bits.blogs.nytimes.com/2010/03/05/apple-ipad-arrives-in-stores-on-april-3/
[{'rank': '11', 'name': 'subject', 'value': 'Wireless Communications'}, {'rank': '4', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '10', 'name': 'subject', 'value': 'Shopping and Retail'}, {'r

[{'value': 'Cloud Computing', 'is_major': 'N', 'rank': '1', 'name': 'subject'}, {'value': 'Computers and the Internet', 'is_major': 'N', 'rank': '2', 'name': 'subject'}, {'value': 'Enterprise Computing', 'is_major': 'N', 'rank': '3', 'name': 'subject'}, {'value': 'Data Centers', 'is_major': 'N', 'rank': '4', 'name': 'subject'}, {'value': 'Amazon.com Inc', 'is_major': 'N', 'rank': '5', 'name': 'organizations'}, {'value': 'Microsoft Corp', 'is_major': 'N', 'rank': '6', 'name': 'organizations'}, {'value': 'Google Inc', 'is_major': 'N', 'rank': '7', 'name': 'organizations'}, {'value': 'AT&T Corp', 'is_major': 'N', 'rank': '8', 'name': 'organizations'}]
no articlebody
True
https://dealbook.nytimes.com/2010/04/20/another-view-peering-clearly-at-the-future/
[{'rank': '11', 'name': 'subject', 'value': 'Start-ups'}, {'rank': '4', 'name': 'subject', 'value': 'Data Storage'}, {'rank': '21', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '10', 'name': 'subject', 'value': 'Soft

[{'value': 'Antitrust Actions and Laws', 'is_major': 'N', 'rank': '1', 'name': 'subject'}, {'value': 'iTunes', 'is_major': 'N', 'rank': '2', 'name': 'organizations'}, {'value': 'Apple Inc', 'is_major': 'N', 'rank': '3', 'name': 'organizations'}, {'value': 'Amazon.com Inc', 'is_major': 'N', 'rank': '4', 'name': 'organizations'}, {'value': 'Justice Department', 'is_major': 'N', 'rank': '5', 'name': 'organizations'}, {'value': 'Computers and the Internet', 'is_major': 'N', 'rank': '6', 'name': 'subject'}, {'value': 'Music', 'is_major': 'N', 'rank': '7', 'name': 'subject'}, {'value': 'Recordings and Downloads (Audio)', 'is_major': 'N', 'rank': '8', 'name': 'subject'}]
no articlebody
True
https://bits.blogs.nytimes.com/2010/05/27/olpc-partners-with-marvell-to-launch-100-tablet/
[{'rank': '11', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '4', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '10', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '1

[{'rank': '1', 'is_major': 'N', 'value': 'Computers and the Internet', 'name': 'subject'}, {'rank': '2', 'is_major': 'N', 'value': 'Company Reports', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Amazon.com Inc', 'name': 'organizations'}]
no articlebody
True
https://dealbook.nytimes.com/2010/07/26/will-zynga-become-the-google-of-games/
[{'rank': '11', 'name': 'organizations', 'value': 'Zynga Game Network'}, {'rank': '4', 'name': 'organizations', 'value': 'eBay Inc'}, {'rank': '10', 'name': 'organizations', 'value': 'Yahoo! Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'DreamWorks Animation SKG Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '16', 'name': 'unknown', 'value': 'Yahoo Inc|YHOO|NASDAQ'}, {'rank': '5', 'name': 'organizations', 'value': 'Facebook.com'}, {'rank': '15', 'name': 'unknown', 'value': 'Google Inc|GOOG|NASDAQ'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'organizations'

[{'rank': '4', 'name': 'subject', 'value': 'Shopping and Retail'}, {'rank': '3', 'name': 'subject', 'value': 'iPad'}, {'rank': '2', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '5', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '7', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '8', 'name': 'unknown', 'value': 'Apple Inc|AAPL|NASDAQ'}]
True
https://bits.blogs.nytimes.com/2010/10/29/for-e-reader-customers-competition-is-paying-off/
[{'rank': '11', 'name': 'organizations', 'value': 'Barnes &'}, {'rank': '4', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '10', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '3', 'name': 'persons', 'value': 'Lynch, William'}, {'rank': '2', 'name': 'persons', 'value': 'Bezos, Jeffrey P'}, {'rank': '16', 'name': 'unknown', 'value': 'Google Inc|GOOG|NASDAQ'}, {'rank': '5', 'nam

[{'rank': '4', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'name': 'subject', 'value': 'Electronic Books and Readers'}, {'rank': '2', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '5', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}]
True
https://pogue.blogs.nytimes.com/2011/02/08/page-numbers-for-kindle-books-an-imperfect-solution/
[{'rank': '4', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'name': 'subject', 'value': 'Electronic Books and Readers'}, {'rank': '2', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '5', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}]
True
https://www.nytimes.com/2011/02/17/business/media/17borders.html
[{'rank': '1', 'is_major': 'N', 'value': 'Borders Group Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'Bankruptcies', 'name': 'subj

[{'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '10', 'name': 'organizations', 'value': 'Twitter'}, {'rank': '11', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '12', 'name': 'unknown', 'value': 'Apple Incorporated|AAPL|NASDAQ'}, {'rank': '13', 'name': 'unknown', 'value': 'Google Inc|GOOG|NASDAQ'}, {'rank': '2', 'name': 'persons', 'value': 'Jobs, Steven P'}, {'rank': '3', 'name': 'subject', 'value': 'Cloud Computing'}, {'rank': '4', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '5', 'name': 'subject', 'value': 'New Models, Design and Products'}, {'rank': '6', 'name': 'subject', 'value': 'Software'}, {'rank': '7', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '8', 'name': 'organizations', 'value': 'Apple Incorporated'}, {'rank': '9', 'name': 'organizations', 'value': 'Google Inc'}]
True
2011 6
https://gadgetwise.blogs.nytimes.com/2011/06/05/googles-new-chromebook-explained/
[{'rank': '1', 'name': 'type_of_

True
https://bits.blogs.nytimes.com/2011/08/12/amazon-cracks-down-on-some-e-book-publishers/
[{'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '2', 'name': 'subject', 'value': 'Electronic Books and Readers'}, {'rank': '3', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '4', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}]
True
https://www.nytimes.com/2011/08/14/sunday-review/obsessing-over-amazons-book-sale-statistics.html
[{'rank': '1', 'is_major': 'N', 'value': 'Books and Literature', 'name': 'subject'}, {'rank': '2', 'is_major': 'N', 'value': 'Shopping and Retail', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '4', 'is_major': 'N', 'value': 'Writing and Writers', 'name': 'subject'}, {'rank': '5', 'is_major': 'N', 'value': 'Book Trade and Publishing', 'name': 'subject'}, {'rank': '6', 'is_major': 'N', 'value': 'Sales', 'name': 'subject'}]
no articlebody
True
https://boss.bl

[{'rank': '4', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'name': 'subject', 'value': 'Tablet Computers'}, {'rank': '2', 'name': 'subject', 'value': 'iPad'}, {'rank': '5', 'name': 'organizations', 'value': 'Research in Motion Ltd'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'unknown', 'value': 'Kindle Fire'}, {'rank': '7', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}]
True
https://www.nytimes.com/2011/09/29/technology/amazon-unveils-tablet-that-undercuts-ipads-price.html
[{'rank': '1', 'is_major': 'N', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'Tablet Computers', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'iPad', 'name': 'subject'}, {'rank': '4', 'is_major': 'N', 'value': 'Electronic Books and Readers', 'name': 'subject'}, {'rank': '5', 'is_major': 'N', 'value': 'Amazon Kindle', 'name': 'subject'}]
no articlebody
True
https://dealbook.nytimes.

[{'rank': '4', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '10', 'name': 'unknown', 'value': 'Google Inc|GOOG|NASDAQ'}, {'rank': '3', 'name': 'subject', 'value': 'Lotteries'}, {'rank': '2', 'name': 'subject', 'value': 'Initial Public Offerings'}, {'rank': '5', 'name': 'organizations', 'value': 'Demand Media Inc'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '7', 'name': 'organizations', 'value': 'Groupon'}, {'rank': '8', 'name': 'organizations', 'value': 'pets.com'}, {'rank': '9', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}]
True
https://mediadecoder.blogs.nytimes.com/2011/11/07/new-nook-is-latest-entry-in-tablet-battle/
[{'rank': '11', 'name': 'unknown', 'value': 'Barnes & Noble Inc|BKS|NYSE'}, {'rank': '4', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '10', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '3', 'name': 'persons', 'value':

[{'rank': '11', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}, {'rank': '4', 'name': 'subject', 'value': 'Colleges and Universities'}, {'rank': '10', 'name': 'organizations', 'value': 'Reed College'}, {'rank': '3', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '2', 'name': 'persons', 'value': 'Bezos, Jeffrey P'}, {'rank': '5', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '6', 'name': 'subject', 'value': 'E-Books and Readers'}, {'rank': '7', 'name': 'subject', 'value': 'Textbooks'}, {'rank': '8', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '9', 'name': 'organizations', 'value': 'Apple Incorporated'}, {'rank': '12', 'name': 'unknown', 'value': 'Apple Incorporated|AAPL|NASDAQ'}]
True
https://mediadecoder.blogs.nytimes.com/2012/01/22/tablet-and-e-reader-sales-soar/
[{'rank': '4', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'name': 'subject', 'valu

[{'name': 'persons', 'value': 'HOANG, JUNIE'}, {'name': 'persons', 'value': 'CIEPLEY, MICHAEL'}, {'name': 'organizations', 'value': 'INTERNET MOVIE DATABASE'}, {'name': 'organizations', 'value': 'AMAZON.COM INC'}, {'name': 'subject', 'value': 'MOVIES'}, {'name': 'subject', 'value': 'AGE, CHRONOLOGICAL'}, {'name': 'subject', 'value': 'ACTORS AND ACTRESSES'}, {'name': 'subject', 'value': 'SUITS AND LITIGATION'}, {'name': 'subject', 'value': 'COMPUTERS AND THE INTERNET'}, {'name': 'subject', 'value': 'PRIVACY'}]
True
https://www.nytimes.com/2012/03/07/realestate/commercial/amazon-leads-an-office-upswing-in-seattle.html
[{'rank': '1', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'Y', 'value': 'Seattle (Wash)', 'name': 'glocations'}, {'rank': '3', 'is_major': 'Y', 'value': 'Real Estate (Commercial)', 'name': 'subject'}, {'rank': '4', 'is_major': 'N', 'value': 'Vulcan Inc', 'name': 'organizations'}, {'rank': '5', 'is_major': 'N', 'value': 'S

[{'name': 'persons', 'value': 'BEZOS, JEFFREY P'}, {'name': 'organizations', 'value': 'AMAZON.COM INC'}, {'name': 'subject', 'value': 'SHOPPING AND RETAIL'}, {'name': 'subject', 'value': 'ADVERTISING AND MARKETING'}, {'name': 'subject', 'value': 'PRICES (FARES, FEES AND RATES)'}, {'name': 'subject', 'value': 'E-COMMERCE'}, {'name': 'subject', 'value': 'FASHION AND APPAREL'}, {'name': 'subject', 'value': 'COMPUTERS AND THE INTERNET'}]
True
https://www.nytimes.com/2012/05/20/opinion/sunday/friedman-do-you-want-the-good-news-first.html
[{'name': 'glocations', 'value': 'SILICON VALLEY (CALIF)'}, {'name': 'glocations', 'value': 'SEATTLE (WASH)'}, {'name': 'organizations', 'value': 'AMAZON.COM INC'}, {'name': 'organizations', 'value': 'MICROSOFT CORPORATION'}, {'name': 'subject', 'value': 'EDUCATION (K-12)'}, {'name': 'subject', 'value': 'IMMIGRATION AND EMIGRATION'}, {'name': 'subject', 'value': 'FEDERAL BUDGET (US)'}, {'name': 'subject', 'value': 'RESEARCH'}, {'name': 'subject', 'value': '

[{'rank': '1', 'name': 'persons', 'value': 'Bezos, Jeffrey P'}, {'rank': '2', 'name': 'persons', 'value': 'Bezos, MacKenzie'}, {'rank': '1', 'name': 'glocations', 'value': 'Washington (State)'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '1', 'name': 'subject', 'value': 'Philanthropy'}, {'rank': '2', 'name': 'subject', 'value': 'Referendums'}, {'rank': '3', 'name': 'subject', 'value': 'Same-Sex Marriage, Civil Unions and Domestic Partnerships'}]
True
https://www.nytimes.com/2012/07/27/technology/amazon-delivers-on-revenue-but-not-on-profit.html
[{'rank': '1', 'is_major': 'N', 'value': 'Company Reports', 'name': 'subject'}, {'rank': '2', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}]
no articlebody
True
https://mediadecoder.blogs.nytimes.com/2012/07/31/amazon-revamps-its-cloud-music-player-to-compete-with-itunes/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'App

https://bits.blogs.nytimes.com/2012/09/20/designing-for-multiple-screens-is-about-consistency/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Twitter'}, {'rank': '1', 'name': 'subject', 'value': 'iPad'}, {'rank': '2', 'name': 'subject', 'value': 'iPhone'}, {'rank': '3', 'name': 'subject', 'value': 'Mobile Applications'}, {'rank': '4', 'name': 'subject', 'value': 'Smartphones'}, {'rank': '5', 'name': 'subject', 'value': 'Tablet Computers'}]
True
https://bits.blogs.nytimes.com/2012/09/21/daily-report-wal-mart-deletes-the-kindle/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Wal-Mart Stores Inc'}, {'rank': '1', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '2', 'name': 'subject', 'value': 'E-Books and Readers'}, {'rank': '3', 'name': 'subject', 'value': 'Shopping and Retail'}]
Tru

[{'rank': '1', 'is_major': 'Y', 'value': 'E-Commerce', 'name': 'subject'}, {'rank': '2', 'is_major': 'N', 'value': 'Shopping and Retail', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Prices (Fares, Fees and Rates)', 'name': 'subject'}, {'rank': '4', 'is_major': 'Y', 'value': 'Black Friday and Cyber Monday (Shopping)', 'name': 'subject'}, {'rank': '5', 'is_major': 'N', 'value': 'Target Corporation', 'name': 'organizations'}, {'rank': '6', 'is_major': 'N', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '7', 'is_major': 'N', 'value': 'Wal-Mart Stores Inc', 'name': 'organizations'}, {'rank': '8', 'is_major': 'N', 'value': 'Dynamite Data LLC', 'name': 'organizations'}]
no articlebody
True
https://www.nytimes.com/2012/12/03/us/winners-and-losers-in-texas.html
[{'rank': '1', 'is_major': 'Y', 'name': 'subject', 'value': 'Tax Credits, Deductions and Exemptions'}, {'rank': '2', 'is_major': 'Y', 'name': 'glocations', 'value': 'Texas'}, {'rank': '3', 'is_major': 'N',

[{'rank': '1', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'Untouchable: The Strange Life and Tragic Death of Michael Jackson (Book)', 'name': 'creative_works'}, {'rank': '3', 'is_major': 'N', 'value': 'Jackson, Michael', 'name': 'persons'}, {'rank': '4', 'is_major': 'N', 'value': 'Book Trade and Publishing', 'name': 'subject'}, {'rank': '5', 'is_major': 'Y', 'value': 'Sullivan, Randall', 'name': 'persons'}, {'rank': '6', 'is_major': 'N', 'value': 'Grove Press', 'name': 'organizations'}, {'rank': '7', 'is_major': 'Y', 'value': 'Books and Literature', 'name': 'subject'}]
no articlebody
no story body text
True
https://gadgetwise.blogs.nytimes.com/2013/01/29/qa-reading-google-books-on-an-iphone/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Barnes & Noble Inc'}, {'rank': '4', 'name': 'organiza

no articlebody
no story body text
True
https://www.nytimes.com/2013/03/29/technology/amazon-and-overstock-lose-challenge-to-online-sales-tax.html
[{'rank': '1', 'is_major': 'Y', 'value': 'E-Commerce', 'name': 'subject'}, {'rank': '2', 'is_major': 'Y', 'value': 'Sales and Excise Taxes', 'name': 'subject'}, {'rank': '3', 'is_major': 'Y', 'value': 'Overstock.com Inc', 'name': 'organizations'}, {'rank': '4', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '5', 'is_major': 'N', 'value': 'Decisions and Verdicts', 'name': 'subject'}, {'rank': '6', 'is_major': 'Y', 'value': 'New York State', 'name': 'glocations'}]
no articlebody
no story body text
True
2013 4
https://bits.blogs.nytimes.com/2013/04/01/emcs-amazon-challenger-comes-out/
[{'rank': '1', 'name': 'persons', 'value': 'Maritz, Paul'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'EMC Corporation'}, {'rank': '3', 'name': 'organizations',

[{'rank': '1', 'is_major': 'Y', 'value': 'Viacom Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '3', 'is_major': 'Y', 'value': 'Computers and the Internet', 'name': 'subject'}, {'rank': '4', 'is_major': 'N', 'value': 'Nick Jr (TV Network)', 'name': 'organizations'}, {'rank': '5', 'is_major': 'Y', 'value': 'Television', 'name': 'subject'}, {'rank': '6', 'is_major': 'N', 'value': 'Netflix Inc', 'name': 'organizations'}]
no articlebody
no story body text
True
https://dealbook.nytimes.com/2013/06/11/hggc-strikes-deal-for-mywebgrocer/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'HGGC LLC'}, {'rank': '3', 'name': 'organizations', 'value': 'MyWebGrocer.com'}, {'rank': '1', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '2', 'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures'}, {'rank': '3', 'name': 'subject', 'value': 'Priv

[{'rank': '5', 'is_major': 'Y', 'value': 'Organized Labor', 'name': 'subject'}, {'rank': '3', 'is_major': 'Y', 'value': 'Germany', 'name': 'glocations'}, {'rank': '1', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'Labor and Jobs', 'name': 'subject'}, {'rank': '4', 'is_major': 'N', 'value': 'Wages and Salaries', 'name': 'subject'}]
no articlebody
no story body text
True
https://artsbeat.blogs.nytimes.com/2013/08/06/amazon-expands-to-sell-art-online/
[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'is_major': 'Y', 'name': 'subject', 'value': 'Art'}]
True
https://bits.blogs.nytimes.com/2013/08/06/the-tech-sectors-mixed-response-to-bezoss-purchase-of-the-post/
[{'rank': '1', 'name': 'persons', 'value': 'Bezos, Jeffrey P'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Washington Post Company'}, {'rank': '1'

https://www.nytimes.com/interactive/2013/10/03/business/dealbook/Measuring-a-Tweets-Worth.html
[{'value': 'Amazon.com Inc', 'is_major': 'N', 'rank': '3', 'name': 'organizations'}, {'value': 'Stocks and Bonds', 'is_major': 'N', 'rank': '4', 'name': 'subject'}, {'value': 'Twitter', 'is_major': 'N', 'rank': '1', 'name': 'organizations'}, {'value': 'Facebook Inc', 'is_major': 'N', 'rank': '2', 'name': 'organizations'}]
no articlebody
no story body text
no css tag
False
https://pogue.blogs.nytimes.com/2013/10/03/a-new-kindle-fire-just-in-time-for-the-holiday-season/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '1', 'name': 'subject', 'value': 'Amazon Kindle'}, {'rank': '2', 'name': 'subject', 'value': 'Tablet Computers'}]
True
https://www.nytimes.com/2013/10/04/business/France-Takes-Aim-at-Amazon-to-Protect-Local-Bookshops.html
[{'rank': '1', 'is_major': 'N', 'value': 'E-Commerce', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Amazon.com Inc', 

https://www.nytimes.com/video/technology/100000002581467/amazon-prime-air.html
[{'rank': '1', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Drones (Pilotless Planes)'}]
no articlebody
no story body text
no css tag
False
https://economix.blogs.nytimes.com/2013/12/02/robots-and-property-values/
[{'rank': '1', 'name': 'type_of_material', 'value': 'News'}, {'rank': '2', 'name': 'subject', 'value': 'Automobiles'}, {'rank': '3', 'name': 'subject', 'value': 'Delivery Services'}, {'rank': '4', 'name': 'subject', 'value': 'Drones (Pilotless Planes)'}, {'rank': '5', 'name': 'subject', 'value': 'Real Estate and Housing (Residential)'}, {'rank': '6', 'name': 'subject', 'value': 'Urban Areas'}, {'rank': '7', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '8', 'name': 'unknown', 'value': 'Amazon.com Inc|AMZN|NASDAQ'}]
True
https://dealbook.nytimes.com/2013/12/02/amazons-blue-sky-thinking/
[{'rank': '1'

[{'value': 'Computers and the Internet', 'is_major': 'Y', 'rank': '1', 'name': 'subject'}, {'value': 'Apple Inc', 'is_major': 'Y', 'rank': '2', 'name': 'organizations'}, {'value': 'Google Inc', 'is_major': 'Y', 'rank': '3', 'name': 'organizations'}, {'value': 'Evernote Corp', 'is_major': 'N', 'rank': '4', 'name': 'organizations'}, {'value': 'Dropbox Inc', 'is_major': 'N', 'rank': '5', 'name': 'organizations'}, {'value': 'Amazon.com Inc', 'is_major': 'Y', 'rank': '6', 'name': 'organizations'}]
no articlebody
no story body text
True
https://india.blogs.nytimes.com/2014/02/21/snapdeal-the-ebay-of-india-sees-plenty-of-room-to-grow/
[{'rank': '4', 'name': 'glocations', 'value': 'New Delhi (India)'}, {'rank': '3', 'name': 'glocations', 'value': 'India'}, {'rank': '2', 'name': 'glocations', 'value': 'China'}, {'rank': '5', 'name': 'glocations', 'value': 'Silicon Valley (Calif)'}, {'rank': '1', 'name': 'glocations', 'value': 'Bangalore (India)'}, {'rank': '5', 'name': 'organizations', 'value':

[{'rank': '1', 'name': 'persons', 'value': 'Raymond, Eric S'}, {'rank': '2', 'name': 'persons', 'value': 'Vixie, Paul'}, {'rank': '1', 'name': 'glocations', 'value': 'San Francisco (Calif)'}, {'rank': '11', 'name': 'organizations', 'value': 'Microsoft Corporation'}, {'rank': '14', 'name': 'organizations', 'value': 'Yahoo! Inc'}, {'rank': '13', 'name': 'organizations', 'value': 'VMware Inc'}, {'rank': '12', 'name': 'organizations', 'value': 'Qualcomm Inc'}, {'rank': '5', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '6', 'name': 'organizations', 'value': 'Federal Bureau of Investigation'}, {'rank': '8', 'name': 'organizations', 'value': 'Intel Corporation'}, {'rank': '7', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '9', 'name': 'organizations', 'value': 'International Business Machines Corporation'}, {'rank': '2', 'name': 'organizations', 'value': 'Cisco Systems Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Defense Department'}, {'rank': '4', 'name'

https://www.nytimes.com/2014/05/31/opinion/how-book-publishers-can-beat-amazon.html
[{'rank': '5', 'is_major': 'N', 'value': 'Prices (Fares, Fees and Rates)', 'name': 'subject'}, {'rank': '9', 'is_major': 'N', 'value': 'Apple Inc', 'name': 'organizations'}, {'rank': '7', 'is_major': 'Y', 'value': 'Book Trade and Publishing', 'name': 'subject'}, {'rank': '6', 'is_major': 'Y', 'value': 'Antitrust Laws and Competition Issues', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Hachette Book Group', 'name': 'organizations'}, {'rank': '8', 'is_major': 'N', 'value': 'Suits and Litigation (Civil)', 'name': 'subject'}, {'rank': '1', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'Justice Department', 'name': 'organizations'}, {'rank': '4', 'is_major': 'N', 'value': 'E-Books and Readers', 'name': 'subject'}]
no articlebody
no story body text
True
2014 6
https://www.nytimes.com/2014/06/02/business/media/amazon-and-a-mattel-

no story body text
True
https://www.nytimes.com/2014/06/25/business/media/hachette-said-to-be-near-deal-for-perseus-books.html
[{'rank': '5', 'is_major': 'N', 'value': 'Mergers, Acquisitions and Divestitures', 'name': 'subject'}, {'rank': '8', 'is_major': 'Y', 'value': 'E-Books and Readers', 'name': 'subject'}, {'rank': '1', 'is_major': 'Y', 'value': 'Hachette Book Group', 'name': 'organizations'}, {'rank': '3', 'is_major': 'Y', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '4', 'is_major': 'Y', 'value': 'Book Trade and Publishing', 'name': 'subject'}, {'rank': '7', 'is_major': 'N', 'value': 'Pietsch, Michael', 'name': 'persons'}, {'rank': '2', 'is_major': 'Y', 'value': 'Perseus Books', 'name': 'organizations'}, {'rank': '6', 'is_major': 'N', 'value': 'Colbert, Stephen', 'name': 'persons'}]
no articlebody
no story body text
True
https://www.nytimes.com/2014/06/25/business/international/amazon-accused-in-Germany-of-antitrust-violation.html
[{'rank': '5', 'is_major': 'Y',

[{'rank': '2', 'name': 'organizations', 'value': 'Hachette Book Group'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'name': 'subject', 'value': 'Prices (Fares, Fees and Rates)'}, {'rank': '2', 'name': 'subject', 'value': 'E-Books and Readers'}, {'rank': '1', 'name': 'subject', 'value': 'Book Trade and Publishing'}]
True
https://www.nytimes.com/2014/08/01/opinion/All-Knowing-Amazon.html
[{'rank': '4', 'is_major': 'N', 'value': 'Amazon.com Inc', 'name': 'organizations'}, {'rank': '2', 'is_major': 'N', 'value': 'E-Books and Readers', 'name': 'subject'}, {'rank': '5', 'is_major': 'N', 'value': 'Kristof, Nicholas D', 'name': 'persons'}, {'rank': '1', 'is_major': 'N', 'value': 'Amazon Kindle', 'name': 'subject'}, {'rank': '3', 'is_major': 'N', 'value': 'Privacy', 'name': 'subject'}]
no articlebody
no story body text
True
https://bits.blogs.nytimes.com/2014/08/03/cloud-revenue-jumps-led-by-microsoft-and-ibm/
[{'rank': '3', 'is_major': 'Y', 'value': 'Micro

[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '11', 'is_major': 'N', 'name': 'creative_works', 'value': 'Red Oaks (TV Program)'}, {'rank': '2', 'is_major': 'Y', 'name': 'subject', 'value': 'Web-Original Programming'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Television'}, {'rank': '4', 'is_major': 'N', 'name': 'persons', 'value': 'Stillman, Whit'}, {'rank': '5', 'is_major': 'N', 'name': 'persons', 'value': 'Forster, Marc'}, {'rank': '6', 'is_major': 'N', 'name': 'persons', 'value': 'Green, David Gordon'}, {'rank': '7', 'is_major': 'N', 'name': 'creative_works', 'value': 'Hand of God (TV Program)'}, {'rank': '8', 'is_major': 'N', 'name': 'creative_works', 'value': 'Hysteria (TV Program)'}, {'rank': '9', 'is_major': 'N', 'name': 'creative_works', 'value': 'The Cosmopolitans (TV Program)'}, {'rank': '10', 'is_major': 'N', 'name': 'creative_works', 'value': 'Really (TV Program)'}]
no articlebody
no story body text
True
2014

[{'value': 'Brown, Sandra', 'name': 'persons', 'rank': '1'}, {'value': 'Ryan, Paul D Jr', 'name': 'persons', 'rank': '2'}, {'value': 'Schulman, Daniel', 'name': 'persons', 'rank': '3'}, {'value': 'Smiley, Tavis', 'name': 'persons', 'rank': '4'}, {'value': 'Amazon.com Inc', 'name': 'organizations', 'rank': '1'}, {'value': 'Hachette Book Group', 'name': 'organizations', 'rank': '2'}, {'value': 'Book Trade and Publishing', 'name': 'subject', 'rank': '1'}, {'value': 'E-Commerce', 'name': 'subject', 'rank': '2'}, {'value': 'Shopping and Retail', 'name': 'subject', 'rank': '3'}]
True
2014 10
https://bits.blogs.nytimes.com/2014/10/02/att-wants-you-to-put-down-the-phone-and-head-for-the-cloud/
[{'rank': '1', 'name': 'persons', 'value': 'de la Vega, Ralph'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'AT&T Inc'}, {'rank': '1', 'name': 'subject', 'value': 'Cloud Computing'}, {'rank': '2', 'name': 'subject', 'value': 'Enterpr

[{'value': 'Flippen, Alan', 'name': 'persons', 'rank': '1'}, {'value': 'Gessen, Keith', 'name': 'persons', 'rank': '2'}, {'value': 'Rakoff, Jed S', 'name': 'persons', 'rank': '3'}, {'value': 'Smith, Ben E', 'name': 'persons', 'rank': '4'}, {'value': 'Smith, Dave', 'name': 'persons', 'rank': '5'}, {'value': 'Stewart, Jon', 'name': 'persons', 'rank': '6'}, {'value': 'Wortham, Jenna', 'name': 'persons', 'rank': '7'}, {'value': 'United States', 'name': 'glocations', 'rank': '1'}, {'value': 'Amazon.com Inc', 'name': 'organizations', 'rank': '1'}, {'value': 'BuzzFeed Inc', 'name': 'organizations', 'rank': '2'}, {'value': 'Hachette Book Group', 'name': 'organizations', 'rank': '3'}, {'value': 'IMG Worldwide', 'name': 'organizations', 'rank': '4'}, {'value': 'New York Review of Books', 'name': 'organizations', 'rank': '5'}, {'value': 'New York Times', 'name': 'organizations', 'rank': '6'}, {'value': 'Texas State University', 'name': 'organizations', 'rank': '7'}, {'value': 'Vanity Fair', 'name

[{'value': 'Web-Original Programming', 'is_major': 'Y', 'rank': '1', 'name': 'subject'}, {'value': 'Kirke, Lola', 'is_major': 'N', 'rank': '11', 'name': 'persons'}, {'value': 'Mozart in the Jungle (Web-Original Program)', 'is_major': 'Y', 'rank': '12', 'name': 'creative_works'}, {'value': 'Amazon.com Inc', 'is_major': 'Y', 'rank': '2', 'name': 'organizations'}, {'value': 'Television', 'is_major': 'Y', 'rank': '3', 'name': 'subject'}, {'value': 'Garcia Bernal, Gael', 'is_major': 'N', 'rank': '4', 'name': 'persons'}, {'value': 'Tindall, Blair', 'is_major': 'N', 'rank': '5', 'name': 'persons'}, {'value': 'Timbers, Alex', 'is_major': 'N', 'rank': '6', 'name': 'persons'}, {'value': 'Schwartzman, Jason', 'is_major': 'N', 'rank': '7', 'name': 'persons'}, {'value': 'Coppola, Roman', 'is_major': 'N', 'rank': '8', 'name': 'persons'}, {'value': 'Burrows, Saffron', 'is_major': 'N', 'rank': '9', 'name': 'persons'}, {'value': 'McDowell, Malcolm', 'is_major': 'N', 'rank': '10', 'name': 'persons'}]
no

[{'rank': '1', 'is_major': 'Y', 'name': 'subject', 'value': 'Drones (Pilotless Planes)'}, {'rank': '2', 'is_major': 'Y', 'name': 'organizations', 'value': 'Federal Aviation Administration'}, {'rank': '3', 'is_major': 'Y', 'name': 'subject', 'value': 'Delivery Services'}, {'rank': '4', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'Y', 'name': 'persons', 'value': 'Obama, Barack'}, {'rank': '6', 'is_major': 'Y', 'name': 'subject', 'value': 'Executive Orders and Memorandums'}, {'rank': '7', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '8', 'is_major': 'N', 'name': 'subject', 'value': 'Regulation and Deregulation of Industry'}, {'rank': '9', 'is_major': 'N', 'name': 'persons', 'value': 'Shane, Scott'}]
no articlebody
no story body text
True
https://artsbeat.blogs.nytimes.com/2015/02/18/amazon-orders-full-seasons-for-five-new-shows/
[{'rank': '1', 'name': 'persons', 'value': 'Gibney, Alex'}, {'rank': '2', 'name'

[{'rank': '1', 'is_major': 'N', 'name': 'organizations', 'value': 'European Union'}, {'rank': '11', 'is_major': 'N', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '12', 'is_major': 'N', 'name': 'organizations', 'value': 'Microsoft Corporation'}, {'rank': '13', 'is_major': 'N', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '14', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '15', 'is_major': 'N', 'name': 'organizations', 'value': 'Qualcomm Inc'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'European Commission'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'European Court of Justice'}, {'rank': '4', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '5', 'is_major': 'N', 'name': 'subject', 'value': 'Tax Shelters'}, {'rank': '6', 'is_major': 'N', 'name': 'subject', 'value': 'Corporate Taxes'}, {'rank': '7', 'is_major': 'N', 'name': 'subject', 'value': 'Pri

[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'eMarketer Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Newegg Inc'}, {'rank': '1', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '2', 'name': 'subject', 'value': 'Shopping and Retail'}]
True
https://www.nytimes.com/2015/06/06/business/walmart-lagging-in-online-sales-is-strengthening-e-commerce.html
[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Walmart Stores Inc'}, {'rank': '2', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '3', 'is_major': 'Y', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '4', 'is_major': 'N', 'name': 'subject', 'value': 'Shopping and Retail'}]
no articlebody
no story body text
True
https://bits.blogs.nytimes.com/2015/06/12/youtube-takes-on-amazon-with-new-gaming-app/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'va

[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'is_major': 'Y', 'name': 'subject', 'value': 'Black Friday and Cyber Monday (Shopping)'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '4', 'is_major': 'N', 'name': 'subject', 'value': 'Prices (Fares, Fees and Rates)'}, {'rank': '5', 'is_major': 'N', 'name': 'subject', 'value': 'Discount Selling'}]
no articlebody
no story body text
True
https://tmagazine.blogs.nytimes.com/2015/07/24/limited-too-project-runway-emmy-fiorucci-style-news/
[{'rank': '1', 'name': 'persons', 'value': 'Fiorucci, Elio (1935-2015)'}, {'rank': '2', 'name': 'persons', 'value': 'Friedman, Vanessa V'}, {'rank': '3', 'name': 'persons', 'value': 'Gindi, Ralph'}, {'rank': '4', 'name': 'persons', 'value': 'McQueen, Alexander'}, {'rank': '1', 'name': 'glocations', 'value': 'Brazil'}, {'rank': '2', 'name': 'glocations', 'value': 'China'}, {'rank': '1', 'name': 'organizations', 'value': '

[{'rank': '1', 'name': 'persons', 'value': 'Branch, John'}, {'rank': '2', 'name': 'persons', 'value': 'Eligon, John'}, {'rank': '3', 'name': 'persons', 'value': 'Fink, Sheri'}, {'rank': '4', 'name': 'persons', 'value': 'Knowles, Beyonce'}, {'rank': '5', 'name': 'persons', 'value': 'Richardson, Lynda'}, {'rank': '6', 'name': 'persons', 'value': 'Wortham, Jenna'}, {'rank': '1', 'name': 'glocations', 'value': 'Yukon Territory'}, {'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'National Public Radio'}, {'rank': '3', 'name': 'organizations', 'value': 'New York Times'}, {'rank': '4', 'name': 'organizations', 'value': 'SB Nation'}, {'rank': '5', 'name': 'organizations', 'value': 'Vogue'}, {'rank': '6', 'name': 'organizations', 'value': 'Washington Post'}, {'rank': '1', 'name': 'subject', 'value': 'Deaths (Fatalities)'}, {'rank': '2', 'name': 'subject', 'value': 'E-Books and Readers'}, {'rank': '3', 'name': 'subject', 'value': 

https://www.nytimes.com/2015/09/19/technology/personaltech/shopping-for-amazon-apps-with-android.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Mobile Applications'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Android (Operating System)'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Microsoft Corp'}]
no articlebody
no story body text
True
https://artsbeat.blogs.nytimes.com/2015/09/23/amazon-announces-new-pilots-from-louis-c-k-tig-notaro-and-sacha-baron-cohen/
[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '1', 'name': 'subject', 'value': 'Web-Original Programming'}]
True
https://www.nytimes.com/2015/09/23/business/media/the-plot-twist-e-book-sales-slip-and-print-is-far-from-dead.html
[{'rank': '1', 'is_major': 'Y', 'name': 'subj

https://www.nytimes.com/2015/10/28/world/middleeast/walmart-withdraws-hooked-sheik-fagin-nose-from-halloween-store.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Halloween'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'Walmart Stores Inc'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'American-Arab Anti-Discrimination Committee'}, {'rank': '4', 'is_major': 'N', 'name': 'subject', 'value': 'Anti-Semitism'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '6', 'is_major': 'N', 'name': 'glocations', 'value': 'Israel'}, {'rank': '7', 'is_major': 'N', 'name': 'persons', 'value': 'Dickens, Charles'}, {'rank': '8', 'is_major': 'N', 'name': 'persons', 'value': 'Macklemore (Rapper)'}]
no articlebody
no story body text
True
https://www.nytimes.com/interactive/2015/10/28/technology/personaltech/How-Apple-TV-Stacks-Up.html
[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Television S

[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Waste Materials and Disposal'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Recycling of Waste Materials'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'Electronics'}, {'isMajor': 'N', 'rank': 4, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 5, 'name': 'organizations', 'value': 'Best BUY Company Inc'}, {'isMajor': 'N', 'rank': 6, 'name': 'organizations', 'value': 'Gazelle Inc'}, {'isMajor': 'N', 'rank': 7, 'name': 'subject', 'value': 'Hazardous and Toxic Substances'}, {'isMajor': 'N', 'rank': 8, 'name': 'subject', 'value': 'Gift Cards and Certificates'}, {'isMajor': 'N', 'rank': 9, 'name': 'organizations', 'value': 'PayPal'}]
no articlebody
no story body text
True
https://www.nytimes.com/2015/12/06/your-money/all-the-product-reviews-money-can-buy.html
[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Fiverr International Ltd'}, {'rank': '2', 'is_major': 'Y

[{'rank': '1', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '3', 'name': 'organizations', 'value': 'Microsoft Corp'}, {'rank': '1', 'name': 'subject', 'value': 'Cloud Computing'}, {'rank': '2', 'name': 'subject', 'value': 'Company Reports'}]
True
https://www.nytimes.com/2016/01/29/technology/amazon-earnings.html
[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Company Reports'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'E-Commerce'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/01/30/movies/sundance-fights-tide-with-films-like-the-birth-of-a-nation.html
[{'rank': '1', 'is_major': 'Y', 'name': 'subject', 'value': 'Sundance Film Festival (Park City, Utah)'}, {'rank': '11', 'is_major': 'N', 'name': 'persons', 'value': 'Hartigan, Chad'}, {'rank': '12', 'is_major': 'N', 'name

[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Computer Security'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'Apple Inc'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Yahoo! Inc'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/03/07/technology/tech-companies-new-and-old-clamor-to-entice-cloud-computing-experts.html
[{'rank': '1', 'is_major': 'Y', 'name': 'subject', 'value': 'Cloud Computing'}, {'rank': '2', 'is_major': 'Y', 'name': 'subject', 'value': 'Hiring and Promotion'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Wages and Salaries'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Alphabet Inc'}, {'rank': '6', 'is_major

[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/04/03/arts/television/on-catastrophe-a-monster-in-law-and-more-chaos.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Television'}, {'rank': '2', 'is_major': 'N', 'name': 'persons', 'value': 'Delaney, Rob'}, {'rank': '3', 'is_major': 'N', 'name': 'persons', 'value': 'Horgan, Sharon (1970- )'}, {'rank': '4', 'is_major': 'N', 'name': 'creative_works', 'value': 'Catastrophe (TV Program)'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/04/05/busin

https://www.nytimes.com/2016/05/13/business/office-depot-staples-merger-antitrust-amazon.html
[{'rank': '1', 'is_major': 'Y', 'name': 'organizations', 'value': 'Federal Trade Commission'}, {'rank': '2', 'is_major': 'Y', 'name': 'organizations', 'value': 'Office Depot Inc.'}, {'rank': '3', 'is_major': 'Y', 'name': 'organizations', 'value': 'Staples Inc'}, {'rank': '4', 'is_major': 'Y', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'Y', 'name': 'subject', 'value': 'Office Supplies and Equipment'}, {'rank': '6', 'is_major': 'Y', 'name': 'subject', 'value': 'Antitrust Laws and Competition Issues'}, {'rank': '7', 'is_major': 'N', 'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures'}, {'rank': '8', 'is_major': 'N', 'name': 'subject', 'value': 'Shopping and Retail'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/05/17/technology/amazon-proves-infertile-soil-for-unions-so-far.html
[{'rank': '1', 'is_major': 'Y', 'name': 'su

https://www.nytimes.com/2016/06/29/technology/personaltech/downloading-video-streams-to-go.html
[{'rank': '1', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'Hulu.com'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Netflix Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'PlayOn'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Microsoft Corp'}, {'rank': '6', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Play'}, {'rank': '7', 'is_major': 'N', 'name': 'organizations', 'value': 'YouTube.com'}, {'rank': '8', 'is_major': 'N', 'name': 'organizations', 'value': 'iTunes'}, {'rank': '9', 'is_major': 'Y', 'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming'}, {'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming'}, {'rank': '2', 'is_major': 'N', 'name': 'organization

[{'rank': '1', 'is_major': 'N', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'Twitter'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Yahoo! Inc'}, {'rank': '6', 'is_major': 'N', 'name': 'organizations', 'value': 'Microsoft Corp'}, {'rank': '7', 'is_major': 'N', 'name': 'organizations', 'value': 'Apple Inc'}]
no articlebody
no story body text
True
2016 8
https://www.nytimes.com/2016/08/01/us/winner-in-trump-feud-with-khan-family-the-constitution.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Constitution (US)'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Presidential Election of 2016'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Books and Literature'}, {'rank': '4', 'is_major': 'N',

https://www.nytimes.com/2016/09/29/technology/protecting-humans-and-jobs-from-robots-is-5-tech-giants-goal.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Artificial Intelligence'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Regulation and Deregulation of Industry'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Facebook Inc'}, {'rank': '6', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '7', 'is_major': 'N', 'name': 'organizations', 'value': 'International Business Machines Corporation'}, {'rank': '8', 'is_major': 'N', 'name': 'organizations', 'value': 'Microsoft Corp'}, {'rank': '9', 'is_major': 'N', 'name': 'organizations', 'value': 'Partnership on AI'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/09/29/fas

no articlebody
no story body text
True
https://www.nytimes.com/2016/11/15/technology/personaltech/how-to-stream-video-on-an-older-tv.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Television Sets and Media Devices'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Apple TV'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '6', 'is_major': 'N', 'name': 'organizations', 'value': 'Roku'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/11/17/technology/personaltech/how-not-to-overpay-on-black-friday-let-the-web-be-your-guide.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Shopping and Retail'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Black Friday and Cyber Monday (Shop

https://www.nytimes.com/2016/12/27/business/dealbook/walmart-slowly-makes-strides-in-e-commerce.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'E-Commerce'}, {'rank': '2', 'is_major': 'N', 'name': 'organizations', 'value': 'Walmart Stores Inc'}, {'rank': '3', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '4', 'is_major': 'N', 'name': 'organizations', 'value': 'Jet.com Inc'}, {'rank': '5', 'is_major': 'N', 'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures'}, {'rank': '6', 'is_major': 'N', 'name': 'subject', 'value': 'Shopping and Retail'}]
no articlebody
no story body text
True
https://www.nytimes.com/2016/12/29/movies/streaming-movies-for-the-soused-or-now-sober.html
[{'rank': '1', 'is_major': 'N', 'name': 'subject', 'value': 'Movies'}, {'rank': '11', 'is_major': 'N', 'name': 'creative_works', 'value': 'The Thin Man (Movie)'}, {'rank': '12', 'is_major': 'N', 'name': 'creative_works', 'value': 'When a Man Loves a Woman

[{'rank': '1', 'is_major': 'N', 'name': 'persons', 'value': 'Trump, Donald J'}, {'rank': '2', 'is_major': 'N', 'name': 'subject', 'value': 'Immigration and Emigration'}, {'rank': '3', 'is_major': 'N', 'name': 'subject', 'value': 'Executive Orders and Memorandums'}, {'rank': '4', 'is_major': 'N', 'name': 'subject', 'value': 'Demonstrations, Protests and Riots'}, {'rank': '5', 'is_major': 'N', 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'rank': '6', 'is_major': 'N', 'name': 'organizations', 'value': 'Expedia Inc'}, {'rank': '7', 'is_major': 'N', 'name': 'organizations', 'value': 'Google Inc'}, {'rank': '8', 'is_major': 'N', 'name': 'subject', 'value': 'Computers and the Internet'}, {'rank': '9', 'is_major': 'N', 'name': 'subject', 'value': 'Foreign Workers'}]
no articlebody
no story body text
True
2017 2
https://www.nytimes.com/2017/02/02/business/amazon-quarter-revenue.html
[{'name': 'subject', 'value': 'E-Commerce', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value':

[{'isMajor': 'N', 'rank': 1, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Fashion and Apparel'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'E-Commerce'}, {'isMajor': 'N', 'rank': 4, 'name': 'subject', 'value': 'Shopping and Retail'}, {'isMajor': 'N', 'rank': 5, 'name': 'subject', 'value': 'Computers and the Internet'}, {'isMajor': 'N', 'rank': 6, 'name': 'persons', 'value': 'Bezos, Jeffrey P'}]
no articlebody
no story body text
True
2017 5
https://www.nytimes.com/2017/05/01/sports/basketball/wnba-twitter-streaming.html
[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Social Media'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'Basketball'}, {'isMajor': 'N', 'rank': 4, 'name': 'persons', 'value': 'Borders, Lisa M (1957- )'}, {'isMajor': 'N', 'rank': 5, 'name': 'organizations', 'value': 'A

https://www.nytimes.com/2017/06/07/opinion/the-antithesis-of-amazons-retail-shop.html
[{'isMajor': 'N', 'rank': 1, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Shopping and Retail'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/06/07/business/dealbook/stock-market-facebook-amazon-apple-google-netflix.html
[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Stocks and Bonds'}, {'isMajor': 'N', 'rank': 2, 'name': 'organizations', 'value': 'Alphabet Inc'}, {'isMajor': 'N', 'rank': 3, 'name': 'organizations', 'value': 'Facebook Inc'}, {'isMajor': 'N', 'rank': 4, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 5, 'name': 'organizations', 'value': 'Netflix Inc'}, {'isMajor': 'N', 'rank': 6, 'name': 'organizations', 'value': 'Google Inc'}, {'isMajor': 'N', 'rank': 7, 'name': 'organizations', 'value': 'Apple Inc'}, {'isMajor': 'N', 'rank': 8, 'name': 'subject', 'value': '

[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Corporations'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Executives and Management (Theory)'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'Appointments and Executive Changes'}, {'isMajor': 'N', 'rank': 4, 'name': 'organizations', 'value': 'General Electric Company'}, {'isMajor': 'N', 'rank': 5, 'name': 'persons', 'value': 'Immelt, Jeffrey R'}, {'isMajor': 'N', 'rank': 6, 'name': 'persons', 'value': 'Welch, John F Jr'}, {'isMajor': 'N', 'rank': 7, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 8, 'name': 'organizations', 'value': 'Whole Foods Market Inc'}, {'isMajor': 'N', 'rank': 9, 'name': 'subject', 'value': 'Shareholder Rights and Activism'}, {'isMajor': 'N', 'rank': 10, 'name': 'subject', 'value': 'Boards of Directors'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/06/17/technology/whole-foods-amazon-jeff-bezos.html
[{'isMajor': 'N', 'rank': 1

[{'isMajor': 'N', 'rank': 1, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'Artificial Intelligence'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/07/11/business/amazon-echo-911-emergency.html
[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'Home Automation and Smart Homes'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Nine-One-One (911) (Emergency Phone Number)'}, {'isMajor': 'N', 'rank': 3, 'name': 'organizations', 'value': 'Amazon.com Inc'}, {'isMajor': 'N', 'rank': 4, 'name': 'glocations', 'value': 'Albuquerque (NM)'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/07/10/business/dealbook/e-commerce-jobs-retailing.html
[{'isMajor': 'N', 'rank': 1, 'name': 'subject', 'value': 'E-Commerce'}, {'isMajor': 'N', 'rank': 2, 'name': 'subject', 'value': 'Labor and Jobs'}, {'isMajor': 'N', 'rank': 3, 'name': 'subject', 'value': 'Economics (Theory and Philosophy)'}

[{'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Whole Foods Market Inc', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Prices (Fares, Fees and Rates)', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Supermarkets and Grocery Stores', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Organic Foods and Products', 'rank': 6, 'major': 'N'}, {'name': 'subject', 'value': 'Customer Loyalty Programs', 'rank': 7, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/08/25/technology/farhad-and-mikes-week-in-review-apples-self-driving-shift.html
[{'name': 'subject', 'value': 'Computers and the Internet', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Apple Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 3, 'major': 'N'}, {

[{'name': 'subject', 'value': 'Initial Public Offerings', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'HelloFresh', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Blue Apron', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Frankfurt Stock Exchange', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Whole Foods Market Inc', 'rank': 6, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/10/11/insider/tech-column-dread.html
[{'name': 'subject', 'value': 'Computers and the Internet', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Smartphones', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Facebook Inc', 'rank': 4, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/10/11/technology/the-frigh

[{'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Urban Areas', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Labor and Jobs', 'rank': 3, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/10/26/business/big-tech-company-earnings.html
[{'name': 'subject', 'value': 'Company Reports', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Cloud Computing', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Alphabet Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Google Inc', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Microsoft Corp', 'rank': 6, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/10/26/arts/television/amazon-prime-fearless-review.html
[{'name': 'subject', 'value': 'Television', 'rank': 1, 'major': 'N'}, {'name': '

https://www.nytimes.com/2017/12/03/business/dealbook/cvs-is-said-to-agree-to-buy-aetna-reshaping-health-care-industry.html
[{'name': 'organizations', 'value': 'CVS Caremark Corporation', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Aetna Inc', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Health Insurance and Managed Care', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Drugstores', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Drugs (Pharmaceuticals)', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 7, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2017/12/04/world/australia/amazon-australian-debut.html
[{'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 1, 'major': 'N'}, {'name': 'glocations', 'value': 'Australia', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'val

[{'name': 'persons', 'value': 'Bezos, Jeffrey P', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Blue Origin', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Computers and the Internet', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'E-Commerce', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Newspapers', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Washington Post', 'rank': 7, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/01/14/business/bank-earnings-china-sundance.html
[{'name': 'subject', 'value': 'Banking and Financial Institutions', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Airlines and Airplanes', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Sundance Film Festival (Park City, Utah)', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Company Reports', 'rank': 4, 'major': 'N'}, {

[{'name': 'subject', 'value': 'Health Insurance and Managed Care', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Berkshire Hathaway Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'JPMorgan Chase & Company', 'rank': 4, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/01/30/upshot/can-amazon-and-friends-handle-health-care-theres-reason-for-doubt.html
[{'name': 'subject', 'value': 'Health Insurance and Managed Care', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Berkshire Hathaway Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'JPMorgan Chase & Company', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Labor and Jobs', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Medicine and Health', 'rank': 6, 'major': 'N'}]
n

[{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Television', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Web-Original Programming', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Columbia Pictures', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Netflix Inc', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Sony Corporation', 'rank': 7, 'major': 'N'}, {'name': 'persons', 'value': 'Lynton, Michael', 'rank': 8, 'major': 'N'}, {'name': 'persons', 'value': 'Pascal, Amy', 'rank': 9, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/03/07/technology/amazon-prime-medicaid.html
[{'name': 'subject', 'value': 'Shopping and Retail', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'E-Commerce', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Medicaid', 'rank': 3, 'major':

https://www.nytimes.com/2018/04/06/technology/kevin-week-in-tech.html
[{'name': 'subject', 'value': 'Computers and the Internet', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Facebook Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'YouTube.com', 'rank': 4, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/04/06/technology/personaltech/moving-your-e-book-collection-to-one-device.html
[{'name': 'subject', 'value': 'E-Books and Readers', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Barnes & Noble Inc', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Tablet Computers', 'rank': 4, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/04/10/business/farmers-ecommerce-amazon.html
[{'name': 'subject', 'value': 'Deliv

https://www.nytimes.com/2018/05/08/opinion/economy-trump-election.html
[{'name': 'subject', 'value': 'United States Economy', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Voting and Voters', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Presidential Election of 2016', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Minimum Wage', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Discrimination', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Barnes & Noble Inc', 'rank': 6, 'major': 'N'}, {'name': 'persons', 'value': 'Trump, Donald J', 'rank': 7, 'major': 'N'}, {'name': 'persons', 'value': 'Obama, Barack', 'rank': 8, 'major': 'N'}, {'name': 'subject', 'value': 'Race and Ethnicity', 'rank': 9, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 10, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/05/09/opinion/jeff-bezos-spend-131-billion.html
[{'name': 'subject', 'value': 'Private

https://www.nytimes.com/2018/06/12/technology/seattle-tax-amazon.html
[{'name': 'subject', 'value': 'City Councils', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Corporate Taxes', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 3, 'major': 'N'}, {'name': 'glocations', 'value': 'Seattle (Wash)', 'rank': 4, 'major': 'N'}, {'name': 'persons', 'value': 'Durkan, Jenny A', 'rank': 5, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/06/17/business/media/amazon-twitch-video-games.html
[{'name': 'organizations', 'value': 'Twitch Interactive Inc', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Computer and Video Games', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Video Recordings, Downloads and Streaming', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 4, 'major': 'N'}, {'name': 'persons', 'value': 'Blevins, Tyler (1991- )', 'rank': 5, 'major': 'N'}, {'

[{'name': 'subject', 'value': 'Computers and the Internet', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Social Media', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Start-ups', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Apple Inc', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Facebook Inc', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Google Inc', 'rank': 7, 'major': 'N'}, {'name': 'organizations', 'value': 'Instagram Inc', 'rank': 8, 'major': 'N'}, {'name': 'organizations', 'value': 'Uber Technologies Inc', 'rank': 9, 'major': 'N'}, {'name': 'organizations', 'value': 'Twitter', 'rank': 10, 'major': 'N'}, {'name': 'organizations', 'value': 'Netflix Inc', 'rank': 11, 'major': 'N'}, {'name': 'persons', 'value': 'Zuckerberg, Mark E', 'rank': 12, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/07/13/movi

[{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Doomsday', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Netflix Inc', 'rank': 3, 'major': 'N'}, {'name': 'creative_works', 'value': 'How It Ends (Movie)', 'rank': 4, 'major': 'N'}, {'name': 'creative_works', 'value': 'Anon (Movie)', 'rank': 5, 'major': 'N'}, {'name': 'creative_works', 'value': 'No Blade of Grass (Movie)', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 7, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/08/14/magazine/facebook-google-privacy-data.html
[{'name': 'subject', 'value': 'Privacy', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Data-Mining and Database Marketing', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Surveillance of Citizens by Government', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Politics and Government', 'rank': 4, 'major': 'N'}, {'

[{'name': 'subject', 'value': 'Russian Interference in 2016 US Elections and Ties to Trump Associates', 'rank': 1, 'major': 'N'}, {'name': 'persons', 'value': 'Warren, Elizabeth', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'United States Politics and Government', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Presidential Election of 2020', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Tax Cuts and Jobs Act (2017)', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Banking and Financial Institutions', 'rank': 6, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 7, 'major': 'N'}, {'name': 'organizations', 'value': 'Facebook Inc', 'rank': 8, 'major': 'N'}, {'name': 'organizations', 'value': 'Apple Inc', 'rank': 9, 'major': 'N'}, {'name': 'organizations', 'value': 'Goldman Sachs Group Inc', 'rank': 10, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/09/19/business/dealbook/tilray-cannabis.htm

[{'name': 'subject', 'value': 'United States International Relations', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Computers and the Internet', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Cyberwarfare and Defense', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Espionage and Intelligence Services', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': 'Apple Inc', 'rank': 6, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/10/09/technology/amazon-workers-pay-raise.html
[{'name': 'subject', 'value': 'Wages and Salaries', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Labor and Jobs', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Bonuses', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Stock Options and Purchase Plans', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 5, '

[{'name': 'subject', 'value': 'Book Trade and Publishing', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'E-Commerce', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'AbeBooks Inc', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Demonstrations, Protests and Riots', 'rank': 5, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/11/06/nyregion/amazon-long-island-city.html
[{'name': 'subject', 'value': 'Subways', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'glocations', 'value': 'Long Island City (Queens, NY)', 'rank': 3, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/11/06/opinion/amazon-hq2-split.html
[{'name': 'subject', 'value': 'Labor and Jobs', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Relocation of Business', 'rank': 2, 'maj

https://www.nytimes.com/2018/11/14/style/jonathan-adler-amazon.html
[{'name': 'subject', 'value': 'E-Commerce', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Interior Design and Furnishings', 'rank': 2, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 3, 'major': 'N'}, {'name': 'persons', 'value': 'Adler, Jonathan (1966- )', 'rank': 4, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/11/15/opinion/military-veterans-colleges-universities.html
[{'name': 'subject', 'value': 'Colleges and Universities', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Veterans', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Graduation Rates', 'rank': 3, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Capitalism (Theory and Philosophy)', 'rank': 5, 'major': 'N'}, {'name': 'subject', 'value': 'Corporations', 'rank': 6, 'major': 'N'}, {'name': 'sub

[{'name': 'subject', 'value': 'Athletics and Sports', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Media', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Mergers, Acquisitions and Divestitures', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Cable Television', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 5, 'major': 'N'}, {'name': 'organizations', 'value': '21st Century Fox', 'rank': 6, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/11/21/world/australia/amazon-australia-access-website.html
[{'name': 'subject', 'value': 'E-Commerce', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Shopping and Retail', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Black Friday and Cyber Monday (Shopping)', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'International Trade and World Market', 'rank': 4, 'major': 'N'}, {'name': 'subject', 'value': 'Delivery Services', 'rank

[{'name': 'organizations', 'value': 'Google Inc', 'rank': 1, 'major': 'N'}, {'name': 'organizations', 'value': 'Amazon.com Inc', 'rank': 2, 'major': 'N'}, {'name': 'subject', 'value': 'Computers and the Internet', 'rank': 3, 'major': 'N'}, {'name': 'subject', 'value': 'Real Estate (Commercial)', 'rank': 4, 'major': 'N'}, {'name': 'organizations', 'value': 'Taconic Investment Partners', 'rank': 5, 'major': 'N'}, {'name': 'persons', 'value': 'Hoylman, Brad M', 'rank': 6, 'major': 'N'}, {'name': 'persons', 'value': 'Johnson, Corey', 'rank': 7, 'major': 'N'}, {'name': 'glocations', 'value': 'Chelsea (Manhattan, NY)', 'rank': 8, 'major': 'N'}, {'name': 'glocations', 'value': 'New York City', 'rank': 9, 'major': 'N'}]
no articlebody
no story body text
True
https://www.nytimes.com/2018/12/16/business/economy/nashville-birmingham-amazon.html
[{'name': 'subject', 'value': 'Labor and Jobs', 'rank': 1, 'major': 'N'}, {'name': 'subject', 'value': 'Relocation of Business', 'rank': 2, 'major': 'N'},

In [98]:
# Serialize `textd` (defined outside this cell) to news_data/<name>.pickle
# in binary mode. The news_data/ directory must already exist.
name = 'amazon'  # amazon, apple, disney, goldman_sachs, tesla
# `with` closes the handle even if pickling raises part-way through.
# pickle.dump() writes directly to the file object and returns None, so
# there is no return value to keep and no separate f.write() step.
with open('news_data/' + str(name) + '.pickle', 'wb') as f:
    pickle.dump(textd, f)

In [103]:
# Load the object stored in news_data/<name>.pickle (binary mode) into
# `raw_articles`.
name = 'amazon'
# NOTE(review): pickle.load() can execute arbitrary code while unpickling;
# only load files produced by this notebook or another trusted source.
# `with` closes the handle even if unpickling raises.
with open('news_data/' + str(name) + '.pickle', 'rb') as f:
    raw_articles = pickle.load(f)