# Dependencies

In [12]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv
from tqdm import tqdm_notebook as tqdm
from bs4 import BeautifulSoup as bs
from splinter import Browser


# Scrape/Extract Fortune 500 Table

In [7]:
# Use Pandas to scrape the table containing stocks data.
url_stock = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Convert table to html
stock_table = pd.read_html(url_stock)

# Print table 
print(stock_table)

[    Symbol                         Security SEC filings  \
0      MMM                       3M Company     reports   
1      ABT              Abbott Laboratories     reports   
2     ABBV                      AbbVie Inc.     reports   
3     ABMD                      ABIOMED Inc     reports   
4      ACN                    Accenture plc     reports   
5     ATVI              Activision Blizzard     reports   
6     ADBE                Adobe Systems Inc     reports   
7      AMD       Advanced Micro Devices Inc     reports   
8      AAP               Advance Auto Parts     reports   
9      AES                         AES Corp     reports   
10     AMG    Affiliated Managers Group Inc     reports   
11     AFL                        AFLAC Inc     reports   
12       A         Agilent Technologies Inc     reports   
13     APD     Air Products & Chemicals Inc     reports   
14    AKAM          Akamai Technologies Inc     reports   
15     ALK             Alaska Air Group Inc     reports

[216 rows x 6 columns]]


## Type of Data & Convert to DataFrame

In [8]:
# Identify what type is the output data
type(stock_table)

# Ensure to select the first part of the list
stock_table_df = pd.DataFrame(stock_table[0])

# Save raw data to csv
stock_table_df.to_csv("../stocks_data/raw_fortune500table.csv", index=False, encoding='utf8')

stock_table_df.head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,ABIOMED Inc,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


## Make symbols to list

In [9]:
# Make he Symbol column to a list 
symbols_list = stock_table_df["Symbol"].values.tolist()
symbols_list


['MMM',
 'ABT',
 'ABBV',
 'ABMD',
 'ACN',
 'ATVI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AMG',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALXN',
 'ALGN',
 'ALLE',
 'AGN',
 'ADS',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AIV',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ARNC',
 'ANET',
 'AJG',
 'AIZ',
 'ATO',
 'T',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BHGE',
 'BLL',
 'BAC',
 'BK',
 'BAX',
 'BBT',
 'BDX',
 'BRK.B',
 'BBY',
 'BIIB',
 'BLK',
 'HRB',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BF.B',
 'CHRW',
 'COG',
 'CDNS',
 'CPB',
 'COF',
 'CPRI',
 'CAH',
 'KMX',
 'CCL',
 'CAT',
 'CBOE',
 'CBRE',
 'CBS',
 'CE',
 'CELG',
 'CNC',
 'CNP',
 'CTL',
 'CERN',
 'CF',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'XEC',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CM

# Extract Daily Stocks Data for fortune 500

In [31]:
# base_url = "https://cloud.iexapis.com/stable/stock/"
# key_token = "/quote?token=pk_e4c3fa15e49a4efca388fcfbcece0eac"

# # Empty list to store data
# stocks_data = []

# # Start counter for the loop

# symbol_counter = 1

# #symbols_list2 = ['MMM','ABT','AIG','HAL','SLB'] # test list

# #Print statement to inform when data acquisition start
# print(f"Beginning Data Acquisition!!!")
# print(f"------------&&&&-------------------")

# for symbol in tqdm(symbols_list): #Note that we are using the symbol list
#     try: 
#         response = requests.get(base_url+symbol+key_token).json()
#         stocks_data.append(response)
        
#         print(f"Data Acquisition {symbol_counter} | Stock Symbol : {symbol}")
#         symbol_counter = symbol_counter + 1
#     except:
#         print("Stock not found...!!")
#     continue
    
# # Print statement to inform when all records have been acquired.
# print(f"------------&&&&---------------")
# print(f"Data Acquisition Complete")
# print(f"------------&&&&---------------")      


Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

Data Acquisition 1 | Stock Symbol : MMM
Data Acquisition 2 | Stock Symbol : ABT
Data Acquisition 3 | Stock Symbol : ABBV
Data Acquisition 4 | Stock Symbol : ABMD
Data Acquisition 5 | Stock Symbol : ACN
Data Acquisition 6 | Stock Symbol : ATVI
Data Acquisition 7 | Stock Symbol : ADBE
Data Acquisition 8 | Stock Symbol : AMD
Data Acquisition 9 | Stock Symbol : AAP
Data Acquisition 10 | Stock Symbol : AES
Data Acquisition 11 | Stock Symbol : AMG
Data Acquisition 12 | Stock Symbol : AFL
Data Acquisition 13 | Stock Symbol : A
Data Acquisition 14 | Stock Symbol : APD
Data Acquisition 15 | Stock Symbol : AKAM
Data Acquisition 16 | Stock Symbol : ALK
Data Acquisition 17 | Stock Symbol : ALB
Data Acquisition 18 | Stock Symbol : ARE
Data Acquisition 19 | Stock Symbol : ALXN
Data Acquisition 20 | Stock Symbol : ALGN
Data Acquisition 21 | Stock Symbol : ALLE
Data Acquisition 22 | Stock Symbol : AGN
Data Acquisition 23 | Stock Symbol : ADS
Data Acquisition 24 | Stock Symbol : LNT
Data Acquisition 25

Data Acquisition 197 | Stock Symbol : FITB
Data Acquisition 198 | Stock Symbol : FE
Data Acquisition 199 | Stock Symbol : FRC
Data Acquisition 200 | Stock Symbol : FISV
Data Acquisition 201 | Stock Symbol : FLT
Data Acquisition 202 | Stock Symbol : FLIR
Data Acquisition 203 | Stock Symbol : FLS
Data Acquisition 204 | Stock Symbol : FMC
Data Acquisition 205 | Stock Symbol : F
Data Acquisition 206 | Stock Symbol : FTNT
Data Acquisition 207 | Stock Symbol : FTV
Data Acquisition 208 | Stock Symbol : FBHS
Data Acquisition 209 | Stock Symbol : FOXA
Data Acquisition 210 | Stock Symbol : FOX
Data Acquisition 211 | Stock Symbol : BEN
Data Acquisition 212 | Stock Symbol : FCX
Data Acquisition 213 | Stock Symbol : GPS
Data Acquisition 214 | Stock Symbol : GRMN
Data Acquisition 215 | Stock Symbol : IT
Data Acquisition 216 | Stock Symbol : GD
Data Acquisition 217 | Stock Symbol : GE
Data Acquisition 218 | Stock Symbol : GIS
Data Acquisition 219 | Stock Symbol : GM
Data Acquisition 220 | Stock Symbo

Data Acquisition 391 | Stock Symbol : PVH
Data Acquisition 392 | Stock Symbol : QRVO
Data Acquisition 393 | Stock Symbol : PWR
Data Acquisition 394 | Stock Symbol : QCOM
Data Acquisition 395 | Stock Symbol : DGX
Data Acquisition 396 | Stock Symbol : RL
Data Acquisition 397 | Stock Symbol : RJF
Data Acquisition 398 | Stock Symbol : RTN
Data Acquisition 399 | Stock Symbol : O
Data Acquisition 400 | Stock Symbol : REG
Data Acquisition 401 | Stock Symbol : REGN
Data Acquisition 402 | Stock Symbol : RF
Data Acquisition 403 | Stock Symbol : RSG
Data Acquisition 404 | Stock Symbol : RMD
Data Acquisition 405 | Stock Symbol : RHI
Data Acquisition 406 | Stock Symbol : ROK
Data Acquisition 407 | Stock Symbol : ROL
Data Acquisition 408 | Stock Symbol : ROP
Data Acquisition 409 | Stock Symbol : ROST
Data Acquisition 410 | Stock Symbol : RCL
Data Acquisition 411 | Stock Symbol : CRM
Data Acquisition 412 | Stock Symbol : SBAC
Data Acquisition 413 | Stock Symbol : SLB
Data Acquisition 414 | Stock Symb

## Display Data Output

In [32]:
# # Show data extracted
# stocks_data

[{'symbol': 'MMM',
  'companyName': '3M Co.',
  'primaryExchange': 'New York Stock Exchange',
  'calculationPrice': 'close',
  'open': 162.39,
  'openTime': 1567690274965,
  'close': 164.23,
  'closeTime': 1567713767033,
  'high': 166.415,
  'low': 162.39,
  'latestPrice': 164.23,
  'latestSource': 'Close',
  'latestTime': 'September 5, 2019',
  'latestUpdate': 1567713767033,
  'latestVolume': 2421361,
  'iexRealtimePrice': 164.25,
  'iexRealtimeSize': 100,
  'iexLastUpdated': 1567713594190,
  'delayedPrice': 164.23,
  'delayedPriceTime': 1567714200010,
  'extendedPrice': 164.14,
  'extendedChange': -0.09,
  'extendedChangePercent': -0.00055,
  'extendedPriceTime': 1567725861951,
  'previousClose': 160.07,
  'previousVolume': 1520739,
  'change': 4.16,
  'changePercent': 0.02599,
  'volume': 2421361,
  'iexMarketPercent': 0.024636144713654842,
  'iexVolume': 59653,
  'avgTotalVolume': 2972058,
  'iexBidPrice': 0,
  'iexBidSize': 0,
  'iexAskPrice': 0,
  'iexAskSize': 0,
  'marketCap': 

## Convert data to Data Frame and save in CSV & JSON

In [33]:
# # Convert data extracted to data frame
# stocks_data_df = pd.DataFrame(stocks_data)
# stocks_data_df

# # Save in csv and json file
# stocks_data_df.to_csv("../stocks_data/raw_stock_data_fortune500.csv", index=False, encoding='utf8')

# stocks_data_df.to_json("../stocks_data/raw_stock_data_fortune500.json", orient='columns')


# Extract Historical Stocks Data

##  Stocks Price & Dividend - Financial Services - Insurance 

In [26]:
# # Yahoo Finance API

# historical_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-historical-data"

# headers = {
#     'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
#     'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
#     }

# prices_list = []
# event_list = []

# stock_history_counter = 1

# # List of 20 Stocks from Insurance industry

# symbols_insurance = ['AIG','AFL','ALL','AON','AJG','CB','CINF','RE','GL','HIG','MMC','MET','PFG','PGR','PRU','TRV','UNM','WLTW','LNC','L']

# # symbols_insurance = ['AIG','AFL','ALL'] # Test list

# #Print statement to inform when data acquisition start
# print(f"Beginning Data Acquisition!!!")
# print(f"------------&&&&-------------------")

# for s in tqdm(symbols_insurance):
    
#     querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": s}

#     try: 
#         response_history = requests.get(historical_url,headers = headers, params=querystring).json()
        
#         prices = response_history['prices']
#         events = response_history['eventsData']
        
#         for price in prices:
#             price['symbol'] = s
#             prices_list.append(price)
        
#         for event in events:
#             event['symbol'] = s
#             event_list.append(event)
        
#         print(f"Data Acquisition {stock_history_counter} | Stock Symbol : {s}")
        
#         stock_history_counter = stock_history_counter+ 1
    
#     except:
#         print("Stock not found...!!")
#     continue
    
# # Print statement to inform when all records have been acquired.
# print(f"------------&&&&---------------")
# print(f"Data Acquisition Complete")
# print(f"------------&&&&---------------")      



Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

Data Acquisition 1 | Stock Symbol : AIG
Data Acquisition 2 | Stock Symbol : AFL
Data Acquisition 3 | Stock Symbol : ALL
Data Acquisition 4 | Stock Symbol : AON
Data Acquisition 5 | Stock Symbol : AJG
Data Acquisition 6 | Stock Symbol : CB
Stock not found...!!
Data Acquisition 7 | Stock Symbol : RE
Data Acquisition 8 | Stock Symbol : GL
Data Acquisition 9 | Stock Symbol : HIG
Data Acquisition 10 | Stock Symbol : MMC
Data Acquisition 11 | Stock Symbol : MET
Stock not found...!!
Data Acquisition 12 | Stock Symbol : PGR
Data Acquisition 13 | Stock Symbol : PRU
Data Acquisition 14 | Stock Symbol : TRV
Data Acquisition 15 | Stock Symbol : UNM
Data Acquisition 16 | Stock Symbol : WLTW
Data Acquisition 17 | Stock Symbol : LNC
Data Acquisition 18 | Stock Symbol : L

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


## View Output Data & Create Data Frame

In [101]:
# print(json.dumps(prices_list, indent =4))
# print(json.dumps(event_list, indent =4))
print(prices_list)
print(event_list)

[{'date': 1567396800, 'open': 51.75, 'high': 52.4900016784668, 'low': 51.650001525878906, 'close': 52.400001525878906, 'volume': 2897700, 'adjclose': 52.400001525878906, 'symbol': 'AIG'}, {'date': 1566792000, 'open': 52.709999084472656, 'high': 53.470001220703125, 'low': 51.79999923706055, 'close': 52.040000915527344, 'volume': 14928000, 'adjclose': 52.040000915527344, 'symbol': 'AIG'}, {'date': 1566187200, 'open': 55.029998779296875, 'high': 55.20000076293945, 'low': 52.150001525878906, 'close': 52.4900016784668, 'volume': 15374200, 'adjclose': 52.4900016784668, 'symbol': 'AIG'}, {'date': 1565582400, 'open': 55.84000015258789, 'high': 56.5, 'low': 52.939998626708984, 'close': 54.310001373291016, 'volume': 24792800, 'adjclose': 54.310001373291016, 'symbol': 'AIG'}, {'date': 1564977600, 'open': 53.459999084472656, 'high': 57.47999954223633, 'low': 52.290000915527344, 'close': 56.2599983215332, 'volume': 25753700, 'adjclose': 56.2599983215332, 'symbol': 'AIG'}, {'date': 1564372800, 'open




[{'amount': 0.32, 'date': 1560432600, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1552570200, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1544538600, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1536931800, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1528896600, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1521034200, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1512657000, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1505395800, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1497274200, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1489411800, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'date': 1481034600, 'type': 'DIVIDEND', 'data': 0.32, 'symbol': 'AIG'}, {'amount': 0.32, 'da

In [29]:
# Convert to Data Frame
price_history = pd.DataFrame(prices_list)
dividend_history = pd.DataFrame(event_list)

## Save data to CSV & JSON

In [30]:
# ## Save data to CSV and JSON

# price_history.to_csv("../stocks_data/raw_price_history.csv", index=False, encoding='utf8')
# dividend_history.to_csv("../stocks_data/raw_dividend_history.csv", index=False, encoding='utf8')

# price_history.to_json("../stocks_data/raw_price_history.json", orient='columns')
# dividend_history.to_json("../stocks_data/raw_dividend_history.json", orient='columns')


# Extract News Stock Data - Financial Services - Insurance

## Daily News Stocks - Financial Services - Insurance

In [184]:
# # Yahoo Finance API - News Data

# news_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/get-news"


# #symbols_insurance = ['AIG','AFL','ALL'] # testing list
# symbols_insurance = ['AIG','AFL','ALL','AON','AJG','CB','CINF','RE','GL','HIG','MMC','MET','PFG','PGR','PRU','TRV','UNM','WLTW','LNC','L']


# headers = {
#     'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
#     'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
#     }

# stocks_news = []

# stock_news_counter = 1

# #Print statement to inform when data acquisition start
# print(f"Beginning Data Acquisition!!!")
# print(f"------------&&&&-------------------")

# for news in tqdm(symbols_insurance):

#     querystring_news = {"region":"US","category":news}

#     try: 
#         response_news = requests.get(news_url,headers = headers, params= querystring_news).json()
        
#         stocks_news.append(response_news)
           
#         print(f"Data Acquisition {stock_news_counter} | Stock Symbol : {news}")
        
#         stock_news_counter = stock_news_counter + 1
    
#     except:
#         print("Stock not found...!!")
#     continue
    
# # Print statement to inform when all records have been acquired.
# print(f"------------&&&&---------------")
# print(f"Data Acquisition Complete")
# print(f"------------&&&&---------------")    


Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

Data Acquisition 1 | Stock Symbol : AIG
Data Acquisition 2 | Stock Symbol : AFL
Data Acquisition 3 | Stock Symbol : ALL
Data Acquisition 4 | Stock Symbol : AON
Data Acquisition 5 | Stock Symbol : AJG
Data Acquisition 6 | Stock Symbol : CB
Data Acquisition 7 | Stock Symbol : CINF
Data Acquisition 8 | Stock Symbol : RE
Data Acquisition 9 | Stock Symbol : GL
Data Acquisition 10 | Stock Symbol : HIG
Data Acquisition 11 | Stock Symbol : MMC
Data Acquisition 12 | Stock Symbol : MET
Data Acquisition 13 | Stock Symbol : PFG
Data Acquisition 14 | Stock Symbol : PGR
Data Acquisition 15 | Stock Symbol : PRU
Data Acquisition 16 | Stock Symbol : TRV
Data Acquisition 17 | Stock Symbol : UNM
Data Acquisition 18 | Stock Symbol : WLTW
Data Acquisition 19 | Stock Symbol : LNC
Data Acquisition 20 | Stock Symbol : L

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


## Print output data and check length of extracted information 

In [185]:
# # Print output data and check length of extracted information 
# print(json.dumps(stocks_news, indent =4))
# len(stocks_news)

[
    {
        "more": {
            "result": [
                {
                    "uuid": "42031d19-ed4b-332f-aa97-75a0884a53d8"
                },
                {
                    "uuid": "b5f63c38-462e-311a-9f99-170043139c33"
                },
                {
                    "uuid": "39a7a376-03a5-39a1-86ba-7d80930436ff"
                },
                {
                    "uuid": "1c4b1b1d-74ea-332b-863b-60568f5a1266"
                },
                {
                    "uuid": "08e90f0c-34f5-3df7-8f34-414165a94dde"
                },
                {
                    "uuid": "668c58be-31cb-34c1-85a0-bebcddf27b5a"
                },
                {
                    "uuid": "1931d911-d3b7-3fa3-9a72-5f7286cd44d6"
                },
                {
                    "uuid": "1ab39e53-2bfe-3676-9e9f-2f0844aef722"
                },
                {
                    "uuid": "85e3c850-df0e-3260-ac8b-bf4b05b3cffb"
                },
              

]


20

## Loop to obtain only the key data

In [186]:
# # First loop to get to the desire results
# stocks_news_list = []

# for i in range(len(stocks_news)):
#     for j in range(len(stocks_news[i]['items']['result'])):
#         stocks_news_list.append(stocks_news[i]['items']['result'][j])
#         #ticker_list.append(stocks_news[i]['items']['result'][j]['entities'])
        
# stocks_news_list

[{'uuid': '7fbc1b3d-a49c-3126-94eb-cf2260324161',
  'title': 'Reinsurers benefit from calm amid the storm',
  'link': 'http://www.ft.com/cms/s/34bda2a2-cee1-11e9-b018-ca4456540ea6,s01=1.html?ftcamp=traffic/partner/feed_headline/us_yahoo/auddev&yptr=yahoo',
  'summary': 'As the $600bn reinsurance industry’s top brass prepare to gather for their annual shindig in Monte Carlo this weekend, they have one eye on the path of Hurricane Dorian up the eastern coast of the US.  The lack of large insurance losses this year has been a boon for the industry, but payouts could rise sharply if the biggest storm of this year’s hurricane season makes landfall in the US.  The prices that the likes of Swiss Re, Munich Re and Warren Buffett’s Berkshire Hathaway can charge their customers had been falling for years as the era of low interest rates drove money — and hence fresh capacity — into the industry.',
  'publisher': 'Financial Times',
  'author': 'Oliver Ralph in London',
  'ignore_main_image': Fals

## Looping, append and create a Data Frame

In [187]:
# # Creating a news data frame from the json output
# # https://python-forum.io/Thread-build-pandas-dataframe-from-a-for-loop

# # Data Frame
# stocks_news_df = pd.DataFrame({"title": [], "source_link": [], "summary": [
# ], "publisher": [], "date": [], "symbol": [], 'name': []})

# for news in range(len(stocks_news_list)):
#         news_data = stocks_news_list[news]
#         stocks_news_df = stocks_news_df.append({'title': news_data['title'], 'source_link': news_data['link'],\
#                                                 'summary': news_data['summary'],'publisher': news_data['publisher'],\
#                                                 'date': news_data['published_at'], 'symbol': news_data['entities'][0]['term'],\
#                                                 "name": news_data['entities'][0]['label'] }, ignore_index=True)



## Visualize all output data

In [188]:
# stocks_news_df

Unnamed: 0,title,source_link,summary,publisher,date,symbol,name
0,Reinsurers benefit from calm amid the storm,http://www.ft.com/cms/s/34bda2a2-cee1-11e9-b01...,As the $600bn reinsurance industry’s top brass...,Financial Times,1.567652e+09,TICKER:AIG,"American International Group, I"
1,'Fast Money ' Traders Share Their Thoughts On ...,https://finance.yahoo.com/news/fast-money-trad...,"Stephanie Link said on CNBC's ""Fast Money Half...",Benzinga,1.567595e+09,TICKER:AIG,"American International Group, I"
2,AIG Names Duncan Ellis Head of Retail Property...,https://finance.yahoo.com/news/aig-names-dunca...,"American International Group, Inc. (AIG) today...",Business Wire,1.567543e+09,TICKER:AIG,"American International Group, I"
3,What Makes Allstate (ALL) an Attractive Invest...,https://finance.yahoo.com/news/makes-allstate-...,Allstate's (ALL) well-performing Property and ...,Zacks,1.567429e+09,TICKER:AIG,"American International Group, I"
4,Down From the Peak - Everest Re Group Is Weak ...,https://realmoney.thestreet.com/investing/down...,After writing about American International Gro...,TheStreet.com,1.567174e+09,TICKER:RE,"Everest Re Group, Ltd."
5,AIG Up 33% This Year After Dismal Run in 2018:...,https://finance.yahoo.com/news/aig-33-dismal-r...,"Acquisitions, dispositions, cost management, r...",Zacks,1.567166e+09,TICKER:AIG,"American International Group, I"
6,Leave room for the firetruck: Insurers help de...,https://finance.yahoo.com/news/leave-room-fire...,Bruce Gendelman wanted a mountainside retreat ...,Reuters,1.567163e+09,TICKER:AIG,"American International Group, I"
7,AIG Is Likely to Trend Lower With Interest Rates,https://realmoney.thestreet.com/investing/stoc...,American International Group Inc. has been te...,TheStreet.com,1.567160e+09,TICKER:AIG,"American International Group, I"
8,AIG to Present at the Barclays Global Financia...,https://finance.yahoo.com/news/aig-present-bar...,"American International Group, Inc. announced t...",Business Wire,1.567023e+09,TICKER:AIG,"American International Group, I"
9,Life insurance startup Ethos raises $60 millio...,https://finance.yahoo.com/news/life-insurance-...,Ethos is betting on predictive analytics and b...,Yahoo Finance,1.566946e+09,TICKER:AIG,"American International Group, I"


## Save data to CSV & JSON

In [190]:
# # Savings to CSV and JSON files

# stocks_news_df.to_csv("../stocks_data/raw_stock_news_data.csv", index=False, encoding='utf8')

# stocks_news_df.to_json("../stocks_data/raw_stock_news_data.json", orient='columns')

# Extracting Financial Statements

In [208]:
# Yahoo Finance API for Financials Statements

financials_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-financials"

symbols_insurance = ['AIG','AFL','ALL'] # testing list
# symbols_insurance = ['AIG','AFL','ALL','AON','AJG','CB','CINF','RE','GL','HIG','MMC','MET','PFG','PGR','PRU','TRV','UNM','WLTW','LNC','L']


headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

balance_sheets = []
income_sheets = []
cashflow_sheets = []
#stocks_financials = []

stock_financials_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for financials in tqdm(symbols_insurance):
    
    querystring_financials = {"symbol":financials}
    
    try: 
        response_financials = requests.get(financials_url,headers = headers, params=querystring_financials).json()
        
        #stocks_financials.append(response_financials)
        balance = response_financials['balanceSheetHistoryQuarterly']["balanceSheetStatements"]
        income = response_financials["incomeStatementHistoryQuarterly"]["incomeStatementHistory"]
        cashflow = response_financials['cashflowStatementHistoryQuarterly']["cashflowStatements"]
        
 
        for bal in balance:
            bal['symbol'] = financials
            balance_sheets.append(bal)
        
        for inc in income:
            inc['symbol'] = financials
            income_sheets.append(inc)
            
        for cash in cashflow:
            cash['symbol'] = financials
            cashflow_sheets.append(cash)
        

        print(f"Data Acquisition {stock_financials_counter} | Stock Symbol : {financials}")
        
        stock_financials_counter = stock_financials_counter + 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")  

Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

Data Acquisition 1 | Stock Symbol : AIG
Data Acquisition 2 | Stock Symbol : AFL
Data Acquisition 3 | Stock Symbol : ALL

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [278]:
balance_sheets

[{'intangibleAssets': {'raw': 11386000000,
   'fmt': '11.39B',
   'longFmt': '11,386,000,000'},
  'capitalSurplus': {'raw': 81211000000,
   'fmt': '81.21B',
   'longFmt': '81,211,000,000'},
  'totalLiab': {'raw': 456164000000,
   'fmt': '456.16B',
   'longFmt': '456,164,000,000'},
  'totalStockholderEquity': {'raw': 64054000000,
   'fmt': '64.05B',
   'longFmt': '64,054,000,000'},
  'minorityInterest': {'raw': 1566000000,
   'fmt': '1.57B',
   'longFmt': '1,566,000,000'},
  'otherCurrentLiab': {'raw': 27781000000,
   'fmt': '27.78B',
   'longFmt': '27,781,000,000'},
  'totalAssets': {'raw': 522269000000,
   'fmt': '522.27B',
   'longFmt': '522,269,000,000'},
  'endDate': {'raw': 1561852800, 'fmt': '2019-06-30'},
  'commonStock': {'raw': 4766000000,
   'fmt': '4.77B',
   'longFmt': '4,766,000,000'},
  'otherCurrentAssets': {'raw': 44032000000,
   'fmt': '44.03B',
   'longFmt': '44,032,000,000'},
  'retainedEarnings': {'raw': 22077000000,
   'fmt': '22.08B',
   'longFmt': '22,077,000,000

In [285]:
balance_list = []
for bbb in range(len(balance_sheets)):
    balancex = pd.DataFrame.from_dict(balance_sheets[bbb],orient='columns')
    balance_list.append(balancex)
    
income_list = []
for iii in range(len(income_sheets)):
    incomex = pd.DataFrame.from_dict(income_sheets[iii],orient='columns')
    income_list.append(incomex)
    
cashflow_list = []
for ccc in range(len(cashflow_sheets)):
    cashx = pd.DataFrame.from_dict(cashflow_sheets[ccc],orient='columns')
    cashflow_list.append(cashx)

In [286]:
balance_list

[        intangibleAssets  capitalSurplus        totalLiab  \
 fmt               11.39B          81.21B          456.16B   
 longFmt   11,386,000,000  81,211,000,000  456,164,000,000   
 raw          11386000000     81211000000     456164000000   
 
         totalStockholderEquity minorityInterest otherCurrentLiab  \
 fmt                     64.05B            1.57B           27.78B   
 longFmt         64,054,000,000    1,566,000,000   27,781,000,000   
 raw                64054000000       1566000000      27781000000   
 
              totalAssets     endDate    commonStock otherCurrentAssets  ...  \
 fmt              522.27B  2019-06-30          4.77B             44.03B  ...   
 longFmt  522,269,000,000         NaN  4,766,000,000     44,032,000,000  ...   
 raw         522269000000  1561852800     4766000000        44032000000  ...   
 
         otherStockholderEquity totalCurrentAssets longTermInvestments  \
 fmt                      4.99B             74.58B             267.07B   
 l

In [276]:
# balance_sheets = [balance_sheets[xx] for xx in range(len(balance_sheets))]
# balance_sheets

# balance_sheets_df = [balance_list[q].append(balance_list[q+1]) for q in range(len(balance_list))]

for q in range(len(balance_list)):
    df = balance_list[q].append(balance_list[q+1])
    df
     

# balance_list_df = balance_list[0].append(balance_list[1])

# dictionary = {i : balance_list[i] for i in range(0,len(balance_list))}


In [277]:
df

NameError: name 'df' is not defined

In [193]:
# Print in json format

stocks_financials
print(json.dumps(stocks_financials, indent =4))

[
    {
        "cashflowStatementHistory": {
            "cashflowStatements": [
                {
                    "investments": {
                        "raw": 11298000000,
                        "fmt": "11.3B",
                        "longFmt": "11,298,000,000"
                    },
                    "changeToLiabilities": {
                        "raw": 0,
                        "fmt": null,
                        "longFmt": "0"
                    },
                    "totalCashflowsFromInvestingActivities": {
                        "raw": -223000000,
                        "fmt": "-223M",
                        "longFmt": "-223,000,000"
                    },
                    "netBorrowings": {
                        "raw": 1062000000,
                        "fmt": "1.06B",
                        "longFmt": "1,062,000,000"
                    },
                    "totalCashFromFinancingActivities": {
                        "raw": 794000000,
           

]


In [50]:
stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][0]

{'intangibleAssets': {'raw': 11386000000,
  'fmt': '11.39B',
  'longFmt': '11,386,000,000'},
 'capitalSurplus': {'raw': 81211000000,
  'fmt': '81.21B',
  'longFmt': '81,211,000,000'},
 'totalLiab': {'raw': 456164000000,
  'fmt': '456.16B',
  'longFmt': '456,164,000,000'},
 'totalStockholderEquity': {'raw': 64054000000,
  'fmt': '64.05B',
  'longFmt': '64,054,000,000'},
 'minorityInterest': {'raw': 1566000000,
  'fmt': '1.57B',
  'longFmt': '1,566,000,000'},
 'otherCurrentLiab': {'raw': 27781000000,
  'fmt': '27.78B',
  'longFmt': '27,781,000,000'},
 'totalAssets': {'raw': 522269000000,
  'fmt': '522.27B',
  'longFmt': '522,269,000,000'},
 'endDate': {'raw': 1561852800, 'fmt': '2019-06-30'},
 'commonStock': {'raw': 4766000000,
  'fmt': '4.77B',
  'longFmt': '4,766,000,000'},
 'otherCurrentAssets': {'raw': 44032000000,
  'fmt': '44.03B',
  'longFmt': '44,032,000,000'},
 'retainedEarnings': {'raw': 22077000000,
  'fmt': '22.08B',
  'longFmt': '22,077,000,000'},
 'otherLiab': {'raw': 31103

In [58]:
# Create empty list to store data

balance_sheets = []
cashflow_sheets = []
income_sheets = []

# had to input -1 because list index out of range error
# https://stackoverflow.com/questions/37619848/python-loop-list-index-out-of-range/37620170

for x in range(len(stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"])):
    #print(stocks_financials[x]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])
    balance_sheets.append(stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])


# for y in range(len(stocks_financials)-1):
#     #print(stocks_financials[y]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
#     cashflow_sheets.append(stocks_financials[0]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
    
# for z in range(len(stocks_financials)-1):
#     #print(stocks_financials[z]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])
#     income_sheets.append(stocks_financials[0]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])

In [64]:
# balance_sheets[1]
# # Create Data Frames
# balance_sheets_df = pd.DataFrame(balance_sheets)
# balance_sheets_df

balance_list = []
for xxx in range(len(balance_sheets)):
    balancex = pd.DataFrame.from_dict(balance_sheets[xxx],orient='columns')
    balance_list.append(balancex)



In [69]:
balance_list[0]

Unnamed: 0,intangibleAssets,capitalSurplus,totalLiab,totalStockholderEquity,minorityInterest,otherCurrentLiab,totalAssets,endDate,commonStock,otherCurrentAssets,...,shortLongTermDebt,otherStockholderEquity,totalCurrentAssets,longTermInvestments,netTangibleAssets,shortTermInvestments,netReceivables,maxAge,longTermDebt,accountsPayable
fmt,11.39B,81.21B,456.16B,64.05B,1.57B,27.78B,522.27B,2019-06-30,4.77B,44.03B,...,1.24B,4.99B,74.58B,267.07B,64.05B,15B,12.61B,1,35.05B,81.06B
longFmt,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000
raw,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,1561852800,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000


In [226]:
balance_list_df = balance_list[0].append(balance_list[1])
balance_list_df

Unnamed: 0,intangibleAssets,capitalSurplus,totalLiab,totalStockholderEquity,minorityInterest,otherCurrentLiab,totalAssets,endDate,commonStock,otherCurrentAssets,...,otherStockholderEquity,totalCurrentAssets,longTermInvestments,netTangibleAssets,shortTermInvestments,netReceivables,maxAge,longTermDebt,accountsPayable,symbol
fmt,11.39B,81.21B,456.16B,64.05B,1.57B,27.78B,522.27B,2019-06-30,4.77B,44.03B,...,4.99B,74.58B,267.07B,64.05B,15B,12.61B,1,35.05B,81.06B,AIG
longFmt,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,,4766000000,44032000000,...,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000,AIG
raw,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,1561852800,4766000000,44032000000,...,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000,AIG
fmt,12.13B,81.15B,450.83B,60.3B,1.31B,26.9B,512.92B,2019-03-31,4.77B,44.55B,...,2.13B,70.65B,260.69B,60.3B,10.88B,12.65B,1,34.52B,82.5B,AIG
longFmt,12128000000,81148000000,450829000000,60302000000,1306000000,26896000000,512922000000,,4766000000,44546000000,...,2128000000,70648000000,260692000000,60302000000,10882000000,12655000000,1,34523000000,82496000000,AIG
raw,12128000000,81148000000,450829000000,60302000000,1306000000,26896000000,512922000000,1553990400,4766000000,44546000000,...,2128000000,70648000000,260692000000,60302000000,10882000000,12655000000,1,34523000000,82496000000,AIG


In [15]:
# Obtain all data with list comprehensions so it can be put in a data frame

balance_sheets = [balance_sheets[xx] for xx in range(len(balance_sheets))]
balance_sheets

cashflow_sheets = [cashflow_sheets[yy] for yy in range(len(cashflow_sheets))]
cashflow_sheets

income_sheets = [income_sheets[0][zz] for zz in range(len(income_sheets))]
income_sheets

balance_sheets


[]

In [12]:
balance_sheets

[]

In [None]:
# Create Data Frames
balance_sheets_df = pd.DataFrame(balance_sheets)
balance_sheets_df


cashflow_sheets_df = pd.DataFrame(cashflow_sheets)
cashflow_sheets_df

income_sheets_df = pd.DataFrame(income_sheets)
income_sheets_df

# Saving in CSV data

# balance_sheets_df.to_csv("stocks_data/raw_balance_sheets_data.csv", index=False, encoding='utf8')
# cashflow_sheets_df.to_csv("stocks_data/raw_cashflow_sheets_data.csv", index=False, encoding='utf8')
# income_sheets_df.to_csv("stocks_data/raw_income_sheets_data.csv", index=False, encoding='utf8')