In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv
from tqdm import tqdm_notebook as tqdm
from bs4 import BeautifulSoup as bs
from splinter import Browser


## Scrape/Extract Fortune 500 table

In [2]:
# Use Pandas to scrape the table containing stocks data.
url_stock = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Convert table to html
stock_table = pd.read_html(url_stock)

# Print table 
print(stock_table)

[    Symbol                         Security SEC filings  \
0      MMM                       3M Company     reports   
1      ABT              Abbott Laboratories     reports   
2     ABBV                      AbbVie Inc.     reports   
3     ABMD                      ABIOMED Inc     reports   
4      ACN                    Accenture plc     reports   
5     ATVI              Activision Blizzard     reports   
6     ADBE                Adobe Systems Inc     reports   
7      AMD       Advanced Micro Devices Inc     reports   
8      AAP               Advance Auto Parts     reports   
9      AES                         AES Corp     reports   
10     AMG    Affiliated Managers Group Inc     reports   
11     AFL                        AFLAC Inc     reports   
12       A         Agilent Technologies Inc     reports   
13     APD     Air Products & Chemicals Inc     reports   
14    AKAM          Akamai Technologies Inc     reports   
15     ALK             Alaska Air Group Inc     reports

[216 rows x 6 columns]]


In [8]:
# Identify what type is the output data
type(stock_table)

# Ensure to select the first part of the list
stock_table_df = pd.DataFrame(stock_table[0])

# Save raw data to csv
stock_table_df.to_csv("stocks_data/fortune500table.csv", index=False, encoding='utf8')

stock_table_df.head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,ABIOMED Inc,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [5]:
# Make he Symbol column to a list 
symbols_list = stock_table_df["Symbol"].values.tolist()
symbols_list

['MMM',
 'ABT',
 'ABBV',
 'ABMD',
 'ACN',
 'ATVI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AMG',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALXN',
 'ALGN',
 'ALLE',
 'AGN',
 'ADS',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AIV',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ARNC',
 'ANET',
 'AJG',
 'AIZ',
 'ATO',
 'T',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BHGE',
 'BLL',
 'BAC',
 'BK',
 'BAX',
 'BBT',
 'BDX',
 'BRK.B',
 'BBY',
 'BIIB',
 'BLK',
 'HRB',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BF.B',
 'CHRW',
 'COG',
 'CDNS',
 'CPB',
 'COF',
 'CPRI',
 'CAH',
 'KMX',
 'CCL',
 'CAT',
 'CBOE',
 'CBRE',
 'CBS',
 'CE',
 'CELG',
 'CNC',
 'CNP',
 'CTL',
 'CERN',
 'CF',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'XEC',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CM

## Extract Stocks information for fortune 500

In [152]:
base_url = "https://cloud.iexapis.com/stable/stock/"
key_token = "/quote?token=pk_e4c3fa15e49a4efca388fcfbcece0eac"

# Empty list to store data
stocks_data = []

# Start counter for the loop

symbol_counter = 1

symbols_list2 = ['MMM','ABT','AIG','HAL','SLB']

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for symbol in tqdm(symbols_list): #Note that we are using the symbol list
    try: 
        response = requests.get(base_url+symbol+key_token).json()
        stocks_data.append(response)
        
        print(f"Data Acquisition {symbol_counter} | Stock Symbol : {symbol}")
        symbol_counter = symbol_counter + 1
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")      


Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

Data Acquisition 1 | Stock Symbol : MMM
Data Acquisition 2 | Stock Symbol : ABT
Data Acquisition 3 | Stock Symbol : ABBV
Data Acquisition 4 | Stock Symbol : ABMD
Data Acquisition 5 | Stock Symbol : ACN
Data Acquisition 6 | Stock Symbol : ATVI
Data Acquisition 7 | Stock Symbol : ADBE
Data Acquisition 8 | Stock Symbol : AMD
Data Acquisition 9 | Stock Symbol : AAP
Data Acquisition 10 | Stock Symbol : AES
Data Acquisition 11 | Stock Symbol : AMG
Data Acquisition 12 | Stock Symbol : AFL
Data Acquisition 13 | Stock Symbol : A
Data Acquisition 14 | Stock Symbol : APD
Data Acquisition 15 | Stock Symbol : AKAM
Data Acquisition 16 | Stock Symbol : ALK
Data Acquisition 17 | Stock Symbol : ALB
Data Acquisition 18 | Stock Symbol : ARE
Data Acquisition 19 | Stock Symbol : ALXN
Data Acquisition 20 | Stock Symbol : ALGN
Data Acquisition 21 | Stock Symbol : ALLE
Data Acquisition 22 | Stock Symbol : AGN
Data Acquisition 23 | Stock Symbol : ADS
Data Acquisition 24 | Stock Symbol : LNT
Data Acquisition 25

Data Acquisition 197 | Stock Symbol : FITB
Data Acquisition 198 | Stock Symbol : FE
Data Acquisition 199 | Stock Symbol : FRC
Data Acquisition 200 | Stock Symbol : FISV
Data Acquisition 201 | Stock Symbol : FLT
Data Acquisition 202 | Stock Symbol : FLIR
Data Acquisition 203 | Stock Symbol : FLS
Data Acquisition 204 | Stock Symbol : FMC
Data Acquisition 205 | Stock Symbol : F
Data Acquisition 206 | Stock Symbol : FTNT
Data Acquisition 207 | Stock Symbol : FTV
Data Acquisition 208 | Stock Symbol : FBHS
Data Acquisition 209 | Stock Symbol : FOXA
Data Acquisition 210 | Stock Symbol : FOX
Data Acquisition 211 | Stock Symbol : BEN
Data Acquisition 212 | Stock Symbol : FCX
Data Acquisition 213 | Stock Symbol : GPS
Data Acquisition 214 | Stock Symbol : GRMN
Data Acquisition 215 | Stock Symbol : IT
Data Acquisition 216 | Stock Symbol : GD
Data Acquisition 217 | Stock Symbol : GE
Data Acquisition 218 | Stock Symbol : GIS
Data Acquisition 219 | Stock Symbol : GM
Data Acquisition 220 | Stock Symbo

Data Acquisition 391 | Stock Symbol : PVH
Data Acquisition 392 | Stock Symbol : QRVO
Data Acquisition 393 | Stock Symbol : PWR
Data Acquisition 394 | Stock Symbol : QCOM
Data Acquisition 395 | Stock Symbol : DGX
Data Acquisition 396 | Stock Symbol : RL
Data Acquisition 397 | Stock Symbol : RJF
Data Acquisition 398 | Stock Symbol : RTN
Data Acquisition 399 | Stock Symbol : O
Data Acquisition 400 | Stock Symbol : REG
Data Acquisition 401 | Stock Symbol : REGN
Data Acquisition 402 | Stock Symbol : RF
Data Acquisition 403 | Stock Symbol : RSG
Data Acquisition 404 | Stock Symbol : RMD
Data Acquisition 405 | Stock Symbol : RHI
Data Acquisition 406 | Stock Symbol : ROK
Data Acquisition 407 | Stock Symbol : ROL
Data Acquisition 408 | Stock Symbol : ROP
Data Acquisition 409 | Stock Symbol : ROST
Data Acquisition 410 | Stock Symbol : RCL
Data Acquisition 411 | Stock Symbol : CRM
Data Acquisition 412 | Stock Symbol : SBAC
Data Acquisition 413 | Stock Symbol : SLB
Data Acquisition 414 | Stock Symb

In [161]:
# Show data extracted
stocks_data

[{'symbol': 'MMM',
  'companyName': '3M Co.',
  'primaryExchange': 'New York Stock Exchange',
  'calculationPrice': 'close',
  'open': 159.83,
  'openTime': 1567603800493,
  'close': 160.07,
  'closeTime': 1567627356301,
  'high': 160.63,
  'low': 158.99,
  'latestPrice': 160.07,
  'latestSource': 'Close',
  'latestTime': 'September 4, 2019',
  'latestUpdate': 1567627356301,
  'latestVolume': 1508833,
  'iexRealtimePrice': 160.03,
  'iexRealtimeSize': 64,
  'iexLastUpdated': 1567627195807,
  'delayedPrice': 160.07,
  'delayedPriceTime': 1567627800012,
  'extendedPrice': 160.07,
  'extendedChange': 0,
  'extendedChangePercent': 0,
  'extendedPriceTime': 1567637014226,
  'previousClose': 158.64,
  'previousVolume': None,
  'change': 1.43,
  'changePercent': 0.00901,
  'volume': 1508833,
  'iexMarketPercent': 0.03365912596026201,
  'iexVolume': 50786,
  'avgTotalVolume': 3000058,
  'iexBidPrice': 0,
  'iexBidSize': 0,
  'iexAskPrice': 0,
  'iexAskSize': 0,
  'marketCap': 92084909530,
  'p

In [166]:
# Convert data extracted to data frame
stocks_data_df = pd.DataFrame(stocks_data)
stocks_data_df

# Save in csv file
stocks_data_df.to_csv("stocks_data/raw_stock_data_fortune500.csv", index=False, encoding='utf8')



In [167]:
# # Display data saved
stocks_data_df

Unnamed: 0,avgTotalVolume,calculationPrice,change,changePercent,close,closeTime,companyName,delayedPrice,delayedPriceTime,extendedChange,...,openTime,peRatio,previousClose,previousVolume,primaryExchange,symbol,volume,week52High,week52Low,ytdChange
0,3000058,close,1.43,0.00901,160.07,1567627356301,3M Co.,160.070,1567627800012,0.00,...,1567603800493,18.95,158.64,,New York Stock Exchange,MMM,1508833,219.75,154.00,-0.160197
1,4573684,close,0.16,0.00190,84.22,1567627250015,Abbott Laboratories,84.220,1567627800002,0.00,...,1567604007247,51.35,84.06,,New York Stock Exchange,ABT,3680357,88.76,65.22,0.211396
2,9090799,close,0.98,0.01498,66.38,1567627252890,"AbbVie, Inc.",66.380,1567627800013,-0.02,...,1567603820744,24.36,65.40,,New York Stock Exchange,ABBV,9660964,96.60,62.66,-0.252083
3,716532,close,-0.26,-0.00142,183.38,1567627200260,"ABIOMED, Inc.",183.380,1567627200260,-0.88,...,1567603800120,32.07,183.64,,NASDAQ,ABMD,458494,459.75,181.02,-0.408956
4,1724995,close,1.96,0.01001,197.71,1567627206012,Accenture Plc,197.710,1567627800002,0.00,...,1567603800465,26.97,195.75,,New York Stock Exchange,ACN,1253219,200.68,132.63,0.402357
5,7000154,close,2.44,0.04779,53.50,1567627200294,"Activision Blizzard, Inc.",53.500,1567627200294,0.18,...,1567603800799,24.26,51.06,,NASDAQ,ATVI,9990366,84.67,39.85,0.133480
6,2231047,close,2.15,0.00761,284.60,1567627200449,"Adobe, Inc.",284.600,1567627200449,-0.03,...,1567603800464,52.41,282.45,,NASDAQ,ADBE,1516910,313.11,204.95,0.265347
7,69530176,close,0.05,0.00162,30.95,1567627200386,"Advanced Micro Devices, Inc.",30.950,1567627200386,0.03,...,1567603800885,164.98,30.90,,NASDAQ,AMD,46593945,35.55,16.03,0.642618
8,1423485,close,1.53,0.01090,141.89,1567627320346,"Advance Auto Parts, Inc.",141.890,1567627800003,0.00,...,1567603800394,23.55,140.36,,New York Stock Exchange,AAP,1235527,186.15,130.09,-0.100296
9,5118649,close,-0.05,-0.00326,15.29,1567627343243,The AES Corp.,15.290,1567627800004,0.00,...,1567603800426,25.58,15.34,,New York Stock Exchange,AES,3521950,18.52,13.00,0.078545


In [48]:
symbols22 = ["AIG", "SLB"]
querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": symbols22}


In [56]:
#querystring['symbol'][0]
symbols22[1]

'SLB'

## Extract Historical Stocks Data

### Need to figure out how to put it in a list

In [9]:
## Need to figure out how to put it in a list

historical_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-historical-data"

symbols22 = "AIG", "SLB"

querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": symbols22}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_history = []

stock_history_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for stock in tqdm(querystring):

    try: 
        response2 = requests.get(historical_url,headers = headers, params=querystring).json()
        stocks_history.append(response2)
        
        print(f"Data Acquisition {stock_history_counter} | Stock Symbol : {querystring}")
        stock_history_counter = stock_history_counter+ 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")      



Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Data Acquisition 1 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': ('AIG', 'SLB')}
Data Acquisition 2 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': ('AIG', 'SLB')}
Data Acquisition 3 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': ('AIG', 'SLB')}
Data Acquisition 4 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': ('AIG', 'SLB')}
Data Acquisition 5 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': ('AIG', 'SLB')}

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [12]:
stocks_history
print(json.dumps(stocks_history, indent =4))

[
    {
        "prices": [
            {
                "date": 1567396800,
                "open": 51.75,
                "high": 52.4900016784668,
                "low": 51.650001525878906,
                "close": 52.400001525878906,
                "volume": 2897700,
                "adjclose": 52.400001525878906
            },
            {
                "date": 1566792000,
                "open": 52.709999084472656,
                "high": 53.470001220703125,
                "low": 51.79999923706055,
                "close": 52.040000915527344,
                "volume": 14928000,
                "adjclose": 52.040000915527344
            },
            {
                "date": 1566187200,
                "open": 55.029998779296875,
                "high": 55.20000076293945,
                "low": 52.150001525878906,
                "close": 52.4900016784668,
                "volume": 15374200,
                "adjclose": 52.4900016784668
            },
            {
        

]


In [32]:
# Pulling "prices" history  and "eventsData" keys
stock_history_prices = []

for price in range(len(stocks_history)):
    #print(stocks_history[0]['prices'])
    stock_history_prices.append(stocks_history[0]['prices'])

In [133]:
# Create data frame

stock_history_prices_df =  pd.DataFrame(stock_history_prices[0])
stock_history_prices_df.head()

Unnamed: 0,adjclose,amount,close,data,date,denominator,high,low,numerator,open,splitRatio,type,volume
0,52.400002,,52.400002,,1567396800,,52.490002,51.650002,,51.75,,,2897700.0
1,52.040001,,52.040001,,1566792000,,53.470001,51.799999,,52.709999,,,14928000.0
2,52.490002,,52.490002,,1566187200,,55.200001,52.150002,,55.029999,,,15374200.0
3,54.310001,,54.310001,,1565582400,,56.5,52.939999,,55.84,,,24792800.0
4,56.259998,,56.259998,,1564977600,,57.48,52.290001,,53.459999,,,25753700.0


In [44]:
# Pulling "eventsData" history aka dividend 

stock_history_div = []

for div in range(len(stocks_history)):
    #print(stocks_history[0]['eventsData'])
    stock_history_div.append(stocks_history[0]['eventsData'])



In [46]:
# Create data frame

stock_history_div_df = pd.DataFrame(stock_history_div[0])
stock_history_div_df 

Unnamed: 0,amount,data,date,denominator,numerator,splitRatio,type
0,0.32,0.32,1560432600,,,,DIVIDEND
1,0.32,0.32,1552570200,,,,DIVIDEND
2,0.32,0.32,1544538600,,,,DIVIDEND
3,0.32,0.32,1536931800,,,,DIVIDEND
4,0.32,0.32,1528896600,,,,DIVIDEND
5,0.32,0.32,1521034200,,,,DIVIDEND
6,0.32,0.32,1512657000,,,,DIVIDEND
7,0.32,0.32,1505395800,,,,DIVIDEND
8,0.32,0.32,1497274200,,,,DIVIDEND
9,0.32,0.32,1489411800,,,,DIVIDEND


## Extract News Data

### Need to figure out how to put in a list and loop

In [181]:
# Getting the News

news_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/get-news"

# symbols22 = ["AIG", "SLB"]

querystring_news = {"region":"US","category":"AIG"}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_news = []

stock_news_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for news in tqdm(querystring):

    try: 
        response3 = requests.get(news_url,headers = headers, params= querystring_news).json()
        stocks_news.append(response3)
        
        #print(f"Data Acquisition {stock_news_counter} | Stock Symbol : {querystring_news}")
        
        stock_news_counter = stock_news_counter + 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")    


Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [182]:
stocks_news
print(json.dumps(stocks_news, indent =4))

[
    {
        "more": {
            "result": [
                {
                    "uuid": "b5f63c38-462e-311a-9f99-170043139c33"
                },
                {
                    "uuid": "39a7a376-03a5-39a1-86ba-7d80930436ff"
                },
                {
                    "uuid": "1c4b1b1d-74ea-332b-863b-60568f5a1266"
                },
                {
                    "uuid": "08e90f0c-34f5-3df7-8f34-414165a94dde"
                },
                {
                    "uuid": "668c58be-31cb-34c1-85a0-bebcddf27b5a"
                },
                {
                    "uuid": "1931d911-d3b7-3fa3-9a72-5f7286cd44d6"
                },
                {
                    "uuid": "1ab39e53-2bfe-3676-9e9f-2f0844aef722"
                },
                {
                    "uuid": "85e3c850-df0e-3260-ac8b-bf4b05b3cffb"
                },
                {
                    "uuid": "90db6473-6ce7-3484-ad47-31e37a45a243"
                },
              

]


In [183]:
# Need this range in order to obtain all news
range(len(stocks_news[0]['items']['result']))

range(0, 10)

In [200]:
# Creating a news data frame from the json output
# https://python-forum.io/Thread-build-pandas-dataframe-from-a-for-loop

# Data Frame
stocks_news_df = pd.DataFrame({"title": [], "link": [], "summary": [
], "publisher": [], "date": [], "symbol": [], 'name': []})

# Initial section to retrieve data 
stock_news_data = stocks_news[0]['items']['result']

for news in range(len(stock_news_data)):
    stocks_news_df = stocks_news_df.append({'title': stock_news_data[news]['title'], 'link': stock_news_data[news]['link'], 'summary': stock_news_data[news]['summary'],'publisher': stock_news_data[news]['publisher'],'date': stock_news_data[news]['published_at'], 'symbol': stock_news_data[news]['entities'][0]['term'], "name": stock_news_data[news]['entities'][0]['label'] }, ignore_index=True)

#     stocks_news_df = stocks_news_df.append(
#         {'title': stocks_news[0]['items']['result'][news]['title'], 'link': stocks_news[0]['items']['result'][news]['link'], 'summary': stocks_news[0]['items']['result'][news]['summary'],'publisher': stocks_news[0]['items']['result'][news]['publisher'],'date': stocks_news[0]['items']['result'][news]['published_at']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'link': stocks_news[0]['items']['result'][news]['link']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'summary': stocks_news[0]['items']['result'][news]['summary']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'publisher': stocks_news[0]['items']['result'][news]['publisher']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'date': stocks_news[0]['items']['result'][news]['published_at']}, ignore_index=True)

    #     stock_news_data.append(stocks_news[0]['items']['result'][news]['link'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['summary'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['publisher'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['published_at'])

# Print Data Frame
stocks_news_df

# Save raw data in excel
stocks_news_df.to_csv("stocks_data/raw_stock_news_data.csv", index=False, encoding='utf8')

## Extracting Financial Statements

In [195]:
# API for financials

financials_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-financials"

#symbols22 = ["AIG", "SLB"]

querystring_financials = {"symbol":"AIG"}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_financials = []

stock_financials_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for financials in tqdm(querystring):

    try: 
        response4 = requests.get(financials_url,headers = headers, params=querystring_financials).json()
        stocks_financials.append(response4)
        
        print(f"Data Acquisition {stock_financials_counter} | Stock Symbol : {querystring_financials}")
        
        stock_financials_counter = stock_financials_counter + 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")  

Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Data Acquisition 1 | Stock Symbol : {'symbol': 'AIG'}
Data Acquisition 2 | Stock Symbol : {'symbol': 'AIG'}
Data Acquisition 3 | Stock Symbol : {'symbol': 'AIG'}
Data Acquisition 4 | Stock Symbol : {'symbol': 'AIG'}
Data Acquisition 5 | Stock Symbol : {'symbol': 'AIG'}

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [196]:
# Print in json format

stocks_financials
print(json.dumps(stocks_financials, indent =4))

[
    {
        "cashflowStatementHistory": {
            "cashflowStatements": [
                {
                    "investments": {
                        "raw": 11298000000,
                        "fmt": "11.3B",
                        "longFmt": "11,298,000,000"
                    },
                    "changeToLiabilities": {
                        "raw": 0,
                        "fmt": null,
                        "longFmt": "0"
                    },
                    "totalCashflowsFromInvestingActivities": {
                        "raw": -223000000,
                        "fmt": "-223M",
                        "longFmt": "-223,000,000"
                    },
                    "netBorrowings": {
                        "raw": 1062000000,
                        "fmt": "1.06B",
                        "longFmt": "1,062,000,000"
                    },
                    "totalCashFromFinancingActivities": {
                        "raw": 794000000,
           

]


In [197]:
# Create empty list to store data

balance_sheets = []
cashflow_sheets = []
income_sheets = []

# had to input -1 because list index out of range error
# https://stackoverflow.com/questions/37619848/python-loop-list-index-out-of-range/37620170

for x in range(len(stocks_financials)-1):
    #print(stocks_financials[x]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])
    balance_sheets.append(stocks_financials[x]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])


for y in range(len(stocks_financials)-1):
    #print(stocks_financials[y]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
    cashflow_sheets.append(stocks_financials[y]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
    
for z in range(len(stocks_financials)-1):
    #print(stocks_financials[z]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])
    income_sheets.append(stocks_financials[z]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])

In [198]:
# Obtain all data with list comprehensions so it can be put in a data frame

balance_sheets = [balance_sheets[xx] for xx in range(len(balance_sheets))]
balance_sheets

cashflow_sheets = [cashflow_sheets[yy] for yy in range(len(cashflow_sheets))]
cashflow_sheets

income_sheets = [income_sheets[0][zz] for zz in range(len(income_sheets))]
income_sheets

balance_sheets


[{'intangibleAssets': {'raw': 11386000000,
   'fmt': '11.39B',
   'longFmt': '11,386,000,000'},
  'capitalSurplus': {'raw': 81211000000,
   'fmt': '81.21B',
   'longFmt': '81,211,000,000'},
  'totalLiab': {'raw': 456164000000,
   'fmt': '456.16B',
   'longFmt': '456,164,000,000'},
  'totalStockholderEquity': {'raw': 64054000000,
   'fmt': '64.05B',
   'longFmt': '64,054,000,000'},
  'minorityInterest': {'raw': 1566000000,
   'fmt': '1.57B',
   'longFmt': '1,566,000,000'},
  'otherCurrentLiab': {'raw': 27781000000,
   'fmt': '27.78B',
   'longFmt': '27,781,000,000'},
  'totalAssets': {'raw': 522269000000,
   'fmt': '522.27B',
   'longFmt': '522,269,000,000'},
  'endDate': {'raw': 1561852800, 'fmt': '2019-06-30'},
  'commonStock': {'raw': 4766000000,
   'fmt': '4.77B',
   'longFmt': '4,766,000,000'},
  'otherCurrentAssets': {'raw': 44032000000,
   'fmt': '44.03B',
   'longFmt': '44,032,000,000'},
  'retainedEarnings': {'raw': 22077000000,
   'fmt': '22.08B',
   'longFmt': '22,077,000,000

In [199]:
# Create Data Frames
balance_sheets_df = pd.DataFrame(balance_sheets)
balance_sheets_df


cashflow_sheets_df = pd.DataFrame(cashflow_sheets)
cashflow_sheets_df

income_sheets_df = pd.DataFrame(income_sheets)
income_sheets_df

# Saving in CSV data

balance_sheets_df.to_csv("stocks_data/raw_balance_sheets_data.csv", index=False, encoding='utf8')
cashflow_sheets_df.to_csv("stocks_data/raw_cashflow_sheets_data.csv", index=False, encoding='utf8')
income_sheets_df.to_csv("stocks_data/raw_income_sheets_data.csv", index=False, encoding='utf8')