In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv
from tqdm import tqdm_notebook as tqdm
from bs4 import BeautifulSoup as bs
from splinter import Browser


## Scrape/Extract Fortune 500 table

In [2]:
# Use Pandas to scrape the table containing stocks data.
url_stock = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Convert table to html
stock_table = pd.read_html(url_stock)

# Print table 
print(stock_table)

[    Symbol                         Security SEC filings  \
0      MMM                       3M Company     reports   
1      ABT              Abbott Laboratories     reports   
2     ABBV                      AbbVie Inc.     reports   
3     ABMD                      ABIOMED Inc     reports   
4      ACN                    Accenture plc     reports   
5     ATVI              Activision Blizzard     reports   
6     ADBE                Adobe Systems Inc     reports   
7      AMD       Advanced Micro Devices Inc     reports   
8      AAP               Advance Auto Parts     reports   
9      AES                         AES Corp     reports   
10     AMG    Affiliated Managers Group Inc     reports   
11     AFL                        AFLAC Inc     reports   
12       A         Agilent Technologies Inc     reports   
13     APD     Air Products & Chemicals Inc     reports   
14    AKAM          Akamai Technologies Inc     reports   
15     ALK             Alaska Air Group Inc     reports

[216 rows x 6 columns]]


In [5]:
# Identify what type is the output data
type(stock_table)

# Ensure to select the first part of the list
stock_table_df = pd.DataFrame(stock_table[0])

# Save raw data to csv
stock_table_df.to_csv("../stocks_data/raw_fortune500table.csv", index=False, encoding='utf8')

stock_table_df.head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,ABIOMED Inc,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [None]:
# Make he Symbol column to a list 
symbols_list = stock_table_df["Symbol"].values.tolist()
symbols_list

## Extract Stocks information for fortune 500

In [None]:
base_url = "https://cloud.iexapis.com/stable/stock/"
key_token = "/quote?token=pk_e4c3fa15e49a4efca388fcfbcece0eac"

# Empty list to store data
stocks_data = []

# Start counter for the loop

symbol_counter = 1

symbols_list2 = ['MMM','ABT','AIG','HAL','SLB']

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for symbol in tqdm(symbols_list): #Note that we are using the symbol list
    try: 
        response = requests.get(base_url+symbol+key_token).json()
        stocks_data.append(response)
        
        print(f"Data Acquisition {symbol_counter} | Stock Symbol : {symbol}")
        symbol_counter = symbol_counter + 1
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")      


In [None]:
# Show data extracted
stocks_data

In [None]:
# Convert data extracted to data frame
stocks_data_df = pd.DataFrame(stocks_data)
stocks_data_df

# Save in csv file
stocks_data_df.to_csv("stocks_data/raw_stock_data_fortune500.csv", index=False, encoding='utf8')



In [None]:
# # Display data saved
stocks_data_df

In [None]:
symbols22 = ["AIG", "SLB"]
querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": symbols22}


In [None]:
#querystring['symbol'][0]
symbols22[1]

## Extract Historical Stocks Data

### Need to figure out how to put it in a list

In [30]:
############# This one is good ################

historical_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-historical-data"

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_history = []

stock_history_counter = 1

symbol_list1 = ['AIG', 'MMM', 'ABT', 'ACN']

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for s in symbol_list1:
    querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": s}

    try: 
        response2 = requests.get(historical_url,headers = headers, params=querystring).json()
        stocks_history.append({s:response2})
        
        print(f"Data Acquisition {stock_history_counter} | Stock Symbol : {querystring}")
        
        stock_history_counter = stock_history_counter+ 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")      



Beginning Data Acquisition!!!
------------&&&&-------------------
Data Acquisition 1 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'AIG'}
Data Acquisition 2 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'MMM'}
Data Acquisition 3 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'ABT'}
Data Acquisition 4 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'ACN'}
------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [36]:
stocks_history[2]

{'ABT': {'prices': [{'date': 1567396800,
    'open': 84.43000030517578,
    'high': 84.63999938964844,
    'low': 83.26000213623047,
    'close': 84.05999755859375,
    'volume': 4060600,
    'adjclose': 84.05999755859375},
   {'date': 1566792000,
    'open': 82.61000061035156,
    'high': 85.62000274658203,
    'low': 82.0199966430664,
    'close': 85.31999969482422,
    'volume': 19642400,
    'adjclose': 85.31999969482422},
   {'date': 1566187200,
    'open': 85.79000091552734,
    'high': 86.12999725341797,
    'low': 81.36000061035156,
    'close': 81.93000030517578,
    'volume': 21231900,
    'adjclose': 81.93000030517578},
   {'date': 1565582400,
    'open': 86.38999938964844,
    'high': 87.0999984741211,
    'low': 83.5199966430664,
    'close': 84.77999877929688,
    'volume': 23143900,
    'adjclose': 84.77999877929688},
   {'date': 1564977600,
    'open': 84.7300033569336,
    'high': 87.05000305175781,
    'low': 82.30999755859375,
    'close': 86.62000274658203,
    'vol

In [26]:
## Need to figure out how to put it in a list

historical_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-historical-data"

#symbols22 = "AIG", "SLB"

querystring = {"frequency":"1wk","filter":"history","period1":"1136219674","period2":"1567528474","symbol": "AIG"}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_history = []

stock_history_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for stock in tqdm(querystring):

    try: 
        response2 = requests.get(historical_url,headers = headers, params=querystring).json()
        stocks_history.append(response2)
        
        print(f"Data Acquisition {stock_history_counter} | Stock Symbol : {querystring}")
        stock_history_counter = stock_history_counter+ 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")      



Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Stock not found...!!
Data Acquisition 1 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'AIG'}
Data Acquisition 2 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'AIG'}
Data Acquisition 3 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'AIG'}
Data Acquisition 4 | Stock Symbol : {'frequency': '1wk', 'filter': 'history', 'period1': '1136219674', 'period2': '1567528474', 'symbol': 'AIG'}

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [27]:
stocks_history
print(json.dumps(stocks_history, indent =4))

[
    {
        "prices": [
            {
                "date": 1567396800,
                "open": 51.75,
                "high": 52.4900016784668,
                "low": 51.650001525878906,
                "close": 52.400001525878906,
                "volume": 2897700,
                "adjclose": 52.400001525878906
            },
            {
                "date": 1566792000,
                "open": 52.709999084472656,
                "high": 53.470001220703125,
                "low": 51.79999923706055,
                "close": 52.040000915527344,
                "volume": 14928000,
                "adjclose": 52.040000915527344
            },
            {
                "date": 1566187200,
                "open": 55.029998779296875,
                "high": 55.20000076293945,
                "low": 52.150001525878906,
                "close": 52.4900016784668,
                "volume": 15374200,
                "adjclose": 52.4900016784668
            },
            {
        

]


In [None]:
# Pulling "prices" history  and "eventsData" keys
stock_history_prices = []

for price in range(len(stocks_history)):
    #print(stocks_history[0]['prices'])
    stock_history_prices.append(stocks_history[0]['prices'])

In [None]:
# Create data frame

stock_history_prices_df =  pd.DataFrame(stock_history_prices[0])
stock_history_prices_df.head()

In [None]:
# Pulling "eventsData" history aka dividend 

stock_history_div = []

for div in range(len(stocks_history)):
    #print(stocks_history[0]['eventsData'])
    stock_history_div.append(stocks_history[0]['eventsData'])



In [None]:
# Create data frame

stock_history_div_df = pd.DataFrame(stock_history_div[0])
stock_history_div_df 

## Extract News Data

### Need to figure out how to put in a list and loop

In [None]:
# Getting the News

news_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/get-news"

# symbols22 = ["AIG", "SLB"]

querystring_news = {"region":"US","category":"AIG"}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_news = []

stock_news_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for news in tqdm(querystring):

    try: 
        response3 = requests.get(news_url,headers = headers, params= querystring_news).json()
        stocks_news.append(response3)
        
        #print(f"Data Acquisition {stock_news_counter} | Stock Symbol : {querystring_news}")
        
        stock_news_counter = stock_news_counter + 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")    


In [None]:
stocks_news
print(json.dumps(stocks_news, indent =4))

In [None]:
# Need this range in order to obtain all news
range(len(stocks_news[0]['items']['result']))

In [None]:
# Creating a news data frame from the json output
# https://python-forum.io/Thread-build-pandas-dataframe-from-a-for-loop

# Data Frame
stocks_news_df = pd.DataFrame({"title": [], "link": [], "summary": [
], "publisher": [], "date": [], "symbol": [], 'name': []})

# Initial section to retrieve data 
stock_news_data = stocks_news[0]['items']['result']

for news in range(len(stock_news_data)):
    stocks_news_df = stocks_news_df.append({'title': stock_news_data[news]['title'], 'link': stock_news_data[news]['link'], 'summary': stock_news_data[news]['summary'],'publisher': stock_news_data[news]['publisher'],'date': stock_news_data[news]['published_at'], 'symbol': stock_news_data[news]['entities'][0]['term'], "name": stock_news_data[news]['entities'][0]['label'] }, ignore_index=True)

#     stocks_news_df = stocks_news_df.append(
#         {'title': stocks_news[0]['items']['result'][news]['title'], 'link': stocks_news[0]['items']['result'][news]['link'], 'summary': stocks_news[0]['items']['result'][news]['summary'],'publisher': stocks_news[0]['items']['result'][news]['publisher'],'date': stocks_news[0]['items']['result'][news]['published_at']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'link': stocks_news[0]['items']['result'][news]['link']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'summary': stocks_news[0]['items']['result'][news]['summary']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'publisher': stocks_news[0]['items']['result'][news]['publisher']}, ignore_index=True)
#     stocks_news_df = stocks_news_df.append(
#         {'date': stocks_news[0]['items']['result'][news]['published_at']}, ignore_index=True)

    #     stock_news_data.append(stocks_news[0]['items']['result'][news]['link'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['summary'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['publisher'])
#     stock_news_data.append(stocks_news[0]['items']['result'][news]['published_at'])

# Print Data Frame
stocks_news_df

# Save raw data in excel
stocks_news_df.to_csv("stocks_data/raw_stock_news_data.csv", index=False, encoding='utf8')

## Extracting Financial Statements

In [47]:
# API for financials

financials_url = "https://apidojo-yahoo-finance-v1.p.rapidapi.com/stock/v2/get-financials"

#symbols22 = ["AIG", "SLB"]

querystring_financials = {"symbol":"AIG"}

headers = {
    'x-rapidapi-host': "apidojo-yahoo-finance-v1.p.rapidapi.com",
    'x-rapidapi-key': "09ec0160admsh2d5563a7feaa3efp1e2db5jsn5787845ed7fc"
    }

stocks_financials = []

stock_financials_counter = 1

#Print statement to inform when data acquisition start
print(f"Beginning Data Acquisition!!!")
print(f"------------&&&&-------------------")

for financials in tqdm(querystring_financials):

    try: 
        response4 = requests.get(financials_url,headers = headers, params=querystring_financials).json()
        stocks_financials.append(response4)
        
        print(f"Data Acquisition {stock_financials_counter} | Stock Symbol : {querystring_financials}")
        
        stock_financials_counter = stock_financials_counter + 1
    
    except:
        print("Stock not found...!!")
    continue
    
# Print statement to inform when all records have been acquired.
print(f"------------&&&&---------------")
print(f"Data Acquisition Complete")
print(f"------------&&&&---------------")  

Beginning Data Acquisition!!!
------------&&&&-------------------


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Data Acquisition 1 | Stock Symbol : {'symbol': 'AIG'}

------------&&&&---------------
Data Acquisition Complete
------------&&&&---------------


In [48]:
# Print in json format

stocks_financials
print(json.dumps(stocks_financials, indent =4))

[
    {
        "cashflowStatementHistory": {
            "cashflowStatements": [
                {
                    "investments": {
                        "raw": 11298000000,
                        "fmt": "11.3B",
                        "longFmt": "11,298,000,000"
                    },
                    "changeToLiabilities": {
                        "raw": 0,
                        "fmt": null,
                        "longFmt": "0"
                    },
                    "totalCashflowsFromInvestingActivities": {
                        "raw": -223000000,
                        "fmt": "-223M",
                        "longFmt": "-223,000,000"
                    },
                    "netBorrowings": {
                        "raw": 1062000000,
                        "fmt": "1.06B",
                        "longFmt": "1,062,000,000"
                    },
                    "totalCashFromFinancingActivities": {
                        "raw": 794000000,
           

]


In [50]:
stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][0]

{'intangibleAssets': {'raw': 11386000000,
  'fmt': '11.39B',
  'longFmt': '11,386,000,000'},
 'capitalSurplus': {'raw': 81211000000,
  'fmt': '81.21B',
  'longFmt': '81,211,000,000'},
 'totalLiab': {'raw': 456164000000,
  'fmt': '456.16B',
  'longFmt': '456,164,000,000'},
 'totalStockholderEquity': {'raw': 64054000000,
  'fmt': '64.05B',
  'longFmt': '64,054,000,000'},
 'minorityInterest': {'raw': 1566000000,
  'fmt': '1.57B',
  'longFmt': '1,566,000,000'},
 'otherCurrentLiab': {'raw': 27781000000,
  'fmt': '27.78B',
  'longFmt': '27,781,000,000'},
 'totalAssets': {'raw': 522269000000,
  'fmt': '522.27B',
  'longFmt': '522,269,000,000'},
 'endDate': {'raw': 1561852800, 'fmt': '2019-06-30'},
 'commonStock': {'raw': 4766000000,
  'fmt': '4.77B',
  'longFmt': '4,766,000,000'},
 'otherCurrentAssets': {'raw': 44032000000,
  'fmt': '44.03B',
  'longFmt': '44,032,000,000'},
 'retainedEarnings': {'raw': 22077000000,
  'fmt': '22.08B',
  'longFmt': '22,077,000,000'},
 'otherLiab': {'raw': 31103

In [58]:
# Create empty list to store data

balance_sheets = []
cashflow_sheets = []
income_sheets = []

# had to input -1 because list index out of range error
# https://stackoverflow.com/questions/37619848/python-loop-list-index-out-of-range/37620170

for x in range(len(stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"])):
    #print(stocks_financials[x]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])
    balance_sheets.append(stocks_financials[0]['balanceSheetHistoryQuarterly']["balanceSheetStatements"][x])


# for y in range(len(stocks_financials)-1):
#     #print(stocks_financials[y]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
#     cashflow_sheets.append(stocks_financials[0]['cashflowStatementHistoryQuarterly']["cashflowStatements"][y])
    
# for z in range(len(stocks_financials)-1):
#     #print(stocks_financials[z]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])
#     income_sheets.append(stocks_financials[0]['incomeStatementHistoryQuarterly']["incomeStatementHistory"])

In [64]:
# balance_sheets[1]
# # Create Data Frames
# balance_sheets_df = pd.DataFrame(balance_sheets)
# balance_sheets_df

balance_list = []
for xxx in range(len(balance_sheets)):
    balancex = pd.DataFrame.from_dict(balance_sheets[xxx],orient='columns')
    balance_list.append(balancex)



In [69]:
balance_list[0]

Unnamed: 0,intangibleAssets,capitalSurplus,totalLiab,totalStockholderEquity,minorityInterest,otherCurrentLiab,totalAssets,endDate,commonStock,otherCurrentAssets,...,shortLongTermDebt,otherStockholderEquity,totalCurrentAssets,longTermInvestments,netTangibleAssets,shortTermInvestments,netReceivables,maxAge,longTermDebt,accountsPayable
fmt,11.39B,81.21B,456.16B,64.05B,1.57B,27.78B,522.27B,2019-06-30,4.77B,44.03B,...,1.24B,4.99B,74.58B,267.07B,64.05B,15B,12.61B,1,35.05B,81.06B
longFmt,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000
raw,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,1561852800,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000


In [71]:
balance_list_df = balance_list[0].append(balance_list[1])
balance_list_df

Unnamed: 0,intangibleAssets,capitalSurplus,totalLiab,totalStockholderEquity,minorityInterest,otherCurrentLiab,totalAssets,endDate,commonStock,otherCurrentAssets,...,shortLongTermDebt,otherStockholderEquity,totalCurrentAssets,longTermInvestments,netTangibleAssets,shortTermInvestments,netReceivables,maxAge,longTermDebt,accountsPayable
fmt,11.39B,81.21B,456.16B,64.05B,1.57B,27.78B,522.27B,2019-06-30,4.77B,44.03B,...,1.24B,4.99B,74.58B,267.07B,64.05B,15B,12.61B,1,35.05B,81.06B
longFmt,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000
raw,11386000000,81211000000,456164000000,64054000000,1566000000,27781000000,522269000000,1561852800,4766000000,44032000000,...,1241000000,4991000000,74579000000,267069000000,64054000000,14998000000,12614000000,1,35050000000,81057000000
fmt,12.13B,81.15B,450.83B,60.3B,1.31B,26.9B,512.92B,2019-03-31,4.77B,44.55B,...,1.25B,2.13B,70.65B,260.69B,60.3B,10.88B,12.65B,1,34.52B,82.5B
longFmt,12128000000,81148000000,450829000000,60302000000,1306000000,26896000000,512922000000,,4766000000,44546000000,...,1253000000,2128000000,70648000000,260692000000,60302000000,10882000000,12655000000,1,34523000000,82496000000
raw,12128000000,81148000000,450829000000,60302000000,1306000000,26896000000,512922000000,1553990400,4766000000,44546000000,...,1253000000,2128000000,70648000000,260692000000,60302000000,10882000000,12655000000,1,34523000000,82496000000


In [15]:
# Obtain all data with list comprehensions so it can be put in a data frame

balance_sheets = [balance_sheets[xx] for xx in range(len(balance_sheets))]
balance_sheets

cashflow_sheets = [cashflow_sheets[yy] for yy in range(len(cashflow_sheets))]
cashflow_sheets

income_sheets = [income_sheets[0][zz] for zz in range(len(income_sheets))]
income_sheets

balance_sheets


[]

In [12]:
balance_sheets

[]

In [None]:
# Create Data Frames
balance_sheets_df = pd.DataFrame(balance_sheets)
balance_sheets_df


cashflow_sheets_df = pd.DataFrame(cashflow_sheets)
cashflow_sheets_df

income_sheets_df = pd.DataFrame(income_sheets)
income_sheets_df

# Saving in CSV data

# balance_sheets_df.to_csv("stocks_data/raw_balance_sheets_data.csv", index=False, encoding='utf8')
# cashflow_sheets_df.to_csv("stocks_data/raw_cashflow_sheets_data.csv", index=False, encoding='utf8')
# income_sheets_df.to_csv("stocks_data/raw_income_sheets_data.csv", index=False, encoding='utf8')