In [131]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# define the base url needed to create the file url.
base_url = r"https://www.sec.gov"

cik_number = "320193"

# convert a normal url to a document url
normal_url = r"https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/0000320193-20-000096.txt"
normal_url = normal_url.replace('0000320193-20-000096','').replace('.txt','/index.json')

# define a url that leads to a 10k document landing page
documents_url = r"https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/index.json"
print(documents_url)

# request the url and decode it.
content = requests.get(documents_url).json()

for file in content['directory']['item']:
    
    # Grab the filing summary and create a new url leading to the file so we can download it.
    if file['name'] == 'FilingSummary.xml':

        xml_summary = base_url + content['directory']['name'] + "/" + file['name']
        
        print('-' * 100)
        print('File Name: ' + file['name'])
        print('File Path: ' + xml_summary)


https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/index.json
----------------------------------------------------------------------------------------------------
File Name: FilingSummary.xml
File Path: https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/FilingSummary.xml


In [132]:
base_url = xml_summary.replace('FilingSummary.xml', '')

# print(base_url)

# request and parse the content
content = requests.get(xml_summary).content
soup = BeautifulSoup(content, 'lxml')         # Format to xml

# print(soup)
# find the 'myreports' tag because this contains all the individual reports submitted.
reports = soup.find('myreports')

# I want a list to store all the individual components of the report, so create the master list.
master_reports = []

# loop through each report in the 'myreports' tag but avoid the last one as this will cause an error.
for report in reports.find_all('report')[:-1]:

    # let's create a dictionary to store all the different parts we need.
    report_dict = {}
    report_dict['name_short'] = report.shortname.text
    report_dict['name_long'] = report.longname.text
    report_dict['position'] = report.position.text
    report_dict['category'] = report.menucategory.text
    report_dict['url'] = base_url + report.htmlfilename.text

    # append the dictionary to the master list.
    master_reports.append(report_dict)

    # print the info to the user.
    print('-'*100)
    # if (report.htmlfilename.text == "R11.htm"):
    print(base_url + report.htmlfilename.text)
    print(report.longname.text)
    print(report.shortname.text)
    print(report.menucategory.text)
    print(report.position.text)

----------------------------------------------------------------------------------------------------
https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/R1.htm
0001001 - Document - Cover Page
Cover Page
Cover
1
----------------------------------------------------------------------------------------------------
https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/R2.htm
1001002 - Statement - CONSOLIDATED STATEMENTS OF OPERATIONS
CONSOLIDATED STATEMENTS OF OPERATIONS
Statements
2
----------------------------------------------------------------------------------------------------
https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/R3.htm
1002003 - Statement - CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME
CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME
Statements
3
----------------------------------------------------------------------------------------------------
https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/R4.htm
1003004 - S

In [133]:
# create the list to hold the statement urls
# statements_url = []

# for report_dict in master_reports:
    
#     # define the statements we want to look for.
#     item1 = r"CONSOLIDATED STATEMENTS OF OPERATIONS"
#     item2 = r"CONSOLIDATED BALANCE SHEETS"
#     item3 = r"CONSOLIDATED STATEMENTS OF CASH FLOWS"
#     # item4 = r"Consolidated Statements of Stockholder's (Deficit) Equity"
    
#     # store them in a list.
#     report_list = [item1, item2, item3]
    
#     # if the short name can be found in the report list.
#     if report_dict['name_short'] in report_list:
        
#         # print some info and store it in the statements url.
#         print('-'*100)
#         print(report_dict['name_short'])
#         print(report_dict['url'])
        
#         statements_url.append(report_dict['url'])

In [134]:
# create the list to hold the statement urls
statements_url = []

statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019318000007/R7.htm')     # Q1 2017 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019318000070/R7.htm')     # Q2 2018 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019318000100/R7.htm')     # Q3 2018 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019318000145/R9.htm')     # Y 2018 Consolidated Statement of Cash Flows

statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019319000010/R7.htm')     # Q1 2018 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019319000066/R7.htm')     # Q2 2019 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019319000076/R7.htm')     # Q3 2019 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/R7.htm')     # Y 2019 Consolidated Statement of Cash Flows

statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019320000010/R7.htm')     # Q1 2019 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019320000052/R7.htm')     # Q2 2020 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019320000062/R7.htm')     # Q3 2020 Consolidated Statement of Cash Flows
statements_url.append('https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/R7.htm')     # Y 2020 Consolidated Statement of Cash Flows

In [135]:
# let's assume we want all the statements in a single data set.
statements_data = []

# loop through each statement url
for statement in statements_url:

    # define a dictionary that will store the different parts of the statement.
    statement_data = {}
    statement_data['headers'] = []
    statement_data['sections'] = []
    statement_data['data'] = []
    
    # request the statement file content
    content = requests.get(statement).content
    report_soup = BeautifulSoup(content, 'html')

    # find all the rows, figure out what type of row it is, parse the elements, and store in the statement file list.
    for index, row in enumerate(report_soup.table.find_all('tr')):
        
        # first let's get all the elements.
        cols = row.find_all('td')
        
        # if it's a regular row and not a section or a table header
        if (len(row.find_all('th')) == 0 and len(row.find_all('strong')) == 0): 
            reg_row = [ele.text.strip() for ele in cols]
            statement_data['data'].append(reg_row)
            
        # if it's a regular row and a section but not a table header
        elif (len(row.find_all('th')) == 0 and len(row.find_all('strong')) != 0):
            sec_row = cols[0].text.strip()
            statement_data['sections'].append(sec_row)
            
        # finally if it's not any of those it must be a header
        elif (len(row.find_all('th')) != 0):            
            hed_row = [ele.text.strip() for ele in row.find_all('th')]
            statement_data['headers'].append(hed_row)
            
        else:            
            print('We encountered an error.')

    # append it to the master list.
    statements_data.append(statement_data)
    print(statement_data)

{'headers': [['CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS (Unaudited) - USD ($) $ in Millions', '3 Months Ended'], ['Dec. 30, 2017', 'Dec. 31, 2016']], 'sections': ['Statement of Cash Flows [Abstract]', 'Operating activities:', 'Adjustments to reconcile net income to cash generated by operating activities:', 'Changes in operating assets and liabilities:', 'Investing activities:', 'Financing activities:', 'Supplemental cash flow disclosure:'], 'data': [['Cash and cash equivalents, beginning of the period', '$ 20,289', '$ 20,484'], ['Net income', '20,065', '17,891'], ['Depreciation and amortization', '2,745', '2,987'], ['Share-based compensation expense', '1,296', '1,256'], ['Deferred income tax expense/(benefit)', '(33,737)', '1,452'], ['Other', '(11)', '(274)'], ['Accounts receivable, net', '(5,570)', '1,697'], ['Inventories', '434', '(580)'], ['Vendor non-trade receivables', '(9,660)', '(375)'], ['Other current and non-current assets', '(197)', '(1,446)'], ['Accounts payable', '1

In [136]:
pd.DataFrame(statement_data['data'])


Unnamed: 0,0,1,2,3
0,"Cash, cash equivalents and restricted cash, be...","$ 50,224","$ 25,913","$ 20,289"
1,Net income,57411,55256,59531
2,Depreciation and amortization,11056,12547,10903
3,Share-based compensation expense,6829,6068,5340
4,Deferred income tax benefit,(215),(340),"(32,590)"
5,Other,(97),(652),(444)
6,"Accounts receivable, net",6917,245,"(5,322)"
7,Inventories,(127),(289),828
8,Vendor non-trade receivables,1553,2931,"(8,010)"
9,Other current and non-current assets,"(9,588)",873,(423)


In [None]:
d = {}
# [i[‘sections’] for i in statements_data]
for i in statements_data:
  for j in i['data']:
    if j[0] not in d.keys():
      d[j[0]] = [j[1]]
    else:
      d[j[0]].append(j[1])
d

In [143]:
dd = {}
for i in d:
  if len(d[i]) == 12:
    dd[i] = d[i]

In [138]:
# [statement_data['headers'][1][0] for statement_data in statements_data]
# feature_data = {}
# features = []
# for feature in statements_data[0]['data']:
#   features.append(feature[0])
#   feature_data[feature[0]] = [];


# for stmt_data in statements_data:
#   for feat in stmt_data['data']:
#     feature_data[feat].append(feat[1])
# feature_data

# for statement in statements_data:
#   for i in range(len(statement['data'])):
#     print(statement['data'][i])
    # feature_data.fromkeys()
# features
# pd.DataFrame([stmt_data['data'] for stmt_data in statements_data])


In [139]:
# income_headers
# income_headers = []
# income_features = []
# income_features_data = [[]]
# income_sections = []
# for statement_data in statements_data:
#   income_headers.append((statement_data)['headers'][1][0])
#   income_data.append(statement_data['data'])
#   for feature in (statement_data)['data']:
#     income_features_data.append(feature[1])

# for feature in (statements_data[0])['data']:
#   income_features.append(feature[0])
  
# income_features


In [140]:
income_df = pd.DataFrame(income_data)

income_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107
0,"Cash, cash equivalents and restricted cash, be...","$ 50,224","$ 25,913","$ 20,289",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Net income,57411,55256,59531,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Depreciation and amortization,11056,12547,10903,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Share-based compensation expense,6829,6068,5340,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Deferred income tax benefit,(215),(340),"(32,590)",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,"[Cash, cash equivalents and restricted cash, b...","[Net income, 55,256, 59,531, 48,351]","[Depreciation and amortization, 12,547, 10,903...","[Share-based compensation expense, 6,068, 5,34...","[Deferred income tax expense/(benefit), (340),...","[Other, (652), (444), (166)]","[Accounts receivable, net, 245, (5,322), (2,093)]","[Inventories, (289), 828, (2,723)]","[Vendor non-trade receivables, 2,931, (8,010),...","[Other current and non-current assets, 873, (4...","[Accounts payable, (1,923), 9,175, 8,966]","[Deferred revenue, (625), (3), (593)]","[Other current and non-current liabilities, (4...","[Cash generated by operating activities, 69,39...","[Purchases of marketable securities, (39,630),...",[Proceeds from maturities of marketable securi...,"[Proceeds from sales of marketable securities,...","[Payments for acquisition of property, plant a...",[Payments made in connection with business acq...,"[Purchases of non-marketable securities, (1,00...","[Proceeds from non-marketable securities, 1,63...","[Other, (1,078), (745), (124)]",[Cash generated by/(used in) investing activit...,"[Proceeds from issuance of common stock, 781, ...",[Payments for taxes related to net share settl...,[Payments for dividends and dividend equivalen...,"[Repurchases of common stock, (66,897), (72,73...","[Proceeds from issuance of term debt, net, 6,9...","[Repayments of term debt, (8,805), (6,500), (3...",[Proceeds from/(Repayments of) commercial pape...,"[Other, (105), 0, 0]","[Cash used in financing activities, (90,976), ...","[Increase/(Decrease) in cash, cash equivalents...","[Cash, cash equivalents and restricted cash, e...","[Cash paid for income taxes, net, 15,263, 10,4...","[Cash paid for interest, $ 3,423, $ 3,022, $ 2...",,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
104,"[Cash, cash equivalents and restricted cash, b...","[Net income, 22,236, 19,965]","[Depreciation and amortization, 2,816, 3,395]","[Share-based compensation expense, 1,710, 1,559]","[Deferred income tax expense/(benefit), (349),...","[Other, (142), (54)]","[Accounts receivable, net, 2,015, 5,130]","[Inventories, (28), (1,076)]","[Vendor non-trade receivables, 3,902, 6,905]","[Other current and non-current assets, (7,054)...","[Accounts payable, (1,089), (8,501)]","[Deferred revenue, 985, (370)]","[Other current and non-current liabilities, 5,...","[Cash generated by operating activities, 30,51...","[Purchases of marketable securities, (37,416),...",[Proceeds from maturities of marketable securi...,"[Proceeds from sales of marketable securities,...","[Payments for acquisition of property, plant a...",[Payments made in connection with business acq...,"[Purchases of non-marketable securities, (77),...","[Other, (130), (56)]",[Cash generated by/(used in) investing activit...,"[Proceeds from issuance of common stock, 2, 0]",[Payments for taxes related to net share settl...,[Payments for dividends and dividend equivalen...,"[Repurchases of common stock, (20,706), (8,796)]","[Proceeds from issuance of term debt, net, 2,2...","[Repayments of term debt, (1,000), 0]",[Proceeds from/(Repayments of) commercial pape...,"[Other, (16), 0]","[Cash used in financing activities, (25,407), ...","[Increase/(Decrease) in cash, cash equivalents...","[Cash, cash equivalents and restricted cash, e...","[Cash paid for income taxes, net, 4,393, 4,916]","[Cash paid for interest, $ 771, $ 836]",,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
105,"[Cash, cash equivalents and restricted cash, b...","[Net income, 33,485, 31,526]","[Depreciation and amortization, 5,602, 6,435]","[Share-based compensation expense, 3,407, 3,073]","[Deferred income tax benefit, (651), (124)]","[Other, (259), (215)]","[Accounts receivable, net, 7,284, 8,094]","[Inventories, 699, (1,006)]","[Vendor non-trade receivables, 7,923, 14,616]","[Other current and non-current assets, (8,866)...","[Accounts payable, (13,520), (20,024)]","[Deferred revenue, 1,223, (540)]","[Other current and non-current liabilities, 7,...","[Cash generated by operating activities, 43,82...","[Purchases of marketable securities, (66,489),...",[Proceeds from maturities of marketable securi...,"[Proceeds from sales of marketable securities,...","[Payments for acquisition of property, plant a...",[Payments made in connection with business acq...,"[Purchases of non-marketable securities, (146)...","[Other, (426), 30]",[Cash generated by/(used in) investing activit...,"[Proceeds from issuance of common stock, 430, ...",[Payments for taxes related to net share settl...,[Payments for dividends and dividend equivalen...,"[Repurchases of common stock, (39,280), (32,498)]","[Proceeds from issuance of term debt, net, 2,2...","[Repayments of term debt, (5,250), (2,500)]",[Proceeds from/(Repayments of) commercial pape...,"[Proceeds from repurchase agreement, 2,556, 0]","[Other, (51), (51)]","[Cash used in financing activities, (46,347), ...","[Increase/(Decrease) in cash, cash equivalents...","[Cash, cash equivalents and restricted cash, e...","[Cash paid for income taxes, net, 7,505, 9,497]","[Cash paid for interest, $ 1,689, $ 1,762]",,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
106,"[Cash, cash equivalents and restricted cash, b...","[Net income, 44,738, 41,570]","[Depreciation and amortization, 8,354, 9,368]","[Share-based compensation expense, 5,105, 4,569]","[Deferred income tax expense/(benefit), 182, (...","[Other, (94), (340)]","[Accounts receivable, net, 5,149, 9,013]","[Inventories, 10, 496]","[Vendor non-trade receivables, 8,685, 13,483]","[Other current and non-current assets, (6,760)...","[Accounts payable, (10,787), (19,804)]","[Deferred revenue, 1,649, (776)]","[Other current and non-current liabilities, 3,...","[Cash generated by operating activities, 60,09...","[Purchases of marketable securities, (96,606),...",[Proceeds from maturities of marketable securi...,"[Proceeds from sales of marketable securities,...","[Payments for acquisition of property, plant a...",[Payments made in connection with business acq...,"[Purchases of non-marketable securities, (210)...","[Proceeds from non-marketable securities, 58, ...","[Other, (689), (268)]",[Cash generated by/(used in) investing activit...,"[Proceeds from issuance of common stock, 430, ...",[Payments for taxes related to net share settl...,[Payments for dividends and dividend equivalen...,"[Repurchases of common stock, (55,171), (49,453)]","[Proceeds from issuance of term debt, net, 10,...","[Repayments of term debt, (12,629), (5,500)]",[Proceeds from/(Repayments of) commercial pape...,"[Proceeds from repurchase agreements, 5,165, 0]","[Other, (120), (83)]","[Cash used in financing activities, (65,463), ...","[Increase/(Decrease) in cash, cash equivalents...","[Cash, cash equivalents and restricted cash, e...","[Cash paid for income taxes, net, 8,410, 11,795]","[Cash paid for interest, $ 2,275, $ 2,563]",,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [141]:
# Put the data in a DataFrame
income_df = pd.DataFrame(income_data)

# Display
print('-'*100)
print('Before Reindexing')
print('-'*100)
display(income_df.head())

# Define the Index column, rename it, and we need to make sure to drop the old column once we reindex.
income_df.index = income_df[0]
income_df.index.name = 'Category'
income_df = income_df.drop(0, axis = 1)

# Display
print('-'*100)
print('Before Regex')
print('-'*100)
display(income_df.head())

# Get rid of the '$', '(', ')', and convert the '' to NaNs.
income_df = income_df.replace('[\$,)]','', regex=True )\
                     .replace( '[(]','-', regex=True)\
                     .replace( '', 'NaN', regex=True)

# Display
print('-'*100)
print('Before type conversion')
print('-'*100)
display(income_df.head())

# everything is a string, so let's convert all the data to a float.
income_df = income_df.astype(float)

# Change the column headers
income_df.columns = income_header

# Display
print('-'*100)
print('Final Product')
print('-'*100)

# show the df
income_df

# drop the data in a CSV file if need be.
# income_df.to_csv('income_state.csv')

----------------------------------------------------------------------------------------------------
Before Reindexing
----------------------------------------------------------------------------------------------------


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107
0,"Cash, cash equivalents and restricted cash, be...","$ 50,224","$ 25,913","$ 20,289",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Net income,57411,55256,59531,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Depreciation and amortization,11056,12547,10903,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Share-based compensation expense,6829,6068,5340,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Deferred income tax benefit,(215),(340),"(32,590)",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


----------------------------------------------------------------------------------------------------
Before Regex
----------------------------------------------------------------------------------------------------


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
"Cash, cash equivalents and restricted cash, beginning balances","$ 50,224","$ 25,913","$ 20,289",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Net income,57411,55256,59531,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Depreciation and amortization,11056,12547,10903,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Share-based compensation expense,6829,6068,5340,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Deferred income tax benefit,(215),(340),"(32,590)",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


----------------------------------------------------------------------------------------------------
Before type conversion
----------------------------------------------------------------------------------------------------


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
"Cash, cash equivalents and restricted cash, beginning balances",50224,25913,20289,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Net income,57411,55256,59531,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Depreciation and amortization,11056,12547,10903,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Share-based compensation expense,6829,6068,5340,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Deferred income tax benefit,-215,-340,-32590,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


ValueError: ignored

In [None]:
income_df.transpose()



In [None]:
income_df.T.index

In [None]:
pd.to_datetime(income_df.T.index)

In [None]:
income_df_new = income_df.T
income_df_new.index = pd.to_datetime(income_df_new.index)

income_df_new

In [None]:
income_df_new = income_df_new.dropna(axis=1)

income_df_new


In [None]:
!pip install yfinance

In [None]:
# income_df_new.mean(axis=0)

import yfinance as yf

tickers = ["AAPL"] # Tesla stock and Bitcoin in USD

start_date = '2010-01-01'
end_date = '2021-01-01'

In [None]:
panel_data = yf.download(tickers=tickers, start=start_date, end=end_date)[['Adj Close']].dropna(axis=0)
panel_data

In [None]:
# obtain percentage change for each stock to use as the y variable in training
panel_data['AAPL_pct_change'] = panel_data['Adj Close'].pct_change()

# panel_data.dropna(axis=0)

#ignore first row due to NaN
panel_data = panel_data.iloc[1:, :]
panel_data

In [None]:
from datetime import timedelta
import numpy as np
# obtain dates of income_df (they are the indices)
dates = income_df_new.index
# subtract one day from each day to obtain the Friday's instead of the saturdays
dates = dates - timedelta(days=1)
# obtain the one year before the minimum date value in financial statement
new_date = dates.min() - timedelta(days=366) # subtract 90 when you do quarterlies
# append new date to our dates
new_dates = np.append(dates, np.datetime64(new_date))
# slice out the desired dates in our pct_change df
panel_data.loc[pd.DatetimeIndex(new_dates), :]

In [None]:


# Make new dataframe of relevant stock adjusted close prices (2020-09-26, 2019-09-28, 2018-09-29, 2017-09-29)
consolidated_df = panel_data.loc[new_dates, :]


consolidated_df['AAPL_pct_change'] = consolidated_df[['Adj Close']].pct_change(-1)
consolidated_df = consolidated_df.dropna(axis = 0)
consolidated_df

In [None]:
income_df_new['Ann_pct_change'] = consolidated_df['AAPL_pct_change']

income_df_new