In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from os import listdir
import re

import csv
import datetime
import pickle
import os

import calendar
import random

In [2]:
# Raise pandas' display limits (rows, columns, column width) to 500 each so
# wide statement tables are not truncated when a cell displays them.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = 500

# Render floating point values without decimal places.
pd.set_option('display.float_format', '{:.0f}'.format)

In [3]:
# Root locations used by the notebook. The original absolute paths remain the
# defaults; set MD1_DATA_ROOT / MD1_PROJ_ROOT in the environment to run the
# notebook on a machine with a different directory layout.
DATA_ROOT_PATH = os.environ.get("MD1_DATA_ROOT", "/mnt/data/projects/MD1/data/R1000/reports/")
PROJ_ROOT_PATH = os.environ.get("MD1_PROJ_ROOT", "/home/priyesh/projects/MD1")

In [4]:
# Load the stage-2 cleaned master data from <PROJ_ROOT_PATH>/pickle.
# Later cells index it as clean_master_data[ticker][yr][stype]['table'].
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','clean_master_data_stage2.pkl')
clean_master_data = pd.read_pickle(filepath)

In [5]:
# Load the stage-2 (uncleaned) master data from <PROJ_ROOT_PATH>/pickle.
# Later cells display master_data[ticker][yr]['income'] next to the cleaned
# table for comparison.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','master_data_stage2.pkl')
master_data = pd.read_pickle(filepath)

In [6]:
# Load the stage-3 income-statement label mappings from <PROJ_ROOT_PATH>/pickle.
# The next cell reads its 'headings' and 'line_items' entries to build the
# lookup dictionaries.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','mappings_income_stage3.pkl')
label_mappings = pd.read_pickle(filepath)

In [7]:
# Build the per-statement-type lookups consumed by find_line_items().
# Each mapping table is converted with to_dict() and only its 'type' entry is
# kept, stored under the statement type being processed.
stype = 'income'

temp = label_mappings['headings'].to_dict()
heading_lookup = {stype: temp['type']}

temp = label_mappings['line_items'].to_dict()
line_item_lookup = {stype: temp['type']}

In [8]:
# Row labels that end the scan of a statement table. The two prefixes are
# matched case-sensitively, the regular expressions case-insensitively.
_STOP_PREFIXES = ('Basic', 'Net Revenues From Collaborators')
_STOP_PATTERNS = tuple(
  re.compile(pattern, re.IGNORECASE)
  for pattern in (
    r'^Weighted.*Average.*Shares',
    r'^Weighted.*Average.*Basic',
    r'^Earnings.*Common Share.*Basic',
    r'^Earnings.*per Share.*Basic',
    r'^Net.*Income.*Attrib',
  )
)


def _is_stop_label(label):
  """Return True when `label` is one of the rows after which scanning stops."""
  # Labels are normally strings; str() keeps an unexpected numeric/NaN label
  # from raising inside startswith()/match().
  text = str(label)
  if text.startswith(_STOP_PREFIXES):
    return True
  return any(pattern.match(text) for pattern in _STOP_PATTERNS)


def find_line_items(table, heading_lookup, line_item_lookup, stype):
  """Extract the mapped line items from one statement table.

  Rows are visited in order. A row whose values are all null is treated as a
  heading; if that heading is mapped to 'EXCL' the scan stops immediately.
  Any other row whose label appears in the line-item lookup is copied to the
  result with two extra fields: 'type' (the mapped code) and 'line_type'
  (always 'L'). After a row has been handled, the scan also stops if its
  label is one of the stop labels (the matching row itself is still kept when
  it is a mapped line item).

  Headings are recognised but deliberately not emitted, to avoid duplicate
  matches.

  Parameters
  ----------
  table : pandas.DataFrame
      Statement table whose index holds the row labels.
  heading_lookup : dict
      heading_lookup[stype] maps heading labels to a heading code.
  line_item_lookup : dict
      line_item_lookup[stype] maps line-item labels to a type code.
  stype : str
      Statement type used to select the two lookups.

  Returns
  -------
  pandas.DataFrame
      One row per matched line item (indexed by its label) with the table's
      columns plus 'type' and 'line_type'; an empty DataFrame when nothing
      matched.
  """
  h_lookup = heading_lookup[stype]
  l_lookup = line_item_lookup[stype]

  # Collect rows and build the frame once at the end instead of concatenating
  # inside the loop.
  matched_rows = []

  for index, row in table.iterrows():

    if row.isnull().all():
      # Heading row (or a line item without values): only the 'EXCL'
      # marker matters here, it ends the extraction.
      if index in h_lookup and h_lookup[index] == 'EXCL':
        break
    elif index in l_lookup:
      record = row.copy()
      record['type'] = l_lookup[index]
      record['line_type'] = 'L'
      matched_rows.append(record)

    if _is_stop_label(index):
      break

  if not matched_rows:
    return pd.DataFrame()

  return pd.DataFrame(matched_rows)

In [9]:
def extract_summary(stype):
  """Run find_line_items() over every ticker and year in clean_master_data.

  Reads the notebook-level `clean_master_data`, `heading_lookup` and
  `line_item_lookup`. Each ticker is printed as it is processed.

  Returns a dict keyed by ticker; each value is a dict keyed by year holding
  the extracted line items for the years that contain a `stype` statement
  (tickers without any such year map to an empty dict).
  """
  master_summary = {}

  for ticker in list(clean_master_data.keys()):
    print(ticker)
    yearly = clean_master_data[ticker]

    # Only years that actually carry the requested statement are extracted.
    master_summary[ticker] = {
      yr: find_line_items(yearly[yr][stype]['table'],
                          heading_lookup, line_item_lookup, stype)
      for yr in yearly
      if stype in yearly[yr]
    }

  return master_summary

In [10]:
def conv_date_cols(cols):
  """Convert date-style column labels to 'YYYY-M' strings.

  A label is converted when it starts with a three-letter English month
  abbreviation, an optional period, a one- or two-digit day, a comma and a
  four-digit year, e.g. 'Sep. 30, 2022' -> '2022-9'. The period is optional so
  that an unabbreviated 'May 31, 2020' is converted as well. The month is not
  zero-padded.

  The year is taken from the matched text, so any trailing text after the
  date does not affect the result. Labels that are not strings, do not match,
  or start with an unknown abbreviation are returned unchanged.

  Parameters
  ----------
  cols : iterable
      Column labels.

  Returns
  -------
  list
      Labels in the same order, converted where possible.
  """
  # Fixed English table: independent of the process locale.
  month_numbers = {
    abbr: number
    for number, abbr in enumerate(
      ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
      start=1)
  }
  date_label = re.compile(r"^([A-Z][a-z]{2})\.? \d{1,2}, (\d{4})")

  new_cols = []

  for c in cols:
    found = date_label.search(c) if isinstance(c, str) else None
    mnth = month_numbers.get(found.group(1)) if found else None

    if mnth is None:
      new_cols.append(c)
    else:
      new_cols.append(found.group(2) + '-' + str(mnth))

  return new_cols

In [11]:
def find_duplicates(df1):
  """Return the rows of `df1` that repeat an earlier (label, line_type) pair.

  The index of `df1` is moved into a 'line_item' column first, so the result
  has a fresh integer index and the original labels in 'line_item'. The first
  occurrence of each pair is not reported, only the repeats. `df1` itself is
  left untouched.
  """
  flattened = df1.reset_index(names=['line_item'])
  repeated = flattened.duplicated(subset=['line_item', 'line_type'])
  return flattened[repeated]

In [12]:
def extract_statements(stype, master_summary_data):
  """Combine each ticker's yearly summaries into one multi-year statement.

  For every ticker the summary with the highest year key is the base table.
  Each older summary then contributes only the columns the table does not
  have yet, joined on the row label with a left merge, so the set of rows is
  fixed by the base table. Duplicate row labels are dropped (first kept)
  before merging. Progress is printed for every ticker and merge step.

  When the resulting table is not empty, its column labels are passed through
  conv_date_cols() and reordered: date columns newest first, then 'type' and
  'line_type' last. Converted 'YYYY-M' labels are ordered numerically by year
  and month (so '2019-12' precedes '2019-9'); labels that were not converted
  stay ahead of the date columns in reverse alphabetical order.

  Parameters
  ----------
  stype : str
      Not used by this function; kept so existing calls keep working.
  master_summary_data : dict
      ticker -> {year key -> summary DataFrame}.

  Returns
  -------
  dict
      ticker -> combined DataFrame, for tickers that have at least one year.
  """
  converted_label = re.compile(r'^(\d{4})-(\d{1,2})$')

  def _column_rank(label):
    # Sort key used with reverse=True: unconverted labels first, then dates
    # from newest to oldest.
    found = converted_label.match(label) if isinstance(label, str) else None
    if found:
      return (0, int(found.group(1)), int(found.group(2)), '')
    return (1, 0, 0, str(label))

  master_statements = {}

  for ticker in master_summary_data:

    print(ticker)
    statement = master_summary_data[ticker]
    keys = sorted(statement.keys(), reverse=True)

    if not keys:
      continue

    df = statement[keys[0]].copy()
    df = df[~df.index.duplicated(keep='first')]

    for i in range(1, len(keys)):

      print("Building Table for",ticker,i)

      older = statement[keys[i]]
      known = set(df.columns)
      # Keep the older frame's own column order so results are reproducible
      # from run to run.
      df1_cols = list(dict.fromkeys(c for c in older.columns if c not in known))
      df1 = older[df1_cols].copy()
      df1 = df1[~df1.index.duplicated(keep='first')]
      df = df.merge(df1, right_index=True, left_index=True, how='left', suffixes=['_x', '_y'])

    # We could end up with an empty dataframe if there are no matches. So skip if that is the case.

    if len(df) > 0:
      # Amend date format of column names

      labels = conv_date_cols(list(df.columns))
      df.columns = labels

      # Sort in reverse date order, place type and line_type at end.
      # Reordering by position keeps this correct even if two source labels
      # convert to the same 'YYYY-M' text.

      trailing = ('type', 'line_type')
      positions = [pos for pos, label in enumerate(labels) if label not in trailing]
      positions.sort(key=lambda pos: _column_rank(labels[pos]), reverse=True)
      positions += [labels.index(name) for name in trailing]
      df = df.iloc[:, positions]

    master_statements[ticker] = df

  return master_statements

In [13]:
df1 = clean_master_data['BK']['18']['income']['table']
df1

Unnamed: 0_level_0,"Dec. 31, 2017","Dec. 31, 2016","Dec. 31, 2015"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Investment Services Fees:,,,
Asset Servicing,4383.0,4244.0,4187.0
Clearing Services,1553.0,1404.0,1375.0
Issuer Services,977.0,1026.0,978.0
Treasury Services,557.0,547.0,555.0
Total Investment Services Fees,7470.0,7221.0,7095.0
Investment Management And Performance Fees,3584.0,3350.0,3438.0
Foreign Exchange And Other Trading Revenue,668.0,701.0,768.0
Financing-Related Fees,216.0,219.0,220.0
Distribution And Servicing,160.0,166.0,162.0


In [14]:
find_line_items(df1, heading_lookup, line_item_lookup, 'income')


Unnamed: 0,"Dec. 31, 2017","Dec. 31, 2016","Dec. 31, 2015",type,line_type
Total Fee Revenue,12162,11998,11999,TR,L
Total Fee And Other Revenue,12165,12073,12082,TR,L
Total Revenue,15543,15237,15194,TR,L
Net Income,4114,3548,3222,NI,L


In [15]:
# Create lookup dictionaries: one empty mapping for headings ('H') and one
# for line items ('L').

swap_text_dict = {'H': {}, 'L': {}}

In [16]:
income_summary = extract_summary('income')

AAL
AAP
AAPL
ABBV
ABT
ACGL
ACHC
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
AEE
AEP
AES
AFG
AFL
AGCO
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AMAT
AMC
AMD
AME
AMED
AMG
AMGN
AMH
AMP
AMT
AMZN
AN
ANET
ANSS
AON
AOS
APD
APH
APTV
AR
ARE
ARES
ARMK
ARW
ATO
ATR
ATVI
AVB
AVT
AWI
AWK
AXS
AXTA
AYI
AZO
BA
BAC
BAH
BALL
BAX
BBY
BC
BDX
BEN
BERY
BFAM
BG
BIIB
BIO
BK
BKI
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BRX
BSX
BURL
BWA
BWXT
BX
BXP
BYD
C
CABO
CACC
CACI
CAG
CAH
CAR
CASY
CAT
CB
CBOE
CBRE
CBSH
CC
CCI
CCK
CCL
CDNS
CDW
CE
CF
CFG
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHK
CHRW
CHTR
CIEN
CINF
CL
CLH
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNC
CNP
COF
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CPT
CR
CRI
CRL
CRUS
CSCO
CSGP
CSL
CSX
CTAS
CTLT
CTSH
CUBE
CUZ
CVS
CVX
CW
D
DAL
DAR
DCI
DECK
DEI
DFS
DG
DGX
DHI
DHR
DISH
DKS
DLB
DLR
DLTR
DOV
DPZ
DRI
DTE
DUK
DVA
DVN
DXCM
EA
EBAY
ECL
ED
EEFT
EFX
EGP
EHC
EIX
EL
ELS
EMN
EMR
ENPH
ENTG
EOG
EPAM
EPR
EQIX
EQR
EQT
ERIE
ES
ESI
ESS
ETN
ETR
EVA
EVR
EW
EWBC
EXAS
EXC
EXEL
EXP
EXPD

In [17]:
len(income_summary)

763

In [18]:
income_summary['NFG']['22']

Unnamed: 0,"Sep. 30, 2022","Sep. 30, 2021","Sep. 30, 2020",type,line_type
"Depreciation, Depletion And Amortization",369790,335303,306158,DEP,L
Total Operating Expenses,1384266,1153801,1516433,TOE,L
Operating Income,814516,639924,29858,OI,L


In [19]:
dups_list = []
tickers_with_dups = {}

ticker_list = list(income_summary.keys())

# Scan every extracted summary for repeated line items. One entry is stored
# per ticker, so when several years have duplicates the last year scanned is
# the one that remains.
for ticker in ticker_list:

  print(ticker)

  for yr in income_summary[ticker]:

    df_dup = find_duplicates(income_summary[ticker][yr])

    if len(df_dup) == 0:
      continue

    dups = {
      'yr': yr,
      'df_orig': income_summary[ticker][yr],
      'df_dup': df_dup,
    }
    tickers_with_dups[ticker] = dups


AAL
AAP
AAPL
ABBV
ABT
ACGL
ACHC
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
AEE
AEP
AES
AFG
AFL
AGCO
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AMAT
AMC
AMD
AME
AMED
AMG
AMGN
AMH
AMP
AMT
AMZN
AN
ANET
ANSS
AON
AOS
APD
APH
APTV
AR
ARE
ARES
ARMK
ARW
ATO
ATR
ATVI
AVB
AVT
AWI
AWK
AXS
AXTA
AYI
AZO
BA
BAC
BAH
BALL
BAX
BBY
BC
BDX
BEN
BERY
BFAM
BG
BIIB
BIO
BK
BKI
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BRX
BSX
BURL
BWA
BWXT
BX
BXP
BYD
C
CABO
CACC
CACI
CAG
CAH
CAR
CASY
CAT
CB
CBOE
CBRE
CBSH
CC
CCI
CCK
CCL
CDNS
CDW
CE
CF
CFG
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHK
CHRW
CHTR
CIEN
CINF
CL
CLH
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNC
CNP
COF
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CPT
CR
CRI
CRL
CRUS
CSCO
CSGP
CSL
CSX
CTAS
CTLT
CTSH
CUBE
CUZ
CVS
CVX
CW
D
DAL
DAR
DCI
DECK
DEI
DFS
DG
DGX
DHI
DHR
DISH
DKS
DLB
DLR
DLTR
DOV
DPZ
DRI
DTE
DUK
DVA
DVN
DXCM
EA
EBAY
ECL
ED
EEFT
EFX
EGP
EHC
EIX
EL
ELS
EMN
EMR
ENPH
ENTG
EOG
EPAM
EPR
EQIX
EQR
EQT
ERIE
ES
ESI
ESS
ETN
ETR
EVA
EVR
EW
EWBC
EXAS
EXC
EXEL
EXP
EXPD

In [20]:
tickers_with_dups.keys()

dict_keys(['AEE', 'AMC', 'AMH', 'ARES', 'ATO', 'BALL', 'BFAM', 'BKI', 'CBRE', 'CCI', 'CE', 'CLX', 'CNC', 'CVX', 'DISH', 'DTE', 'DVA', 'ED', 'EGP', 'EIX', 'EPAM', 'EQT', 'EVA', 'EXEL', 'FITB', 'FWONA', 'FWONK', 'GE', 'GLPI', 'GPS', 'HBI', 'HRB', 'HST', 'HWM', 'IART', 'IBM', 'IEX', 'IQV', 'IRM', 'ITT', 'L', 'LII', 'LKQ', 'LSCC', 'LSXMA', 'LSXMK', 'LUV', 'MCHP', 'MLM', 'MOH', 'MRCY', 'MRTX', 'MS', 'NI', 'NOC', 'PAYC', 'PCG', 'PKI', 'PLUG', 'PNR', 'PSX', 'RARE', 'REXR', 'RMD', 'SEB', 'SGEN', 'SMG', 'SO', 'SPB', 'SSNC', 'STZ', 'TFX', 'TNDM', 'TRU', 'VLO', 'VRTX', 'WEX', 'X', 'Z', 'ZG'])

In [21]:
len(tickers_with_dups)

80

In [22]:
ticker='SGEN'
yr = tickers_with_dups[ticker]['yr']
tickers_with_dups[ticker]['df_dup']

Unnamed: 0,line_item,"Dec. 31, 2019","Dec. 31, 2018","Dec. 31, 2017",type,line_type
6,Net Loss,-158650000,-222693000,-125530000,NI,L


In [23]:
tickers_with_dups[ticker]['df_orig']

Unnamed: 0,"Dec. 31, 2019","Dec. 31, 2018","Dec. 31, 2017",type,line_type
Total Revenue,916713000,654700000,482250000,TR,L
Research And Development,719374000,565309000,456700000,RD,L
SGA,373932000,261096000,167233000,SGA,L
Total Costs And Expenses,1137258000,914698000,678051000,TOE,L
Loss From Operations,-220545000,-259998000,-195801000,LCO,L
Net Loss,-158650000,-222693000,-125530000,NI,L
Net Loss,-158650000,-222693000,-125530000,NI,L


In [24]:
df1 = clean_master_data[ticker][yr]['income']['table']
df1

Unnamed: 0_level_0,"Dec. 31, 2019","Dec. 31, 2018","Dec. 31, 2017"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Revenues:,,,
Total Revenue,916713000.0,654700000.0,482250000.0
Costs And Expenses:,,,
Research And Development,719374000.0,565309000.0,456700000.0
SGA,373932000.0,261096000.0,167233000.0
Total Costs And Expenses,1137258000.0,914698000.0,678051000.0
Loss From Operations,-220545000.0,-259998000.0,-195801000.0
"Investment And Other Income, Net",61895000.0,13652000.0,36914000.0
Loss Before Income Taxes,-158650000.0,-246346000.0,-158887000.0
Income Tax Benefit,0.0,23653000.0,33357000.0


In [25]:
master_data[ticker][yr]['income']

Unnamed: 0_level_0,Consolidated Statements of Comprehensive Loss - USD ($) shares in Thousands,12 Months Ended,12 Months Ended,12 Months Ended
Unnamed: 0_level_1,Consolidated Statements of Comprehensive Loss - USD ($) shares in Thousands,"Dec. 31, 2019","Dec. 31, 2018","Dec. 31, 2017"
0,Revenues:,,,
1,Total revenues,"$ 916,713,000","$ 654,700,000","$ 482,250,000"
2,Costs and expenses:,,,
3,Research and development,719374000,565309000,456700000
4,"Selling, general and administrative",373932000,261096000,167233000
5,Total costs and expenses,1137258000,914698000,678051000
6,Loss from operations,"(220,545,000)","(259,998,000)","(195,801,000)"
7,"Investment and other income, net",61895000,13652000,36914000
8,Loss before income taxes,"(158,650,000)","(246,346,000)","(158,887,000)"
9,Income tax benefit,0,23653000,33357000


In [26]:
# Remove companies with duplicates.
# This mutates clean_master_data in place. pop(ticker, None) keeps the cell
# safe to re-run: a ticker that was already removed is skipped instead of
# raising KeyError.

for ticker in tickers_with_dups.keys():
   print(ticker)
   clean_master_data.pop(ticker, None)

AEE
AMC
AMH
ARES
ATO
BALL
BFAM
BKI
CBRE
CCI
CE
CLX
CNC
CVX
DISH
DTE
DVA
ED
EGP
EIX
EPAM
EQT
EVA
EXEL
FITB
FWONA
FWONK
GE
GLPI
GPS
HBI
HRB
HST
HWM
IART
IBM
IEX
IQV
IRM
ITT
L
LII
LKQ
LSCC
LSXMA
LSXMK
LUV
MCHP
MLM
MOH
MRCY
MRTX
MS
NI
NOC
PAYC
PCG
PKI
PLUG
PNR
PSX
RARE
REXR
RMD
SEB
SGEN
SMG
SO
SPB
SSNC
STZ
TFX
TNDM
TRU
VLO
VRTX
WEX
X
Z
ZG


In [27]:
# Repeat the extraction. This time we shouldn't have any duplicates.

income_summary = extract_summary('income')

AAL
AAP
AAPL
ABBV
ABT
ACGL
ACHC
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
AEP
AES
AFG
AFL
AGCO
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AMAT
AMD
AME
AMED
AMG
AMGN
AMP
AMT
AMZN
AN
ANET
ANSS
AON
AOS
APD
APH
APTV
AR
ARE
ARMK
ARW
ATR
ATVI
AVB
AVT
AWI
AWK
AXS
AXTA
AYI
AZO
BA
BAC
BAH
BAX
BBY
BC
BDX
BEN
BERY
BG
BIIB
BIO
BK
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BRX
BSX
BURL
BWA
BWXT
BX
BXP
BYD
C
CABO
CACC
CACI
CAG
CAH
CAR
CASY
CAT
CB
CBOE
CBSH
CC
CCK
CCL
CDNS
CDW
CF
CFG
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHK
CHRW
CHTR
CIEN
CINF
CL
CLH
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNP
COF
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CPT
CR
CRI
CRL
CRUS
CSCO
CSGP
CSL
CSX
CTAS
CTLT
CTSH
CUBE
CUZ
CVS
CW
D
DAL
DAR
DCI
DECK
DEI
DFS
DG
DGX
DHI
DHR
DKS
DLB
DLR
DLTR
DOV
DPZ
DRI
DUK
DVN
DXCM
EA
EBAY
ECL
EEFT
EFX
EHC
EL
ELS
EMN
EMR
ENPH
ENTG
EOG
EPR
EQIX
EQR
ERIE
ES
ESI
ESS
ETN
ETR
EVR
EW
EWBC
EXAS
EXC
EXP
EXPD
EXPE
EXR
F
FAF
FANG
FAST
FCN
FCNCA
FCX
FDS
FDX
FE
FFIV
FHN
FICO
FIS
FISV
FIVE
FIVN
FLO
FLS
FLT
FMC
F

In [28]:
key_list = list(income_summary.keys())
len(key_list)

683

In [29]:
# Spot-check a randomly chosen ticker's '21' summary.
key_list = list(income_summary.keys())
# randrange excludes the upper bound; random.randint(0, len(key_list)) could
# return len(key_list) and make the lookup below fail with IndexError.
i = random.randrange(len(key_list))
ticker=key_list[i]
print(ticker)
income_summary[ticker]['21']

AMZN


Unnamed: 0,"Dec. 31, 2020","Dec. 31, 2019","Dec. 31, 2018",type,line_type
Total Net Sales,386064,280522,232887,TR,L
Cost Of Sales,233307,165536,139156,CR,L
General And Admin,6668,5203,4336,SGA,L
Total Operating Expenses,363165,265981,220466,TOE,L
Operating Income,22899,14541,12421,OI,L
Net Income,21331,11588,10073,NI,L


In [30]:
print(ticker)
clean_master_data[ticker]['21']['income']['table']

AMZN


Unnamed: 0_level_0,"Dec. 31, 2020","Dec. 31, 2019","Dec. 31, 2018"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Total Net Sales,386064.0,280522.0,232887.0
Operating Expenses:,,,
Cost Of Sales,233307.0,165536.0,139156.0
Fulfillment,58517.0,40232.0,34027.0
Technology And Content,42740.0,35931.0,28837.0
Marketing,22008.0,18878.0,13814.0
General And Admin,6668.0,5203.0,4336.0
"Other Operating Expense Income, Net",-75.0,201.0,296.0
Total Operating Expenses,363165.0,265981.0,220466.0
Operating Income,22899.0,14541.0,12421.0


In [None]:

df1 = clean_master_data[ticker][yr]['income']['table']
df1

In [None]:
master_data[ticker][yr]['income']

In [None]:
df1 = clean_master_data[ticker][yr]['income']['table']
find_line_items(df1, heading_lookup, line_item_lookup, 'income')

In [None]:
income_summary[ticker][yr]

In [None]:
find_duplicates(income_summary[ticker][yr])

### Build Statements

In [32]:
master_statements = extract_statements('income',income_summary)

AAL
Building Table for AAL 1
Building Table for AAL 2
Building Table for AAL 3
Building Table for AAL 4
Building Table for AAL 5
Building Table for AAL 6
Building Table for AAL 7
Building Table for AAL 8
Building Table for AAL 9
Building Table for AAL 10
Building Table for AAL 11
AAP
Building Table for AAP 1
Building Table for AAP 2
Building Table for AAP 3
Building Table for AAP 4
Building Table for AAP 5
Building Table for AAP 6
Building Table for AAP 7
Building Table for AAP 8
Building Table for AAP 9
Building Table for AAP 10
AAPL
Building Table for AAPL 1
Building Table for AAPL 2
Building Table for AAPL 3
Building Table for AAPL 4
Building Table for AAPL 5
Building Table for AAPL 6
Building Table for AAPL 7
Building Table for AAPL 8
Building Table for AAPL 9
Building Table for AAPL 10
Building Table for AAPL 11
ABBV
Building Table for ABBV 1
Building Table for ABBV 2
Building Table for ABBV 3
Building Table for ABBV 4
Building Table for ABBV 5
Building Table for ABBV 6
Building T

Building Table for AMT 7
Building Table for AMT 8
Building Table for AMT 9
Building Table for AMT 10
Building Table for AMT 11
AMZN
Building Table for AMZN 1
Building Table for AMZN 2
Building Table for AMZN 3
Building Table for AMZN 4
Building Table for AMZN 5
Building Table for AMZN 6
Building Table for AMZN 7
Building Table for AMZN 8
Building Table for AMZN 9
Building Table for AMZN 10
Building Table for AMZN 11
AN
Building Table for AN 1
Building Table for AN 2
Building Table for AN 3
Building Table for AN 4
Building Table for AN 5
Building Table for AN 6
Building Table for AN 7
Building Table for AN 8
Building Table for AN 9
Building Table for AN 10
Building Table for AN 11
ANET
Building Table for ANET 1
Building Table for ANET 2
Building Table for ANET 3
Building Table for ANET 4
Building Table for ANET 5
Building Table for ANET 6
Building Table for ANET 7
Building Table for ANET 8
ANSS
Building Table for ANSS 1
Building Table for ANSS 2
Building Table for ANSS 3
Building Table 

Building Table for BMY 7
Building Table for BMY 8
Building Table for BMY 9
Building Table for BMY 10
Building Table for BMY 11
BOKF
Building Table for BOKF 1
Building Table for BOKF 2
Building Table for BOKF 3
Building Table for BOKF 4
Building Table for BOKF 5
Building Table for BOKF 6
Building Table for BOKF 7
Building Table for BOKF 8
Building Table for BOKF 9
Building Table for BOKF 10
Building Table for BOKF 11
BR
Building Table for BR 1
Building Table for BR 2
Building Table for BR 3
Building Table for BR 4
Building Table for BR 5
Building Table for BR 6
Building Table for BR 7
Building Table for BR 8
Building Table for BR 9
Building Table for BR 10
Building Table for BR 11
BRKR
Building Table for BRKR 1
Building Table for BRKR 2
Building Table for BRKR 3
Building Table for BRKR 4
Building Table for BRKR 5
Building Table for BRKR 6
Building Table for BRKR 7
Building Table for BRKR 8
Building Table for BRKR 9
Building Table for BRKR 10
BRO
Building Table for BRO 1
Building Table f

Building Table for CIEN 5
Building Table for CIEN 6
Building Table for CIEN 7
Building Table for CIEN 8
Building Table for CIEN 9
Building Table for CIEN 10
Building Table for CIEN 11
CINF
Building Table for CINF 1
Building Table for CINF 2
Building Table for CINF 3
Building Table for CINF 4
Building Table for CINF 5
Building Table for CINF 6
Building Table for CINF 7
Building Table for CINF 8
Building Table for CINF 9
Building Table for CINF 10
Building Table for CINF 11
CL
Building Table for CL 1
Building Table for CL 2
Building Table for CL 3
Building Table for CL 4
Building Table for CL 5
Building Table for CL 6
Building Table for CL 7
Building Table for CL 8
Building Table for CL 9
Building Table for CL 10
Building Table for CL 11
CLH
Building Table for CLH 1
Building Table for CLH 2
Building Table for CLH 3
Building Table for CLH 4
Building Table for CLH 5
Building Table for CLH 6
Building Table for CLH 7
Building Table for CLH 8
Building Table for CLH 9
Building Table for CLH 10

Building Table for DEI 7
Building Table for DEI 8
Building Table for DEI 9
Building Table for DEI 10
DFS
Building Table for DFS 1
Building Table for DFS 2
Building Table for DFS 3
Building Table for DFS 4
Building Table for DFS 5
Building Table for DFS 6
Building Table for DFS 7
Building Table for DFS 8
Building Table for DFS 9
Building Table for DFS 10
Building Table for DFS 11
DG
Building Table for DG 1
Building Table for DG 2
Building Table for DG 3
Building Table for DG 4
Building Table for DG 5
Building Table for DG 6
Building Table for DG 7
Building Table for DG 8
Building Table for DG 9
Building Table for DG 10
Building Table for DG 11
DGX
Building Table for DGX 1
Building Table for DGX 2
Building Table for DGX 3
Building Table for DGX 4
Building Table for DGX 5
Building Table for DGX 6
Building Table for DGX 7
Building Table for DGX 8
Building Table for DGX 9
Building Table for DGX 10
DHI
Building Table for DHI 1
Building Table for DHI 2
Building Table for DHI 3
Building Table 

Building Table for EXC 9
Building Table for EXC 10
Building Table for EXC 11
EXP
Building Table for EXP 1
Building Table for EXP 2
Building Table for EXP 3
Building Table for EXP 4
Building Table for EXP 5
Building Table for EXP 6
Building Table for EXP 7
Building Table for EXP 8
Building Table for EXP 9
Building Table for EXP 10
Building Table for EXP 11
EXPD
Building Table for EXPD 1
Building Table for EXPD 2
Building Table for EXPD 3
Building Table for EXPD 4
Building Table for EXPD 5
Building Table for EXPD 6
Building Table for EXPD 7
Building Table for EXPD 8
Building Table for EXPD 9
Building Table for EXPD 10
Building Table for EXPD 11
EXPE
Building Table for EXPE 1
Building Table for EXPE 2
Building Table for EXPE 3
Building Table for EXPE 4
Building Table for EXPE 5
Building Table for EXPE 6
Building Table for EXPE 7
Building Table for EXPE 8
Building Table for EXPE 9
EXR
Building Table for EXR 1
Building Table for EXR 2
Building Table for EXR 3
Building Table for EXR 4
Buildi

Building Table for GME 10
GMED
Building Table for GMED 1
Building Table for GMED 2
Building Table for GMED 3
Building Table for GMED 4
Building Table for GMED 5
Building Table for GMED 6
Building Table for GMED 7
Building Table for GMED 8
Building Table for GMED 9
GNRC
Building Table for GNRC 1
Building Table for GNRC 2
Building Table for GNRC 3
Building Table for GNRC 4
Building Table for GNRC 5
Building Table for GNRC 6
Building Table for GNRC 7
Building Table for GNRC 8
Building Table for GNRC 9
Building Table for GNRC 10
Building Table for GNRC 11
GNTX
Building Table for GNTX 1
Building Table for GNTX 2
Building Table for GNTX 3
Building Table for GNTX 4
Building Table for GNTX 5
Building Table for GNTX 6
Building Table for GNTX 7
Building Table for GNTX 8
Building Table for GNTX 9
Building Table for GNTX 10
Building Table for GNTX 11
GOOG
Building Table for GOOG 1
Building Table for GOOG 2
Building Table for GOOG 3
Building Table for GOOG 4
Building Table for GOOG 5
Building Table

Building Table for IDA 3
Building Table for IDA 4
Building Table for IDA 5
Building Table for IDA 6
Building Table for IDA 7
Building Table for IDA 8
Building Table for IDA 9
Building Table for IDA 10
Building Table for IDA 11
IDXX
Building Table for IDXX 1
Building Table for IDXX 2
Building Table for IDXX 3
Building Table for IDXX 4
Building Table for IDXX 5
Building Table for IDXX 6
Building Table for IDXX 7
Building Table for IDXX 8
Building Table for IDXX 9
Building Table for IDXX 10
Building Table for IDXX 11
IFF
Building Table for IFF 1
Building Table for IFF 2
Building Table for IFF 3
Building Table for IFF 4
Building Table for IFF 5
Building Table for IFF 6
Building Table for IFF 7
Building Table for IFF 8
Building Table for IFF 9
Building Table for IFF 10
ILMN
Building Table for ILMN 1
Building Table for ILMN 2
Building Table for ILMN 3
Building Table for ILMN 4
Building Table for ILMN 5
Building Table for ILMN 6
Building Table for ILMN 7
Building Table for ILMN 8
Building Tab

Building Table for KMX 8
Building Table for KMX 9
Building Table for KMX 10
Building Table for KMX 11
KNX
Building Table for KNX 1
Building Table for KNX 2
Building Table for KNX 3
Building Table for KNX 4
Building Table for KNX 5
Building Table for KNX 6
Building Table for KNX 7
Building Table for KNX 8
Building Table for KNX 9
Building Table for KNX 10
Building Table for KNX 11
KO
Building Table for KO 1
Building Table for KO 2
Building Table for KO 3
Building Table for KO 4
Building Table for KO 5
Building Table for KO 6
Building Table for KO 7
Building Table for KO 8
Building Table for KO 9
Building Table for KO 10
Building Table for KO 11
KR
Building Table for KR 1
Building Table for KR 2
Building Table for KR 3
Building Table for KR 4
Building Table for KR 5
Building Table for KR 6
Building Table for KR 7
Building Table for KR 8
Building Table for KR 9
Building Table for KR 10
Building Table for KR 11
KRC
Building Table for KRC 1
Building Table for KRC 2
Building Table for KRC 3


Building Table for MASI 7
Building Table for MASI 8
Building Table for MASI 9
Building Table for MASI 10
Building Table for MASI 11
MAT
Building Table for MAT 1
Building Table for MAT 2
Building Table for MAT 3
Building Table for MAT 4
Building Table for MAT 5
Building Table for MAT 6
Building Table for MAT 7
Building Table for MAT 8
Building Table for MAT 9
Building Table for MAT 10
Building Table for MAT 11
MCD
Building Table for MCD 1
Building Table for MCD 2
Building Table for MCD 3
Building Table for MCD 4
Building Table for MCD 5
Building Table for MCD 6
Building Table for MCD 7
Building Table for MCD 8
Building Table for MCD 9
Building Table for MCD 10
MCK
Building Table for MCK 1
Building Table for MCK 2
Building Table for MCK 3
Building Table for MCK 4
Building Table for MCK 5
Building Table for MCK 6
Building Table for MCK 7
Building Table for MCK 8
Building Table for MCK 9
Building Table for MCK 10
Building Table for MCK 11
MCO
Building Table for MCO 1
Building Table for MCO

Building Table for NCLH 2
Building Table for NCLH 3
Building Table for NCLH 4
Building Table for NCLH 5
Building Table for NCLH 6
Building Table for NCLH 7
Building Table for NCLH 8
Building Table for NCLH 9
NCR
Building Table for NCR 1
Building Table for NCR 2
Building Table for NCR 3
Building Table for NCR 4
Building Table for NCR 5
Building Table for NCR 6
Building Table for NCR 7
Building Table for NCR 8
Building Table for NCR 9
Building Table for NCR 10
Building Table for NCR 11
NDAQ
Building Table for NDAQ 1
Building Table for NDAQ 2
Building Table for NDAQ 3
Building Table for NDAQ 4
Building Table for NDAQ 5
Building Table for NDAQ 6
Building Table for NDAQ 7
Building Table for NDAQ 8
Building Table for NDAQ 9
Building Table for NDAQ 10
NDSN
Building Table for NDSN 1
Building Table for NDSN 2
Building Table for NDSN 3
Building Table for NDSN 4
Building Table for NDSN 5
Building Table for NDSN 6
Building Table for NDSN 7
Building Table for NDSN 8
Building Table for NDSN 9
Buildi

Building Table for ON 10
Building Table for ON 11
ORCL
Building Table for ORCL 1
Building Table for ORCL 2
Building Table for ORCL 3
Building Table for ORCL 4
Building Table for ORCL 5
Building Table for ORCL 6
Building Table for ORCL 7
Building Table for ORCL 8
Building Table for ORCL 9
Building Table for ORCL 10
Building Table for ORCL 11
ORI
Building Table for ORI 1
Building Table for ORI 2
Building Table for ORI 3
Building Table for ORI 4
Building Table for ORI 5
Building Table for ORI 6
Building Table for ORI 7
Building Table for ORI 8
Building Table for ORI 9
Building Table for ORI 10
Building Table for ORI 11
ORLY
Building Table for ORLY 1
Building Table for ORLY 2
Building Table for ORLY 3
Building Table for ORLY 4
Building Table for ORLY 5
Building Table for ORLY 6
Building Table for ORLY 7
Building Table for ORLY 8
Building Table for ORLY 9
Building Table for ORLY 10
Building Table for ORLY 11
OSK
Building Table for OSK 1
Building Table for OSK 2
Building Table for OSK 3
Buil

Building Table for PRI 6
Building Table for PRI 7
Building Table for PRI 8
Building Table for PRI 9
Building Table for PRI 10
Building Table for PRI 11
PRU
Building Table for PRU 1
Building Table for PRU 2
Building Table for PRU 3
Building Table for PRU 4
Building Table for PRU 5
Building Table for PRU 6
Building Table for PRU 7
Building Table for PRU 8
Building Table for PRU 9
Building Table for PRU 10
PSA
Building Table for PSA 1
Building Table for PSA 2
Building Table for PSA 3
Building Table for PSA 4
Building Table for PSA 5
Building Table for PSA 6
Building Table for PSA 7
Building Table for PSA 8
Building Table for PSA 9
Building Table for PSA 10
Building Table for PSA 11
PSTG
Building Table for PSTG 1
Building Table for PSTG 2
Building Table for PSTG 3
Building Table for PSTG 4
Building Table for PSTG 5
Building Table for PSTG 6
Building Table for PSTG 7
PTC
Building Table for PTC 1
Building Table for PTC 2
Building Table for PTC 3
Building Table for PTC 4
Building Table for PT

Building Table for SLB 9
Building Table for SLB 10
Building Table for SLB 11
SLG
Building Table for SLG 1
Building Table for SLG 2
Building Table for SLG 3
Building Table for SLG 4
Building Table for SLG 5
Building Table for SLG 6
Building Table for SLG 7
Building Table for SLG 8
Building Table for SLG 9
Building Table for SLG 10
Building Table for SLG 11
SLGN
Building Table for SLGN 1
Building Table for SLGN 2
Building Table for SLGN 3
Building Table for SLGN 4
Building Table for SLGN 5
Building Table for SLGN 6
Building Table for SLGN 7
Building Table for SLGN 8
Building Table for SLGN 9
Building Table for SLGN 10
SNA
Building Table for SNA 1
Building Table for SNA 2
Building Table for SNA 3
Building Table for SNA 4
Building Table for SNA 5
Building Table for SNA 6
Building Table for SNA 7
Building Table for SNA 8
Building Table for SNA 9
Building Table for SNA 10
Building Table for SNA 11
SNPS
Building Table for SNPS 1
Building Table for SNPS 2
Building Table for SNPS 3
Building Tab

Building Table for TJX 11
TKR
Building Table for TKR 1
Building Table for TKR 2
Building Table for TKR 3
Building Table for TKR 4
Building Table for TKR 5
Building Table for TKR 6
Building Table for TKR 7
Building Table for TKR 8
Building Table for TKR 9
Building Table for TKR 10
Building Table for TKR 11
TMO
Building Table for TMO 1
Building Table for TMO 2
Building Table for TMO 3
Building Table for TMO 4
Building Table for TMO 5
Building Table for TMO 6
Building Table for TMO 7
Building Table for TMO 8
Building Table for TMO 9
Building Table for TMO 10
Building Table for TMO 11
TMUS
Building Table for TMUS 1
Building Table for TMUS 2
Building Table for TMUS 3
Building Table for TMUS 4
Building Table for TMUS 5
Building Table for TMUS 6
Building Table for TMUS 7
Building Table for TMUS 8
Building Table for TMUS 9
TOL
Building Table for TOL 1
Building Table for TOL 2
Building Table for TOL 3
Building Table for TOL 4
Building Table for TOL 5
Building Table for TOL 6
Building Table for 

Building Table for VFC 6
Building Table for VFC 7
Building Table for VFC 8
Building Table for VFC 9
Building Table for VFC 10
VIRT
Building Table for VIRT 1
Building Table for VIRT 2
Building Table for VIRT 3
Building Table for VIRT 4
Building Table for VIRT 5
Building Table for VIRT 6
Building Table for VIRT 7
VMC
Building Table for VMC 1
Building Table for VMC 2
Building Table for VMC 3
Building Table for VMC 4
Building Table for VMC 5
Building Table for VMC 6
Building Table for VMC 7
Building Table for VMC 8
Building Table for VMC 9
Building Table for VMC 10
VMI
Building Table for VMI 1
Building Table for VMI 2
Building Table for VMI 3
Building Table for VMI 4
Building Table for VMI 5
Building Table for VMI 6
VMW
Building Table for VMW 1
Building Table for VMW 2
Building Table for VMW 3
Building Table for VMW 4
Building Table for VMW 5
Building Table for VMW 6
Building Table for VMW 7
Building Table for VMW 8
Building Table for VMW 9
Building Table for VMW 10
VNO
Building Table for 

Building Table for WWD 8
Building Table for WWD 9
WWE
Building Table for WWE 1
Building Table for WWE 2
Building Table for WWE 3
Building Table for WWE 4
Building Table for WWE 5
Building Table for WWE 6
Building Table for WWE 7
Building Table for WWE 8
Building Table for WWE 9
Building Table for WWE 10
Building Table for WWE 11
WY
Building Table for WY 1
Building Table for WY 2
Building Table for WY 3
Building Table for WY 4
Building Table for WY 5
Building Table for WY 6
Building Table for WY 7
Building Table for WY 8
Building Table for WY 9
Building Table for WY 10
Building Table for WY 11
WYNN
Building Table for WYNN 1
Building Table for WYNN 2
Building Table for WYNN 3
Building Table for WYNN 4
Building Table for WYNN 5
Building Table for WYNN 6
Building Table for WYNN 7
Building Table for WYNN 8
Building Table for WYNN 9
Building Table for WYNN 10
Building Table for WYNN 11
XEL
Building Table for XEL 1
Building Table for XEL 2
Building Table for XEL 3
Building Table for XEL 4
Bui

In [33]:
# Spot-check: look up the entry stored under the 'AAPL' key of master_statements.
# As the last expression in the cell, the result is rendered as the cell output.
master_statements['AAPL']

Unnamed: 0,2022-9,2021-9,2020-9,2019-9,2018-9,2017-9,2016-9,2015-9,2014-9,2013-9,2012-9,2011-9,2010-9,2009-9,type,line_type
Net Sales,394328,365817,274515,260174,265595,229234,215639,233715,182795,170910,156508,108249,65225,42905,R,L
Cost Of Sales,223546,212981,169559,161782,163756,141048,131376,140089,112258,106606,87846,64431,39541,25683,CR,L
Gross Margin,170782,152836,104956,98392,101839,88186,84263,93626,70537,64304,68662,43818,25684,17222,GM,L
Research And Development,26251,21914,18752,16217,14236,11581,10045,8067,6041,4475,3381,2429,1782,1333,RD,L
SGA,25094,21973,19916,18245,16705,15261,14194,14329,11993,10830,10040,7599,5517,4149,SGA,L
Total Operating Expenses,51345,43887,38668,34462,30941,26842,24239,22396,18034,15305,13421,10028,7299,5482,TOE,L
Operating Income,119437,108949,66288,63930,70898,61344,60024,71230,52503,48999,55241,33790,18385,11740,OI,L
Net Income,99803,94680,57411,55256,59531,48351,45687,53394,39510,37037,41733,25922,14013,8235,NI,L


In [41]:
# Spot-check a randomly chosen ticker: print its symbol and display its statement.
key_list = list(income_summary.keys())
# random.randint(a, b) includes b, so randint(0, len(key_list)) could return
# len(key_list) and make key_list[i] raise IndexError. randrange excludes the
# upper bound, so i is always a valid index (for a non-empty key_list).
i = random.randrange(len(key_list))
ticker = key_list[i]
print(ticker)
# NOTE(review): assumes every key of income_summary is also a key of
# master_statements -- confirm against the cell that builds master_statements.
master_statements[ticker]

AZO


Unnamed: 0,2022-8,2021-8,2020-8,2019-8,2018-8,2017-8,2016-8,2015-8,2014-8,2013-8,2012-8,2011-8,2010-8,2009-8,type,line_type
Net Sales,16252230,14629585,12631967,11863743,11221077,10888676,10635676,10187340,9475313,9147530,8603863,8072973,7362618.0,6816824,R,L
"Cost Of Sales, Including Warehouse And Delivery Expenses",7779580,6911800,5861214,5498742,5247331,5149056,5026940,4860309,4540406,4406595,4171827,3953510,,3400375,CR,L
Gross Profit,8472650,7717785,6770753,6365001,5973746,5739620,5608736,5327031,4934907,4740935,4432036,4119463,3711744.0,3416449,GP,L
SGA,5201921,4773258,4353074,4148864,4162890,3659551,3548341,3373980,3104684,2967837,2803145,2624660,2392330.0,2240387,SGA,L
Operating Profit,3270729,2944527,2417679,2216137,1810856,2080069,2060395,1953051,1830223,1773098,1628891,1494803,1319414.0,1176062,OP,L
Net Income,2429604,2170314,1732972,1617221,1337536,1280869,1241007,1160241,1069744,1016480,930373,848974,738311.0,657049,NI,L


In [42]:
# Save to file
# Build the path with os.path.join, matching how the pickle files are located
# when they are read at the top of the notebook, instead of concatenating strings.

with open(os.path.join(PROJ_ROOT_PATH, 'pickle', 'master_statements_inc_stage4.pkl'), 'wb') as f:
  pickle.dump(master_statements, f)

In [43]:
# Transpose the statement of the ticker selected above so that the original
# columns become rows and the line-item labels become columns.
df = master_statements[ticker]
df = df.T
# NOTE(review): the cell previously ended with df['c']. After the transpose the
# columns are line-item labels (e.g. 'Net Sales'), so 'c' is not a valid key and
# the lookup raises KeyError. Display the whole transposed frame instead, which
# matches the output recorded for this cell.
df

Unnamed: 0,Net Sales,"Cost Of Sales, Including Warehouse And Delivery Expenses",Gross Profit,SGA,Operating Profit,Net Income
2022-8,16252230,7779580,8472650,5201921,3270729,2429604
2021-8,14629585,6911800,7717785,4773258,2944527,2170314
2020-8,12631967,5861214,6770753,4353074,2417679,1732972
2019-8,11863743,5498742,6365001,4148864,2216137,1617221
2018-8,11221077,5247331,5973746,4162890,1810856,1337536
2017-8,10888676,5149056,5739620,3659551,2080069,1280869
2016-8,10635676,5026940,5608736,3548341,2060395,1241007
2015-8,10187340,4860309,5327031,3373980,1953051,1160241
2014-8,9475313,4540406,4934907,3104684,1830223,1069744
2013-8,9147530,4406595,4740935,2967837,1773098,1016480
