In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from os import listdir
import re

import csv
import datetime
import pickle
import os

import calendar
import random

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 500)
pd.options.display.float_format = '{:.0f}'.format

In [3]:
DATA_ROOT_PATH="/mnt/data/projects/MD1/data/R1000/reports/"
PROJ_ROOT_PATH="/home/priyesh/projects/MD1"

In [4]:
# Read from Pickle file

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','clean_master_data_stage2.pkl')
clean_master_data = pd.read_pickle(filepath)

In [5]:
# Read from Pickle file

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','master_data_stage2.pkl')
master_data = pd.read_pickle(filepath)

In [6]:
# Read from Pickle file

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','mappings_cash_stage3.pkl')
label_mappings = pd.read_pickle(filepath)

In [7]:
heading_lookup = {}
line_item_lookup = {}

stype='cash'

temp = label_mappings['headings'].to_dict()
heading_lookup[stype] = temp['type']

temp = label_mappings['line_items'].to_dict()
line_item_lookup[stype] = temp['type']

In [8]:
def find_line_items(table, heading_lookup, line_item_lookup, stype):
 
  df_new = pd.DataFrame()

  h = 'No Heading'
  items_found = []
    
  h_lookup = heading_lookup[stype]
  l_lookup = line_item_lookup[stype]
    
  items_required = list(set(l_lookup.values()))

  # Go through each line in table, if row contains only nan then assume heading

  for index, row in table.iterrows():

    if row.isnull().all():
   
      # If all columns are null then it is either a header we are interested in, a header not recognised 
      # or a line item where values are not specified.
    
      # If we encounter a header that has Member or Abstract then terminate loop and ignore subsequent items.
    
      if index in h_lookup:
        
        h = h_lookup[index]
        
        if h == 'EXCL':
          break
        else:
          r1 = row.copy()
          #r1 = r1.rename(swap_text('H',index,stype))
          r1['type'] = h
          r1['line_type'] = 'H'
          df_new = pd.concat([df_new, pd.DataFrame([r1])])
  
    else:
      # If row matches one of the items mapped for current header then store
        
      if index in l_lookup:
        
        l = l_lookup[index]
        items_found.append(l)
          
        r1 = row.copy()
        #r1.rename(swap_text('L',index,stype))
        r1['type'] = l
        r1['line_type'] = 'L'
        df_new = pd.concat([df_new, pd.DataFrame([r1])])
        
    if re.search('Cash and Cash Equivalent At End Of Year$',index,re.IGNORECASE): 
        break
        
    if re.search('End Of Period$',index,re.IGNORECASE): 
        break
 
  return(df_new)

In [9]:
def extract_summary(stype):
    
  ticker_list = list(clean_master_data.keys())

  master_statements = {}

  for ticker in ticker_list:

    master_statements[ticker] = {}
    print(ticker)
    
    for yr in clean_master_data[ticker]:
    
      # Extract statement for each year
    
      master_statements[ticker]
      if stype in clean_master_data[ticker][yr]:
        
        master_statements[ticker][yr] = find_line_items(clean_master_data[ticker][yr][stype]['table'],
                                                          heading_lookup,line_item_lookup,stype)
  return master_statements

In [10]:
def conv_date_cols(cols):
    
  mnth_lookup = {month: index for index, month in enumerate(calendar.month_abbr) if month}

  new_cols = []

  for c in cols:
    
    x = re.search(r"^[A-Z][a-z][a-z]\. \d{2}, \d{4}", c)
    
    if x:
      mnth = mnth_lookup[c.split(".")[0]]
      yr = c[-4:]
      t = yr + '-' + str(mnth)
      new_cols = new_cols + [t]
    else:
      new_cols = new_cols + [c] 

  return new_cols

In [11]:
def find_duplicates(df1):

  df_temp = df1.copy()
  df_temp = df_temp.reset_index(names=['line_item'])
  df_dup = df_temp[df_temp.duplicated(subset=['line_item','line_type'])]

  return df_dup

In [12]:
def extract_statements(stype, master_summary_data):
    
  ticker_list = list(master_summary_data.keys())

  master_statements = {}

  for ticker in master_summary_data:
    
    print(ticker)
    statement = master_summary_data[ticker]
    keys = list(statement.keys())
    
    if len(keys) > 0:
                                            
      keys = sorted(keys,reverse=True)
      df = statement[keys[0]].copy()
      df = df[~df.index.duplicated(keep='first')]

      for i in range(1,len(keys)):
        
        print("Building Table for",ticker,i)
        
        df1_cols = list(set(statement[keys[i]].columns)- set(df.columns))
        df1 = statement[keys[i]][df1_cols].copy()
        df1 = df1[~df1.index.duplicated(keep='first')] 
        df = df.merge(df1, right_index=True, left_index=True, how='left', suffixes=['_x', '_y'])  
    
      # We could end up with an empty dataframe if there are no matches. So skip if that is the case.
     
      if len(df) > 0:
        # Amend date format of column names
        
        cols = list(df.columns)
        new_cols = conv_date_cols(cols)
        df.columns = new_cols

        # Sort in reverse date order, place type and line_type at end
    
        new_cols.remove('type')
        new_cols.remove('line_type')
        cols = sorted(new_cols,reverse=True) + ['type','line_type']
        df = df[cols]
        
      master_statements[ticker] = df

  return master_statements

In [13]:
df1 = clean_master_data['AEE']['18']['cash']['table']
df1

Unnamed: 0_level_0,"Dec. 31, 2017","Dec. 31, 2016","Dec. 31, 2015"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Income From Continuing Operations,,,
Net Income,529.0,659.0,636.0
"-Income Loss From Discontinued Operations, Net Of Tax",0.0,0.0,-51.0
Adjustments To Reconcile Net Income,,,
Provision For Callaway Construction And Operating License,0.0,0.0,69.0
Depreciation And Amortization,876.0,835.0,777.0
Amortization Of Nuclear Fuel,76.0,88.0,97.0
Amortization Of Debt Issuance Costs And Premium/Discounts,22.0,22.0,22.0
"Deferred Income Taxes And Investment Tax Credits, Net",539.0,386.0,369.0
Allowance For Equity Funds Used During Construction,-24.0,-27.0,-30.0


In [14]:
find_line_items(df1, heading_lookup, line_item_lookup, 'cash')


Unnamed: 0,"Dec. 31, 2017","Dec. 31, 2016","Dec. 31, 2015",type,line_type
Net Income,529.0,659.0,636.0,NI,L
Stock-Based Compensation,17.0,17.0,24.0,SC,L
Net Cash Provided By Operating Activities Continuing Operations,2104.0,2124.0,2035.0,NCO,L
Net Cash Provided By Operating Activities,2104.0,2123.0,2031.0,NCO,L
Cash Flow From Investing,,,,CFI,H
Capital Expenditure,-2132.0,-2076.0,-1917.0,CE,L
Net Cash Used In Investing Activities Continuing Operations,-2205.0,-2141.0,-1951.0,NCI,L
Net Cash Used In Investing Activities,-2205.0,-2141.0,-1976.0,NCI,L
Cash Flows From Financing Activities,,,,CFF,H
Net Cash Provided By Used In Financing Activities,102.0,-265.0,232.0,NCF,L


In [15]:
# Create lookup dictionaries

swap_text_dict = {}

swap_text_dict['H'] = {}
swap_text_dict['L'] = {}

In [16]:
cash_summary = extract_summary('cash')

AA
AAL
AAP
AAPL
ABBV
ABNB
ABT
ACGL
ACHC
ACI
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
ADT
AEE
AEP
AES
AFG
AFL
AFRM
AGCO
AGL
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGM
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AM
AMAT
AMC
AMCR
AMD
AME
AMED
AMG
AMGN
AMP
AMZN
AN
ANET
ANSS
AON
AOS
APA
APD
APH
APO
APP
APTV
AR
ARES
ARMK
ARW
ASH
ATO
ATR
ATUS
ATVI
AVGO
AVT
AVTR
AWI
AWK
AXON
AXS
AXTA
AYI
AYX
AZEK
AZO
AZTA
BA
BAC
BAH
BALL
BAX
BBWI
BBY
BC
BDX
BEN
BERY
BFAM
BG
BHF
BIIB
BILL
BIO
BJ
BK
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BSX
BSY
BURL
BWA
BWXT
BX
BYD
C
CABO
CACC
CAH
CAR
CARR
CAT
CB
CBOE
CBSH
CC
CCCS
CCK
CCL
CDAY
CDNS
CDW
CE
CEG
CERT
CF
CFG
CFLT
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHPT
CHRW
CHTR
CIEN
CINF
CL
CLH
CLVT
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNM
CNP
CNXC
COF
COIN
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CR
CRI
CRL
CRUS
CRWD
CSCO
CSL
CTLT
CTRA
CTSH
CTVA
CVNA
CVX
CW
CZR
D
DAL
DAR
DASH
DBX
DCI
DD
DDOG
DECK
DFS
DG
DGX
DHI
DHR
DINO
DIS
DISH
DKNG
DKS
DLB
DLTR
DNA
DOCS
DOCU
DOV
DOW
DPZ
DRVN
DT
DTE
DTM
DUK
DV
DVA
DVN
DXC
DX

In [20]:
len(cash_summary)

833

In [21]:
cash_summary['NFG']['22']

Unnamed: 0,"Sep. 30, 2022","Sep. 30, 2021","Sep. 30, 2020",type,line_type
Operating Activities,,,,CFO,H
Stock-Based Compensation,19506.0,17065.0,14931.0,SC,L
Net Cash Provided By Operating Activities,812521.0,791553.0,740809.0,NCO,L
Investing Activities,,,,CFI,H
Capital Expenditure,-811826.0,-751734.0,-716153.0,CE,L
Net Cash Used In Investing Activities,-518704.0,-633217.0,-1223616.0,NCI,L
Financing Activities,,,,CFF,H
Net Repurchases Of Common Stock,-9590.0,-3702.0,,SBB,L
Net Cash Provided By Used In Financing Activities,-276237.0,-58739.0,476088.0,NCF,L


In [22]:
dups_list = []
tickers_with_dups = {}

ticker_list = list(cash_summary.keys())

for ticker in ticker_list:
  
  print(ticker)
    
  for yr in cash_summary[ticker]:
    
    df_dup = find_duplicates(cash_summary[ticker][yr])
    
    if len(df_dup) > 0:
       
       dups = {}
       dups['yr'] = yr
       dups['df_orig'] = cash_summary[ticker][yr]
       dups['df_dup']  = df_dup
        
       tickers_with_dups[ticker] = dups


AA
AAL
AAP
AAPL
ABBV
ABNB
ABT
ACGL
ACHC
ACI
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
ADT
AEE
AEP
AES
AFG
AFL
AFRM
AGCO
AGL
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGM
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AM
AMAT
AMC
AMCR
AMD
AME
AMED
AMG
AMGN
AMP
AMZN
AN
ANET
ANSS
AON
AOS
APA
APD
APH
APO
APP
APTV
AR
ARES
ARMK
ARW
ASH
ATO
ATR
ATUS
ATVI
AVGO
AVT
AVTR
AWI
AWK
AXON
AXS
AXTA
AYI
AYX
AZEK
AZO
AZTA
BA
BAC
BAH
BALL
BAX
BBWI
BBY
BC
BDX
BEN
BERY
BFAM
BG
BHF
BIIB
BILL
BIO
BJ
BK
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BSX
BSY
BURL
BWA
BWXT
BX
BYD
C
CABO
CACC
CAH
CAR
CARR
CAT
CB
CBOE
CBSH
CC
CCCS
CCK
CCL
CDAY
CDNS
CDW
CE
CEG
CERT
CF
CFG
CFLT
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHPT
CHRW
CHTR
CIEN
CINF
CL
CLH
CLVT
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNM
CNP
CNXC
COF
COIN
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CR
CRI
CRL
CRUS
CRWD
CSCO
CSL
CTLT
CTRA
CTSH
CTVA
CVNA
CVX
CW
CZR
D
DAL
DAR
DASH
DBX
DCI
DD
DDOG
DECK
DFS
DG
DGX
DHI
DHR
DINO
DIS
DISH
DKNG
DKS
DLB
DLTR
DNA
DOCS
DOCU
DOV
DOW
DPZ
DRVN
DT
DTE
DTM
DUK
DV
DVA
DVN
DXC
DX

In [23]:
tickers_with_dups.keys()

dict_keys(['AAL', 'AEE', 'AGO', 'AGR', 'AIZ', 'APD', 'APTV', 'ARES', 'BMRN', 'BWA', 'BYD', 'COTY', 'D', 'DTE', 'ECL', 'ED', 'ES', 'ETSY', 'FAF', 'FCX', 'FISV', 'FIVN', 'FNF', 'GD', 'GE', 'GL', 'HAL', 'HE', 'HON', 'HUN', 'IAC', 'ICUI', 'IQV', 'KBR', 'KMPR', 'KR', 'LHX', 'LLY', 'MRNA', 'NEE', 'NFG', 'OLED', 'PAYC', 'PCG', 'PNR', 'POOL', 'PPL', 'PRI', 'SRE', 'STZ', 'TMO', 'TMUS', 'TOST', 'TRU', 'TTD', 'TXT', 'VAC', 'VIRT', 'VLO', 'VOYA', 'VRSK', 'W', 'WCC', 'WFC', 'WH', 'WMS', 'WTM', 'X', 'YUM'])

In [24]:
len(tickers_with_dups)

69

In [28]:
ticker='TMUS'
yr = tickers_with_dups[ticker]['yr']
tickers_with_dups[ticker]['df_dup']

Unnamed: 0,line_item,"Dec. 31, 2013","Dec. 31, 2012","Dec. 31, 2011",type,line_type
4,Net Cash Provided By Operating Activities,3545,3862,4980,NCO,L


In [29]:
tickers_with_dups[ticker]['df_orig']

Unnamed: 0,"Dec. 31, 2013","Dec. 31, 2012","Dec. 31, 2011",type,line_type
Operating Activities,,,,CFO,H
Net Income,35.0,-7336.0,-4718.0,NI,L
Stock-Based Compensation,100.0,0.0,0.0,SC,L
Net Cash Provided By Operating Activities,141.0,50.0,196.0,NCO,L
Net Cash Provided By Operating Activities,3545.0,3862.0,4980.0,NCO,L
Investing Activities,,,,CFI,H
Purchases Of Property And Equipment,-4025.0,-2901.0,-2729.0,PPE,L
Net Cash Used In Investing Activities,-2092.0,-3915.0,-4699.0,NCI,L
Financing Activities,,,,CFF,H
Net Cash Provided By Financing Activities,4044.0,57.0,0.0,NCF,L


In [30]:
df1 = clean_master_data[ticker][yr]['cash']['table']
df1

Unnamed: 0_level_0,"Dec. 31, 2013","Dec. 31, 2012","Dec. 31, 2011"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Operating Activities,,,
Net Income,35.0,-7336.0,-4718.0
Adjustments To Reconcile Net Income,,,
Impairment Charges,0.0,8134.0,6420.0
Depreciation And Amortization,3627.0,3187.0,2982.0
Stock-Based Compensation,100.0,0.0,0.0
Deferred Income Tax Expense Benefit,10.0,308.0,-233.0
"Amortization Of Debt Discount And Premium, Net",-62.0,-81.0,-84.0
Bad Debt Expense,463.0,702.0,713.0
Deferred Rent Expense,229.0,206.0,218.0


In [31]:
master_data[ticker][yr]['cash']

Unnamed: 0_level_0,"Consolidated Statements of Cash Flows (USD $) In Millions, unless otherwise specified",12 Months Ended,12 Months Ended,12 Months Ended
Unnamed: 0_level_1,"Consolidated Statements of Cash Flows (USD $) In Millions, unless otherwise specified","Dec. 31, 2013","Dec. 31, 2012","Dec. 31, 2011"
0,Operating activities,,,
1,Net income (loss),$ 35,"$ (7,336)","$ (4,718)"
2,Adjustments to reconcile net income (loss) to net cash provided by operating activities,,,
3,Impairment charges,0,8134,6420
4,Depreciation and amortization,3627,3187,2982
5,Stock-based compensation expense,100,0,0
6,Deferred income tax expense (benefit),10,308,(233)
7,"Amortization of debt discount and premium, net",(62),(81),(84)
8,Bad debt expense,463,702,713
9,Deferred rent expense,229,206,218


In [32]:
# Remove companies with duplicates

for ticker in tickers_with_dups.keys():
   print(ticker)
   clean_master_data.pop(ticker)

AAL
AEE
AGO
AGR
AIZ
APD
APTV
ARES
BMRN
BWA
BYD
COTY
D
DTE
ECL
ED
ES
ETSY
FAF
FCX
FISV
FIVN
FNF
GD
GE
GL
HAL
HE
HON
HUN
IAC
ICUI
IQV
KBR
KMPR
KR
LHX
LLY
MRNA
NEE
NFG
OLED
PAYC
PCG
PNR
POOL
PPL
PRI
SRE
STZ
TMO
TMUS
TOST
TRU
TTD
TXT
VAC
VIRT
VLO
VOYA
VRSK
W
WCC
WFC
WH
WMS
WTM
X
YUM


In [33]:
# Repeat the extraction. This time we shouldn't have any duplicates.

cash_summary = extract_summary('cash')

AA
AAP
AAPL
ABBV
ABNB
ABT
ACGL
ACHC
ACI
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
ADT
AEP
AES
AFG
AFL
AFRM
AGCO
AGL
AGNC
AIG
AJG
AKAM
AL
ALB
ALGM
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AM
AMAT
AMC
AMCR
AMD
AME
AMED
AMG
AMGN
AMP
AMZN
AN
ANET
ANSS
AON
AOS
APA
APH
APO
APP
AR
ARMK
ARW
ASH
ATO
ATR
ATUS
ATVI
AVGO
AVT
AVTR
AWI
AWK
AXON
AXS
AXTA
AYI
AYX
AZEK
AZO
AZTA
BA
BAC
BAH
BALL
BAX
BBWI
BBY
BC
BDX
BEN
BERY
BFAM
BG
BHF
BIIB
BILL
BIO
BJ
BK
BKNG
BLD
BLDR
BLK
BMY
BOKF
BR
BRKR
BRO
BSX
BSY
BURL
BWXT
BX
C
CABO
CACC
CAH
CAR
CARR
CAT
CB
CBOE
CBSH
CC
CCCS
CCK
CCL
CDAY
CDNS
CDW
CE
CEG
CERT
CF
CFG
CFLT
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHPT
CHRW
CHTR
CIEN
CINF
CL
CLH
CLVT
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNM
CNP
CNXC
COF
COIN
COLB
COLM
COO
COST
CPB
CPRI
CPRT
CR
CRI
CRL
CRUS
CRWD
CSCO
CSL
CTLT
CTRA
CTSH
CTVA
CVNA
CVX
CW
CZR
DAL
DAR
DASH
DBX
DCI
DD
DDOG
DECK
DFS
DG
DGX
DHI
DHR
DINO
DIS
DISH
DKNG
DKS
DLB
DLTR
DNA
DOCS
DOCU
DOV
DOW
DPZ
DRVN
DT
DTM
DUK
DV
DVA
DVN
DXC
DXCM
EA
EBAY
EEFT
EFX
EHAB
EHC
EIX
EL
ELAN
EMN
EMR
ENOV
ENPH

In [34]:
key_list = list(cash_summary.keys())
len(key_list)

764

In [44]:
key_list = list(cash_summary.keys())
i = random.randint(0,len(key_list))
ticker=key_list[i]
print(ticker)
cash_summary[ticker]['22']

EOG


Unnamed: 0,"Dec. 31, 2021","Dec. 31, 2020","Dec. 31, 2019",type,line_type
Net Income,4664.0,-605.0,2735.0,NI,L
Stock-Based Compensation,152.0,146.0,175.0,SC,L
Net Cash Provided By Operating Activities,8791.0,5008.0,8163.0,NCO,L
Cash Flow From Investing,,,,CFI,H
"Additions To Other Property, Plant And Equipment",-212.0,-221.0,-270.0,PPE,L
Net Cash Used In Investing Activities,-3419.0,-3348.0,-6177.0,NCI,L
Cash Flows From Financing Activities,,,,CFF,H
Net Cash Used In Financing Activities,-3493.0,-359.0,-1513.0,NCF,L


In [45]:
print(ticker)
clean_master_data[ticker]['22']['cash']['table']

EOG


Unnamed: 0_level_0,"Dec. 31, 2021","Dec. 31, 2020","Dec. 31, 2019"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Income From Continuing Operations,,,
Net Income,4664.0,-605.0,2735.0
Items Not Requiring Providing Cash,,,
"Depreciation, Depletion And Amortization",3651.0,3400.0,3750.0
Impairments,376.0,2100.0,518.0
Stock-Based Compensation,152.0,146.0,175.0
Deferred Income Taxes,-122.0,-186.0,632.0
"-Gains Losses On Asset Dispositions, Net",-17.0,47.0,-124.0
"Other, Net",13.0,12.0,4.0
Dry Hole Costs,71.0,13.0,28.0


In [46]:
ticker = 'PPL'
yr=tickers_with_dups[ticker]['yr']
print(yr)
tickers_with_dups[ticker]['df_dup']

16


Unnamed: 0,line_item,"Dec. 31, 2015","Dec. 31, 2014","Dec. 31, 2013",type,line_type
3,Net Cash Provided By Used In Operating Activities,2615,3403,2856,NCO,L
7,Net Cash Provided By Used In Investing Activities,-3588,-3329,-4295,NCI,L
11,Net Cash Provided By Used In Financing Activities,68,583,1631,NCF,L


In [None]:

df1 = clean_master_data[ticker][yr]['cash']['table']
df1

In [None]:
master_data[ticker][yr]['cash']

In [None]:
df1 = clean_master_data[ticker][yr]['cash']['table']
find_line_items(df1, heading_lookup, line_item_lookup, 'cash')

In [None]:
cash_summary[ticker][yr]

In [None]:
find_duplicates(cash_summary[ticker][yr])

### Build Statements

In [67]:
#master_statements = extract_statements('cash',cash_summary)

In [64]:
#master_statements['AAPL']

In [116]:
key_list = list(cash_summary.keys())
i = random.randint(0,len(key_list))
ticker=key_list[i]
print(ticker)
master_statements[ticker]

BBY


Unnamed: 0,2023-1,2022-1,2021-1,2020-2,2019-2,2018-2,2017-1,2015-1,2014-2,2012-3,2011-2,2010-2,($)-2,($)-2.1,($)-2.2,($)-2.3,($)-1,($)-1.1,($)-1.2,($)-1.3,($)-1.4,($)-1.5,($)-1.6,($)-1.7,($)-1.8,type,line_type
Operating Activities,,,,,,,,,,,,,,,,,,,,,,,,,,CFO,H
Stock-Based Compensation,138.0,141.0,135.0,143.0,123.0,129.0,108.0,87.0,90.0,120.0,121.0,118.0,129.0,90.0,129.0,90.0,104.0,108.0,87.0,104.0,108.0,87.0,104.0,108.0,87.0,SC,L
Total Cash Provided By Operating Activities,1824.0,3252.0,4927.0,2565.0,2408.0,2141.0,2557.0,1935.0,1094.0,3293.0,,,2141.0,1094.0,2141.0,1094.0,1343.0,2557.0,1935.0,1343.0,2557.0,1935.0,1343.0,2557.0,1935.0,NCO,L
Investing Activities,,,,,,,,,,,,,,,,,,,,,,,,,,CFI,H
"Additions To Property And Equipment, Net Of 35, 46 And 32, Respectively, Of Non-Cash Capital Expenditures",-930.0,-737.0,-713.0,,,,,,,,,,,,,,,,,,,,,,,CE,L
Total Cash Used In Investing Activities,-962.0,-1372.0,-788.0,-895.0,,,,-1712.0,-517.0,-724.0,,,-1002.0,-517.0,-1002.0,-517.0,-526.0,-877.0,-1712.0,-526.0,-877.0,-1712.0,-526.0,-877.0,-1712.0,NCI,L
Financing Activities,,,,,,,,,,,,,,,,,,,,,,,,,,CFF,H
Repurchase Of Common Stock,-1014.0,-3502.0,-312.0,-1003.0,-1505.0,-2004.0,-698.0,0.0,0.0,-1500.0,-1193.0,0.0,-2004.0,0.0,-2004.0,0.0,-1000.0,-698.0,0.0,-1000.0,-698.0,0.0,-1000.0,-698.0,0.0,SBB,L
Total Cash Used In Financing Activities,-1806.0,-4297.0,-876.0,-1498.0,-2018.0,-2297.0,-1418.0,,,,,,-2297.0,,-2297.0,,-1536.0,-1418.0,-223.0,-1536.0,-1418.0,-223.0,-1536.0,-1418.0,-223.0,NCF,L


In [117]:
master_data[ticker]['16']['cash']



Unnamed: 0_level_0,CONSOLIDATED STATEMENTS OF CASH FLOWS $ in Millions,12 Months Ended,12 Months Ended,12 Months Ended,12 Months Ended,12 Months Ended,12 Months Ended
Unnamed: 0_level_1,CONSOLIDATED STATEMENTS OF CASH FLOWS $ in Millions,"Jan. 30, 2016 USD ($)","Jan. 30, 2016 USD ($).1","Jan. 31, 2015 USD ($)","Jan. 31, 2015 USD ($).1","Feb. 01, 2014 USD ($)","Feb. 01, 2014 USD ($).1"
0,OPERATING ACTIVITIES,,,,,,
1,Net earnings including noncontrolling interests,$ 897,,"$ 1,235",,$ 523,
2,Adjustments to reconcile net earnings (loss) to total cash provided by operating activities,,,,,,
3,Depreciation,657,,656,,701,
4,Amortization of definite-lived intangible assets,0,,0,,15,
5,Restructuring charges,201,,23,,259,
6,(Gain) Loss on sale of business,(99),,(1),,143,
7,Stock-based compensation,104,,87,,90,
8,Deferred income taxes,49,,(297),,(28),
9,"Other, net",38,,8,,62,


In [119]:
clean_master_data[ticker]['16']['cash']['table']

Unnamed: 0_level_0,"Jan. 30, 2016 USD ($)","Jan. 31, 2015 USD ($)","Feb. 01, 2014 USD ($)"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Operating Activities,,,
Net Earnings Including Noncontrolling Interests,897.0,1235.0,523.0
Adjustments To Reconcile Net Earnings Loss To Total Cash Provided By Operating Activities,,,
Depreciation,657.0,656.0,701.0
Amortization Of Definite-Lived Intangible Assets,0.0,0.0,15.0
Restructuring Charges,201.0,23.0,259.0
-Gain Loss On Sale Of Business,-99.0,-1.0,143.0
Stock-Based Compensation,104.0,87.0,90.0
Deferred Income Taxes,49.0,-297.0,-28.0
"Other, Net",38.0,8.0,62.0
