In [1]:
import numpy as np
import pandas as pd
import os
import pickle

In [2]:
# Notebook display settings: show up to 500 rows / 500 columns, cap cell text
# at 100 characters and render floats with two decimals.
for option_name, option_value in (
  ('display.max_rows', 500),
  ('display.max_columns', 500),
  ('display.max_colwidth', 100),
  ('display.float_format', '{:.2f}'.format),
):
  pd.set_option(option_name, option_value)

In [3]:
# Root directories used by this notebook. The defaults are the original
# machine-specific locations; set MD1_DATA_ROOT / MD1_PROJ_ROOT in the
# environment to run the notebook elsewhere without editing this cell.
DATA_ROOT_PATH = os.environ.get("MD1_DATA_ROOT", "/mnt/data/projects/MD1/data/R1000/reports/")
PROJ_ROOT_PATH = os.environ.get("MD1_PROJ_ROOT", "/home/priyesh/projects/MD1")

In [4]:
# Load the pickled ticker list. Path components are passed to os.path.join
# separately, matching how the other pickle paths in this notebook are built.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
filepath = os.path.join(PROJ_ROOT_PATH, 'pickle', 'ticker_list.pkl')
ticker_list = pd.read_pickle(filepath)

In [5]:
# Load the stage-2 cleaned master data from its pickle file.
# It is indexed below as clean_master_data[ticker][yr][stype]['table'].
filepath = os.path.join(
  PROJ_ROOT_PATH,
  'pickle',
  'clean_master_data_stage2.pkl',
)

clean_master_data = pd.read_pickle(filepath)

In [6]:
def get_line_items(t):
  """Split a statement table's row labels into headings and line items.

  A row whose cells are all empty strings or NaN is treated as a section
  heading; every other row is treated as a line item.

  Parameters
  ----------
  t : pandas.DataFrame
    Statement table; its index holds the row labels.

  Returns
  -------
  (list, list)
    ``(headings, line_items)``: the index labels of the blank rows and of
    the remaining rows, each in table order.
  """
  # One vectorised pass over the whole frame instead of a Python-level
  # iterrows() loop: blank strings become NaN, then a row is "blank" when
  # every cell in it is NaN.
  is_blank_row = t.replace('', np.nan).isna().all(axis=1).to_numpy(dtype=bool)

  headings = list(t.index[is_blank_row])
  line_items = list(t.index[~is_blank_row])

  return headings, line_items

In [7]:
def get_table_labels(stype):
  """Collect the distinct heading and line-item labels for one statement type.

  Walks the module-level ``clean_master_data`` (indexed as
  ``clean_master_data[ticker][yr][stype]['table']``) and runs
  ``get_line_items`` on every table of type ``stype``. Ticker/year pairs
  that have no such statement are skipped.

  Parameters
  ----------
  stype : str
    Statement key to look up under each ticker/year, e.g. ``'cash'``.

  Returns
  -------
  dict
    ``{'headings': [...], 'line_items': [...]}`` with duplicates removed.
    Labels are returned in first-seen order, so repeated runs over the same
    data give the same ordering.

  Raises
  ------
  RuntimeError
    If a table cannot be processed; the message names the ticker and year
    and the original exception is chained as the cause.
  """
  # dicts serve as insertion-ordered sets; labels must be hashable, as they
  # already had to be for the previous set()-based de-duplication.
  seen_headings = {}
  seen_line_items = {}

  for ticker in clean_master_data.keys():

    # Progress output: one line per ticker.
    print(ticker)

    for yr in clean_master_data[ticker]:

      # Allow for the likelihood that the statement might be missing for
      # the given ticker and yr.
      if stype not in clean_master_data[ticker][yr]:
        continue

      try:
        headings, line_items = get_line_items(clean_master_data[ticker][yr][stype]['table'])
      except Exception as err:
        # Surface the failure with its context instead of printing and
        # calling exit(), which hid the traceback and stopped the kernel.
        raise RuntimeError("Error : {} {}".format(ticker, yr)) from err

      seen_headings.update(dict.fromkeys(headings))
      seen_line_items.update(dict.fromkeys(line_items))

  return {'headings' : list(seen_headings),
          'line_items' : list(seen_line_items)}


In [8]:
# Collect every distinct heading / line-item label found in the 'cash' statement tables.
table_labels = get_table_labels('cash')

AA
AAL
AAP
AAPL
ABBV
ABNB
ABT
ACGL
ACHC
ACI
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
ADT
AEE
AEP
AES
AFG
AFL
AFRM
AGCO
AGL
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGM
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AM
AMAT
AMC
AMCR
AMD
AME
AMED
AMG
AMGN
AMP
AMZN
AN
ANET
ANSS
AON
AOS
APA
APD
APH
APO
APP
APTV
AR
ARES
ARMK
ARW
ASH
ATO
ATR
ATUS
ATVI
AVGO
AVT
AVTR
AWI
AWK
AXON
AXS
AXTA
AYI
AYX
AZEK
AZO
AZTA
BA
BAC
BAH
BALL
BAX
BBWI
BBY
BC
BDX
BEN
BERY
BFAM
BG
BHF
BIIB
BILL
BIO
BJ
BK
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BSX
BSY
BURL
BWA
BWXT
BX
BYD
C
CABO
CACC
CAH
CAR
CARR
CAT
CB
CBOE
CBSH
CC
CCCS
CCK
CCL
CDAY
CDNS
CDW
CE
CEG
CERT
CF
CFG
CFLT
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHPT
CHRW
CHTR
CIEN
CINF
CL
CLH
CLVT
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNM
CNP
CNXC
COF
COIN
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CR
CRI
CRL
CRUS
CRWD
CSCO
CSL
CTLT
CTRA
CTSH
CTVA
CVNA
CVX
CW
CZR
D
DAL
DAR
DASH
DBX
DCI
DD
DDOG
DECK
DFS
DG
DGX
DHI
DHR
DINO
DIS
DISH
DKNG
DKS
DLB
DLTR
DNA
DOCS
DOCU
DOV
DOW
DPZ
DRVN
DT
DTE
DTM
DUK
DV
DVA
DVN
DXC
DX

In [9]:
# Spot-check: display the 'cash' statement table stored for ticker 'WMT' under year key '15'.
clean_master_data['WMT']['15']['cash']['table']

Unnamed: 0_level_0,"Jan. 31, 2015","Jan. 31, 2014","Jan. 31, 2013"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Income From Continuing Operations,,,
Consolidated Net Income,17099.0,16695.0,17756.0
"Income From Discontinued Operations, Net Of Income Taxes",-285.0,-144.0,-52.0
Income From Continuing Operations,16814.0,16551.0,17704.0
Adjustments To Reconcile Net Income,,,
Depreciation And Amortization,9173.0,8870.0,8478.0
Deferred Income Taxes,-503.0,-279.0,-133.0
Other Operating Activities,785.0,938.0,602.0
"Changes In Certain Assets And Liabilities, Net Of Effects Of Acquisitions:",,,
"Receivables, Net",-569.0,-566.0,-614.0


## Map Headings


In [10]:
# One row per distinct heading label, indexed by the label itself; every row
# starts with the placeholder type 'X' (not yet mapped).
df = (
  pd.DataFrame(table_labels['headings'], columns=['heading'])
    .assign(type='X')
    .set_index('heading')
)
df

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Prepaid Expenses And Other Assets,X
Net Cash Provided By Used In Financing Activities,X
Class A Common Stock,X
"Changes In Current Assets And Liabilities, Net Of Effects From Acquisition/Disposition Of Businesses:",X
Changes In Finance Receivable Related Assets:,X
...,...
New Financing,X
Sales Purchases Of,X
Successor,X
"Increase Decrease In Cash Resulting From Changes In Operating Assets And Liabilities, Net Of Effects Of Acquisitions:",X


In [11]:
#Cash Sheet Mappings for headings:

#CFO   Cash flow from operating activities
#CFI   Cash flow from investing activities
#CFF   Cash flow from financing activities

In [14]:
# Headings that start with "Income" and also contain "Cont" and "Oper"
# (case-sensitive), e.g. "Income From Continuing Operations".
label_text = df.index.str
starts_with_income = label_text.contains(r"^Income", case=True, regex=True, na=False)
mentions_cont = label_text.contains(r"Cont", case=True, regex=True, na=False)
mentions_oper = label_text.contains(r"Oper", case=True, regex=True, na=False)
filter = starts_with_income & mentions_cont & mentions_oper

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Income From Continuing Operations,X


In [None]:
# Tag the rows selected by `filter` as CFO. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CFO'

In [12]:
#CFO   Cash flow from operating activities
#
# Headings that mention both "Cash flow" and "operating" (case-insensitive).
label_text = df.index.str
mentions_cash_flow = label_text.contains(r"Cash flow", case=False, regex=True, na=False)
mentions_operating = label_text.contains(r"operating", case=False, regex=True, na=False)
filter = mentions_cash_flow & mentions_operating

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Reconciliation Of Net Income To Cash Flows From Operating Activities:,X
Adjustments To Reconcile Net Loss To Cash Flows From Operating Activities:,X
Cash Flows Due To Changes In Operating Assets And Liabilities,X
Non-Cash Adjustments Reconciling Net Loss To Operating Cash Flows,X
Adjustments To Reconcile Net Loss Income To Net Cash Flows Used For Provided By Operating Activities:,X
Adjustments To Reconcile Net Earnings To Net Cash Flows Provided By Operating Activities:,X
Reconciliations Of Net Loss To Net Cash Flows Provided By Operating Activities:,X
Adjustments To Reconcile Net Loss Income To Net Cash Flows From Operating Activities:,X
Adjustments To Reconcile Net Loss To Net Cash Flows Used In Operating Activities:,X
Net Cash Flows Provided By Operating Activities,X


In [13]:
# Tag the rows selected by `filter` as CFO. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CFO'

In [14]:
# More CFO candidates: headings that mention "Activities" and "operating"
# but neither "Abstract" nor "net" (all case-insensitive).
label_text = df.index.str
is_operating_activity = (
  label_text.contains(r"Activities", case=False, regex=True, na=False)
  & label_text.contains(r"operating", case=False, regex=True, na=False)
)
is_excluded = label_text.contains(r"Abstract|net", case=False, regex=True, na=False)
filter = is_operating_activity & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Operating Activities Of Continuing Operations,X
Increase Decrease In Cash And Cash Equivalents Operating Activities:,X
Non-Cash Operating Activities Are As Follows:,X
Cash Provided By Used For Operating Activities:,X
Cash From Operating Activities:,X
Cash Provided By Operating Activities,X
Adjustments To Reconcile Cash Provided By Operating Activities:,X
Operating Activities:,X
Non-Cash Operating Activities:,X
Adjustments To Reconcile Income To Cash Provided By Operating Activities:,X


In [15]:
# Tag the rows selected by `filter` as CFO. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CFO'

In [16]:
#CFI   Cash flow from investing activities
#
# Headings that mention "Cash flow" and "invest", minus any that hit one of
# the two exclusion patterns (all case-insensitive).
label_text = df.index.str
is_investing_cash_flow = (
  label_text.contains(r"Cash flow", case=False, regex=True, na=False)
  & label_text.contains(r"invest", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"disclosure|vehicles|Lessee|Abstract", case=False, regex=True, na=False)
  | label_text.contains(r"adjustments|reconciliation|liabilities", case=False, regex=True, na=False)
)
filter = is_investing_cash_flow & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Cash Flow From Investing,X
Supplementary Cash Flow Information On Non-Cash Investing And Financing Activities,X
Cash Flows Relating To Investing Activities,X
Net Increase Decrease In Cash Flows From Investing Activities:,X
Net Cash Flows From Investing Activities:,X
Cash Flows Provided By Disbursed For Investing Activities:,X
Net Cash Flow Used For Investing Activities,X
Cash Flow Investing Activities From Continuing Activities,X
Net Cash Flows Used In Investing Activities,X
Net Cash Flows From Used For Investing Activities,X


In [17]:
# Tag the rows selected by `filter` as CFI. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CFI'

In [18]:
# More CFI candidates: headings that mention both "Activities" and
# "Investing" (case-insensitive). No exclusions are applied here.
label_text = df.index.str
mentions_activities = label_text.contains(r"Activities", case=False, regex=True, na=False)
mentions_investing = label_text.contains(r"Investing", case=False, regex=True, na=False)
filter = mentions_activities & mentions_investing

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Cash Used For Investing Activities,X
Investing Activities [Abstract],X
Schedule Of Non Cash Investing And Financing Activities:,X
Investing Activities Of Continuing Operations:,X
Supplemental Disclosures Of Noncash Investing And Financing Activities:,X
Cash Provided Required By Investing Activities Of Discontinued Operations:,X
Supplemental Schedule Of Noncash Investing And Financing Activities:,X
Supplemental Schedule Of Noncash Investing And Financing Activities: [Abstract],X
Supplemental Cash Flow Disclosures For Non-Cash Investing And Financing Activities:,X
Investing Activities,X


In [19]:
# Tag the rows selected by `filter` as CFI. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CFI'

In [20]:
#CFF   Cash flow from financing activities
#
# Headings that mention "Cash flow" and "financ" but none of
# "disclosure", "vehicles", "Lessee", "Abstract" (all case-insensitive).
label_text = df.index.str
is_financing_cash_flow = (
  label_text.contains(r"Cash flow", case=False, regex=True, na=False)
  & label_text.contains(r"financ", case=False, regex=True, na=False)
)
is_excluded = label_text.contains(r"disclosure|vehicles|Lessee|Abstract", case=False, regex=True, na=False)
filter = is_financing_cash_flow & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Supplementary Cash Flow Information On Non-Cash Investing And Financing Activities,CFI
Net Cash Flow Used For Financing Activities,X
Net Cash Flow Provided By Used For Financing Activities,X
Net Cash Flow Used For Provided By Financing Activities,X
Cash Flows From Financing Activities Attributable to Continuing Operations,X
Net Cash Flows From Financing Activities:,X
Net Cash Flows Provided By Used In Financing Activities,X
Cash Flows Provided By Disbursed For Financing Activities:,X
Net Cash Flow Financing Activities Discontinued Operations,X
Cash Flows Relating To Financing Activities,X


In [21]:
# Tag the rows selected by `filter` as CFF (only the 'type' column is written).
# NOTE(review): the saved preview of this filter includes a combined
# "Investing And Financing" heading that is already tagged CFI; this
# assignment relabels it CFF -- confirm that is intended.
df.loc[filter, 'type'] = 'CFF'

In [22]:
# More CFF candidates: headings that mention both "Activities" and "Finan"
# (case-insensitive). No exclusions are applied here.
label_text = df.index.str
mentions_activities = label_text.contains(r"Activities", case=False, regex=True, na=False)
mentions_financing = label_text.contains(r"Finan", case=False, regex=True, na=False)
filter = mentions_activities & mentions_financing

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Financing Activities From Continuing Operations:,X
Schedule Of Non Cash Investing And Financing Activities:,CFI
Supplemental Disclosures Of Noncash Investing And Financing Activities:,CFI
Non-Cash Financing Activities Items Arising From Llc Interest Ownership Changes,X
Supplemental Schedule Of Noncash Investing And Financing Activities:,CFI
Supplemental Schedule Of Noncash Investing And Financing Activities: [Abstract],CFI
Supplemental Cash Flow Disclosures For Non-Cash Investing And Financing Activities:,CFI
Non-Cash Investing And Financing Activities Were As Follows:,CFI
Supplemental Schedule Of Non-Cash Investing And Financing Activities,CFI
Supplemental Noncash Investing And Financing Activities:,CFI


In [23]:
# Tag the rows selected by `filter` as CFF (only the 'type' column is written).
# NOTE(review): the saved preview of this filter shows several
# "... Investing And Financing Activities" headings already tagged CFI;
# this assignment relabels them CFF -- confirm that is intended.
df.loc[filter, 'type'] = 'CFF'

In [24]:
# Headings whose text ends with "Member]" (case-sensitive).
ends_with_member = df.index.str.contains(
  r"Member\]$",
  case=True,
  regex=True,
  na=False,
)
filter = ends_with_member
df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Convertible Senior Notes [Member] | 1 Percent Convertible Senior Notes [Member],X
Guest Loyalty Program [Member],X
4.50% Senior Notes [Member] | Senior Notes [Member],X
Successor [Member] | Automotive [Member],X
"Dominion Energy Gas Holdings, Llc | Cove Point [Member]",X
"Segment, Discontinued Operations [Member]",X
Hospira [Member],X
Additional Paid-In Capital [Member],X
6.75% Senior Notes Due 2018 [Member],X
System Energy [Member],X


In [25]:
# Tag the rows selected by `filter` as EXCL. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'EXCL'

In [26]:
# Keep only the headings that received a label (anything other than the
# placeholder 'X').
is_mapped = df['type'].ne('X')
df_headings = df.loc[is_mapped]
df_headings

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Convertible Senior Notes [Member] | 1 Percent Convertible Senior Notes [Member],EXCL
Cash Used For Investing Activities,CFI
Financing Activities From Continuing Operations:,CFF
Guest Loyalty Program [Member],EXCL
Investing Activities [Abstract],CFI
...,...
Prior Revolving Credit Agreement [Member],EXCL
Significant Noncash Investing And Financing Activities:,CFF
Cash Used For Financing Activities,CFF
Hlikk And Hlil [Member],EXCL


## Map Line Items

In [27]:
#Cash Sheet Mappings for line items:

#NI     Net Income
#NCO    Net Cash Provided by Operating Activities
#SC     Stock based compensation
#D      Dividends
#SBB    Stock buy backs
#NCF    Net Cash used for financing purposes
#NCI    Net Cash Used for Investing
#CE     Capital Expenditure
#PPE    Property, Plant and Equipment

In [28]:
# One row per distinct line-item label, indexed by the label itself; every
# row starts with the placeholder type 'X' (not yet mapped).
df = (
  pd.DataFrame(table_labels['line_items'], columns=['line_items'])
    .assign(type='X')
    .set_index('line_items')
)
df

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Payment Of Tender Premium On Debt Extinguishment,X
Repurchase Of Common Shares And Shares Withheld In Lieu Of Income Taxes,X
Distribution Of Deferred Compensation From Trust Assets,X
Purchase Of Joint Venture Minority Interest,X
Provision For Inventory Obsolescence,X
...,...
Trade Loan Repayments From Customers,X
Payments Of Short And Long-Term Borrowings,X
Repayments Received From Loans And Deposits To Third Parties,X
Net Payments Or Proceeds On Derivative Liability Contracts,X


In [29]:
#NI   Net Income
#
# Line items that mention "Net income" and hit none of the exclusion
# patterns (all case-insensitive).
# NOTE(review): the saved preview still lists "Increase Decrease In Net
# Income Taxes Payable", which looks like a tax item rather than net
# income -- confirm whether "tax" should be excluded.
label_text = df.index.str
excluded_patterns = (
  r"disclosure|classified",
  r"invest|stock|discontinued|insurance",
  r"venture|interest|fair|equity",
  r"adjustments|reconciliation|liabilities",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
filter = label_text.contains(r"Net income", case=False, regex=True, na=False) & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Income Loss And Comprehensive Income Loss,X
Net Income Attributable To The Timken Company,X
Net Income Loss Attributable To Johnson Controls,X
Boyds Share Of Borgatas Net Income,X
Net Income Attributable To Parent,X
Increase Decrease In Net Income Taxes Payable,X
Net Income Loss Attributable To Parent,X
Net Income Loss Attributable To Tyco Common Shareholders,X
Net Income Attributable To Air Products,X
Net Income Attributable To Tyco Common Shareholders,X


In [30]:
# Tag the rows selected by `filter` as NI. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'NI'

In [31]:
#NCO    Net Cash Provided by Operating Activities
#
# Line items that mention both "cash" and "operating" and hit none of the
# exclusion patterns (all case-insensitive).
label_text = df.index.str
excluded_patterns = (
  r"disclosure|vehicles|classified",
  r"reclass|lease|inoperating|working",
  r"estate|restricted|reorg|note|sale|proceed",
  r"divested|equipment|property",
  r"invest|stock|discontinued|insurance",
  r"venture|interest",
  r"escrow|overdraft",
  r"adjustments|reconciliation|liabilities",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
is_operating_cash = (
  label_text.contains(r"cash", case=False, regex=True, na=False)
  & label_text.contains(r"operating", case=False, regex=True, na=False)
)
filter = is_operating_cash & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Cash And Cash Equivalents Provided By Used In Operating Activities,X
Net Cash Provided Used By Operating Activities,X
Net Cash From Continuing Operating Activities,X
Other Non-Cash Operating Income,X
Cash Provided By Used For Operating Activities Continuing Operations,X
"Net Cash Provided By Operating Activities, Net Of Effect Of Acquisition",X
Net Cash Flows Provided Used By Operating Activities,X
Cash From Operating Activities,X
Cash Provided Used By Operating Activities,X
Net Cash Flow Provided By Operating Activities,X


In [32]:
# Tag the rows selected by `filter` as NCO. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'NCO'

In [34]:
#SC  Stock based compensation
#
# Line items that mention "compensation" together with "stock" or "share",
# minus those hitting either exclusion pattern (all case-insensitive).
label_text = df.index.str
is_equity_compensation = (
  label_text.contains(r"compensation", case=False, regex=True, na=False)
  & label_text.contains(r"stock|share", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"invest|discontinued|insurance", case=False, regex=True, na=False)
  | label_text.contains(r"venture|interest|Excess|Tax|Issuan|Proc", case=False, regex=True, na=False)
)
filter = is_equity_compensation & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Market Value Of Common Shares Received From Employees In Connection With Share-Based Compensation See Note 17,X
Share-Based Compensation Capitalized In Property And Equipment,X
Stock Option And Long-Term Compensation Expense,X
Stock-Based Compensation To Board Of Directors And Scientific Advisory Board,X
Common Stock Repurchased Share Based Compensation Plans,X
Repurchases Of Common Stock For Stock-Based Compensation,X
Non-Cash Stock-Based Compensation Expense,X
Stock Option Compensation Expense,X
Non-Cash Stock Compensation And Other Charges,X
Amortization Of Unearned Share-Based Compensation,X


In [35]:
# Tag the rows selected by `filter` as SC. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'SC'

In [36]:
#D  Dividends
#
# Line items that mention "dividends" and hit none of the exclusion
# patterns (all case-insensitive).
label_text = df.index.str
excluded_patterns = (
  r"noncontrolling|kind|change",
  r"received|declared|former|affiliated",
  r"convertible|series",
  r"invest|discontinued|insurance",
  r"venture|tax|interest",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
filter = label_text.contains(r"dividends", case=False, regex=True, na=False) & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Dividends From Unconsolidated Affiliate,X
Dividends Of Earnings From Unconsolidated Affiliate,X
Dividends-Class A Stockholders,X
Dividends Accrued On Restricted Stock Plans,X
Payment Of Preferred Stock Dividends,X
Payments Of Dividends,X
Cash Dividends Paid To Preferred Shareholders,X
Cash Dividends Paid On Common Stock,X
Dividends Payments,X
Payment Of Dividends And Distributions,X


In [37]:
# Tag the rows selected by `filter` as D. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'D'

In [34]:
# Cash and Cash Equivalent
#
# Labels containing "Cash Equivalent" and neither exclusion pattern
# (case-sensitive, unlike most filters in this notebook).
# NOTE(review): the saved output of this cell shows an index named
# "heading" although `df` is indexed by "line_items" at this point, and the
# execution count is out of sequence -- re-run to confirm the preview.
label_text = df.index.str
is_excluded = (
  label_text.contains(r"Ext|Held|Summary|Suppl|Ret", case=True, regex=True, na=False)
  | label_text.contains(r"Reclass|Recon|Exch|Foreign|Less|Change|Dec", case=True, regex=True, na=False)
)
filter = label_text.contains(r"Cash Equivalent", case=True, regex=True, na=False) & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
"Cash, Cash Equivalents, And Restricted Cash Consisted Of The Following:",X
Cash and Cash Equivalent At End Of Year,X
"Cash And Cash Equivalents Of Discontinued Operations, Beginning Of Year",X
Cash and Cash Equivalent,X
Total Cash and Cash Equivalent At Begining Of Period,X
"Disclosure Of Cash, Cash Equivalents And Restricted Cash:",X


In [None]:
# Tag the rows selected by `filter` as Cash. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'Cash'

In [43]:
# Labels containing "Loss" (case-sensitive). This single pattern is the
# whole filter; no exclusions are applied.
contains_loss = df.index.str.contains(
  r"Loss",
  case=True,
  regex=True,
  na=False,
)
filter = contains_loss

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Adjustments To Reconcile Net Earnings Loss To Cash Provided By Used In Continuing Operating Activities:,X
Provision Reduction For Losses On Trade Accounts Receivable,X
Adjustments To Reconcile Net Loss Income To Net Cash Used In And Provided By Operating Activities:,X
Adjustments To Reconcile Net Loss Income To Net Cash From Operating Activities,X
Adjustments To Reconcile Net Loss Income To Net Cash Provided By Continuing Operations:,X
-Gains / Losses On Sale Of Other Assets,X
Adjustments To Reconcile Net Loss Income To Cash Provided By Used In Operating Activities:,X
Adjustments To Reconcile Income Loss From Continuing Operations To Net Cash Provided By Operating Activities:,X
-Gains/Losses On:,X
Adjustments To Reconcile Net Earnings Loss To Net Cash From Operating Activities:,X


In [None]:
# Tag the rows selected by `filter` as NI (only the 'type' column is written).
# NOTE(review): the preceding cell selects every label containing "Loss";
# its saved preview lists rows such as "Provision Reduction For Losses On
# Trade Accounts Receivable" and "-Gains / Losses On Sale Of Other Assets",
# which would all be tagged NI here -- confirm before running.
df.loc[filter, 'type'] = 'NI'

In [16]:
# Depreciation and Amortization
#
# Labels that contain both "Depreciation" and "Amort" (case-sensitive).
label_text = df.index.str
mentions_depreciation = label_text.contains(r"Depreciation", case=True, regex=True, na=False)
mentions_amortization = label_text.contains(r"Amort", case=True, regex=True, na=False)
filter = mentions_depreciation & mentions_amortization

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
"Depreciation, Impairment Charges And Amortization Expense",X
Depreciation And Amortization:,X
Depreciation And Amortization,X


In [None]:
# Tag the rows selected by `filter` as Dep. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'Dep'

In [19]:
# Debt
#
# Labels that contain "Debt" together with "Proceeds" or "Payment"
# (case-sensitive).
label_text = df.index.str
mentions_debt = label_text.contains(r"Debt", case=True, regex=True, na=False)
mentions_cash_movement = label_text.contains(r"Proceeds|Payment", case=True, regex=True, na=False)
filter = mentions_debt & mentions_cash_movement

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Payments On Long-Term Debt,X
"Proceeds From Issuance Of Debt, Net Of Discount And Premium",X
"Proceeds From Long-Term Debt, Net Occidental",X
Payment Of 7% Senior Notes Due 2014 [Member] | Unsecured Debt [Member],X
"Proceeds From Issuance Of Debt, Net Of Discount And Fees",X
Proceeds From Issuance Of Debt Original Maturities Greater Than Three Months,X
Payment Of Floating Rate Senior Notes Due 2013 [Member] | Unsecured Debt [Member],X
Payments On Debt Original Maturities Greater Than Three Months:,X
Proceeds From Debt Issued Original Maturities Greater Than Three Months:,X
Payments On Debt Original Maturities Greater Than Three Months,X


In [None]:
# Tag the rows selected by `filter` as Debt. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'Debt'

In [18]:
# Proceeds From
#
# Labels that contain "Proceeds" together with "Ipo", "Issuance" or "Stock"
# (case-sensitive).
label_text = df.index.str
mentions_proceeds = label_text.contains(r"Proceeds", case=True, regex=True, na=False)
mentions_issuance = label_text.contains(r"Ipo|Issuance|Stock", case=True, regex=True, na=False)
filter = mentions_proceeds & mentions_issuance

df.loc[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
"Proceeds From Issuance Of Debt, Net Of Discount And Premium",X
"Proceeds From Issuance Of Preferred Stock, Net",X
Proceeds From Sale Of Class F Common Stock To Sponsor,X
Proceeds From Issuance Of Stock,X
"Proceeds From Issuance Of Common Stock, Net",X
"Proceeds From Issuance Of Debt, Net Of Discount And Fees",X
Proceeds From The Issuance Of:,X
Proceeds From Issuance Of Common Stock,X
Proceeds From Issuance Of Debt Original Maturities Greater Than Three Months,X
Net Proceeds From Issuance Of Preferred Stock,X


In [None]:
# Tag the rows selected by `filter` as Proc (only the 'type' column is written).
# NOTE(review): labels such as "Proceeds From Issuance Of Debt, Net Of
# Discount And Premium" appear in the saved previews of both this filter and
# the earlier Debt filter, so this assignment would overwrite their Debt
# tag -- confirm the intended precedence.
df.loc[filter, 'type'] = 'Proc'

In [38]:
#SBB    Stock buy backs
#
# Line items matching "Purchase ... Common ... Stock" (in that order) that
# do not mention adjustments, reconciliation or liabilities
# (all case-insensitive).
label_text = df.index.str
is_stock_purchase = label_text.contains(r"Purchase.*Common.*Stock", case=False, regex=True, na=False)
is_excluded = label_text.contains(r"adjustments|reconciliation|liabilities", case=False, regex=True, na=False)
filter = is_stock_purchase & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Repurchases Of Common Stock, Including Accelerated Share Repurchase",X
Purchase Of Capped Call And Common Stock Forward,X
"Repurchase Of Common Stock, Including Accelerated Share Repurchase Program",X
Repurchases Of Common Stock For Stock-Based Compensation,SC
Amounts Accrued For Repurchases Of Common Stock,X
Repurchases Of Common And Capital Stock,X
Repurchase Of Common Stock And Forward Contract,X
Repurchases Of Common Stock To Satisfy Employee Withholding Tax Obligations,X
Repurchases Of Restricted Common Stock From Terminated Employees,X
Purchases Of Company Common Stock,X


In [39]:
# Tag the rows selected by `filter` as SBB (only the 'type' column is written).
# NOTE(review): the saved preview of this filter includes "Repurchases Of
# Common Stock For Stock-Based Compensation", already tagged SC; this
# assignment relabels it SBB -- confirm that is intended.
df.loc[filter, 'type'] = 'SBB'

In [40]:
# More SBB candidates: line items that mention "repurchase" together with
# "stock" or "share", excluding compensation-related rows and rows hitting
# the second exclusion pattern (all case-insensitive).
label_text = df.index.str
is_share_repurchase = (
  label_text.contains(r"repurchase", case=False, regex=True, na=False)
  & label_text.contains(r"stock|share", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"compensation", case=False, regex=True, na=False)
  | label_text.contains(r"treasury|warrant|liability|unvested", case=False, regex=True, na=False)
)
filter = is_share_repurchase & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Repurchase Of Common Shares And Shares Withheld In Lieu Of Income Taxes,X
"Repurchases Of Common Stock, Including Accelerated Share Repurchase",SBB
"Repurchase Of Common Stock, Including Accelerated Share Repurchase Program",SBB
Cash Received From Share Repurchase Contracts,X
Repurchase Of Common Shares By Subsidiary,X
Share Repurchases Pursuant To Share Repurchase Programs,X
Common Stock Repurchase Program,X
Settlement Prepayment Of Accelerated Share Repurchase Program,X
Repurchase Of Common Shares Including Shares Withheld In Lieu Of Income Taxes,X
Forward Contract For Accelerated Share Repurchases,X


In [41]:
# Tag the rows selected by `filter` as SBB. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'SBB'

In [42]:
#NCF  Net Cash used for financing purposes
#
# Line items that mention both "cash" and "financing" and hit none of the
# exclusion patterns (all case-insensitive).
label_text = df.index.str
excluded_patterns = (
  r"disclosure|vehicles|classified|attrib",
  r"reclass|lease|inoperating|working",
  r"estate|restricted|reorg|note|sale|proceed",
  r"divested|equipment|property|non",
  r"invest|stock|discontinued|insurance",
  r"venture|tax|interest|fair|equity|other",
  r"-1|escrow|overdraft|instruments|loan",
  r"discontinuing|derivatives|program",
  r"collateral|adjustments|reconciliation|liabilities",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
is_financing_cash = (
  label_text.contains(r"cash", case=False, regex=True, na=False)
  & label_text.contains(r"financing", case=False, regex=True, na=False)
)
filter = is_financing_cash & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Cash Used In Financing Activities Of Continuing Operations,X
Net Cash Flows From Used In Financing Activities,X
Net Cash From Financing Activities Continuing Operations,X
Net Cash Provided Used By Financing Activities,X
Net Cash Used In Financing Activities-Continuing Operations,X
"Net Cash Provided By Used In Financing Activities, Total",X
Net Cash Used In Provided By Financing Activities,X
Cash Provided Used By Financing Activities,X
Net Cash Used In/ Provided By Financing Activities,X
* Net Cash Provided By/-Used In Financing Activities *,X


In [43]:
# Tag the rows selected by `filter` as NCF. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'NCF'

In [44]:
#NCI    Net Cash Used for Investing
#
# Line items that mention both "cash" and "investing" and hit none of the
# exclusion patterns (all case-insensitive).
label_text = df.index.str
excluded_patterns = (
  r"disclosure|vehicles|classified|attrib",
  r"reclass|lease|inoperating|working",
  r"estate|restricted|reorg|note|sale|proceed",
  r"divested|equipment|property|non",
  r"stock|discontinued|insurance",
  r"venture|tax|interest|fair|equity|other",
  r"-1|escrow|overdraft|instruments|loan",
  r"discontinuing|derivatives|program",
  r"collateral|adjustments|reconciliation|liabilities",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
is_investing_cash = (
  label_text.contains(r"cash", case=False, regex=True, na=False)
  & label_text.contains(r"investing", case=False, regex=True, na=False)
)
filter = is_investing_cash & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Cash Provided By/-Used For Investing Activities,X
Cash From Used For Investing Activities,X
Net Cash Used In / Provided By Investing Activities,X
Net Cash Used For From Investing Activities,X
Net Cash Used Provided By Investing Activities From Continuing Operations,X
Net Cash From Investing Activities Continuing Operations,X
Cash Used For Provided By Investing Activities,X
Cash Provided By Used For Investing Activities,X
Cash From Investing Activities Continuing Operations,X
Net Investing Cash,X


In [45]:
# Tag the rows selected by `filter` as NCI. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'NCI'

In [46]:
#CE Capital Expenditure
#
# Line items that mention both "expenditure" and "capital", minus those
# hitting either exclusion pattern (all case-insensitive).
label_text = df.index.str
is_capital_expenditure = (
  label_text.contains(r"expenditure", case=False, regex=True, na=False)
  & label_text.contains(r"capital", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"discontinued|liquidity", case=False, regex=True, na=False)
  | label_text.contains(r"construction|liabilities|escrow|estate", case=False, regex=True, na=False)
)
filter = is_capital_expenditure & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Accrued Capital Expenditures, End Of Period",X
"Capital Expenditures From Continuing Operations, Excluding Acquisitions",X
Capital Expenditures For Other Properties And Equipment,X
Increase Decrease In Accrued And Unpaid Capital Expenditures,X
Total Capital Expenditures,X
Accrued Capital Expenditures Included In Accounts Payable,X
"Project Capital Expenditures, Net Of Reimbursements",X
Capital Expenditures And Other,X
Increase Decrease In Accrued Capital Expenditures And Accounts Payable For Property And Equipment,X
-Decrease Increase In Accrued Capital Expenditures,X


In [47]:
# Tag the rows selected by `filter` as CE. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CE'

In [48]:
# More CE candidates: line items that mention both "capital" and "spend",
# minus those hitting either exclusion pattern (all case-insensitive).
label_text = df.index.str
is_capital_spending = (
  label_text.contains(r"capital", case=False, regex=True, na=False)
  & label_text.contains(r"spend", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"discontinued|liquidity", case=False, regex=True, na=False)
  | label_text.contains(r"construction|liabilities|escrow|estate", case=False, regex=True, na=False)
)
filter = is_capital_spending & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Capital Spending,X


In [49]:
# Tag the rows selected by `filter` as CE. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'CE'

In [50]:
#PPE Property, plant, equipment
#
# Line items that mention both "property" and "equipment" and hit none of
# the exclusion patterns (all case-insensitive).
label_text = df.index.str
excluded_patterns = (
  r"discontinued|liquidity|lease|software",
  r"gain|insurance|deprec|equity|held|sale",
  r"transfer|debt|disposal|capital|financing",
  r"construction|liabilities|escrow|estate",
)
is_excluded = np.logical_or.reduce([
  label_text.contains(pattern, case=False, regex=True, na=False)
  for pattern in excluded_patterns
])
is_property_and_equipment = (
  label_text.contains(r"property", case=False, regex=True, na=False)
  & label_text.contains(r"equipment", case=False, regex=True, na=False)
)
filter = is_property_and_equipment & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Accrued Purchases Of Property, Plant And Equipment",X
Additions To Property And Equipment,X
Investment In Property And Equipment,X
"Property, Plant And Equipment, Net",X
Proceeds From Government Assistance Allocated To Property And Equipment,X
"Purchases Of Other Property, Plant, And Equipment",X
"Purchases Of Property And Equipment And Other Investing Activities, Net",X
"Additions To Other Property, Plant And Equipment",X
Net Increase In Property And Equipment Included In Accounts Payable And Accrued Expenses,X
"Non-Cash Acquisitions Of Property, Plant And Equipment",X


In [51]:
# Tag the rows selected by `filter` as PPE. Writing through .loc with an
# explicit column keeps the label confined to 'type'.
df.loc[filter, 'type'] = 'PPE'

In [52]:
# Re-run the "capital" + "spend" selection (same patterns as the earlier CE
# cell) to display the current label of the matching rows. Note that this
# also rebinds `filter`.
label_text = df.index.str
is_capital_spending = (
  label_text.contains(r"capital", case=False, regex=True, na=False)
  & label_text.contains(r"spend", case=False, regex=True, na=False)
)
is_excluded = (
  label_text.contains(r"discontinued|liquidity", case=False, regex=True, na=False)
  | label_text.contains(r"construction|liabilities|escrow|estate", case=False, regex=True, na=False)
)
filter = is_capital_spending & ~is_excluded

df.loc[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Capital Spending,CE


In [53]:
# Keep only the line items that received a label (anything other than the
# placeholder 'X').
is_mapped = df['type'].ne('X')
df_line_items = df.loc[is_mapped]
df_line_items

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Repurchase Of Common Shares And Shares Withheld In Lieu Of Income Taxes,SBB
Market Value Of Common Shares Received From Employees In Connection With Share-Based Compensation See Note 17,SC
Net Cash And Cash Equivalents Provided By Used In Operating Activities,NCO
"Accrued Purchases Of Property, Plant And Equipment",PPE
"Repurchases Of Common Stock, Including Accelerated Share Repurchase",SBB
...,...
"Purchases Of Property, Plant And Equipment, Accrued",PPE
Stock Repurchased For Payroll Taxes And Other,SBB
Conversion Of Warrant For Convertible Preferred Stock To A Warrant For Class B Common Stock And Net Exercise Of Warrant To Purchase Class B Common Stock,SBB
Net Cash From Used For Financing Activities,NCF


In [54]:
# Save mappings
#
# Bundle the labelled heading and line-item tables under one dict.
mappings = {
  'headings': df_headings,
  'line_items': df_line_items,
}

# Save to file. The path is built with os.path.join, like the other pickle
# paths in this notebook, rather than by string concatenation.
filepath = os.path.join(PROJ_ROOT_PATH, 'pickle', 'mappings_cash_stage3.pkl')

with open(filepath, 'wb') as f:
  pickle.dump(mappings, f)


In [55]:
# Persist the raw label lists (the dict returned by get_table_labels)
# alongside the mappings.
filepath = os.path.join(
  PROJ_ROOT_PATH,
  'pickle',
  'table_labels_cash_stage3.pkl',
)

with open(filepath, 'wb') as f:
  pickle.dump(table_labels, f)
