In [1]:
import numpy as np
import pandas as pd
import time
import os
import pickle
import re

In [2]:
# Notebook-wide pandas display settings: show up to 700 rows / 500 columns,
# cut cell text at 100 characters, and render floats with two decimals.
pd.options.display.max_rows = 700
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = 100
pd.set_option('display.float_format', '{:.2f}'.format)

In [3]:
# Root directories used by this notebook. The defaults are the original
# machine-specific locations; set MD1_DATA_ROOT_PATH / MD1_PROJ_ROOT_PATH in the
# environment to run the notebook elsewhere without editing this cell.
DATA_ROOT_PATH = os.environ.get("MD1_DATA_ROOT_PATH", "/mnt/data/projects/MD1/data/R1000/reports/")
PROJ_ROOT_PATH = os.environ.get("MD1_PROJ_ROOT_PATH", "/home/priyesh/projects/MD1")

In [4]:
# Load the interim clean master data from its pickle file under the project root.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.

filepath = os.path.join(
    PROJ_ROOT_PATH,
    'pickle',
    'clean_master_data_interim.pkl',
)
clean_master_data = pd.read_pickle(filepath)

In [5]:
def get_line_items(t):
  """Split the row labels of table ``t`` into headings and line items.

  A row whose cells are all empty strings or missing values is treated as a
  heading; every other row is treated as a line item.

  Args:
    t: DataFrame whose index holds the row labels.

  Returns:
    Tuple ``(headings, line_items)`` of index labels, each in row order.
  """
  headings, line_items = [], []

  for label, cells in t.iterrows():
    # Empty strings count as missing, so a row with no values at all is "blank".
    row_is_blank = cells.replace('', np.nan).isna().all()
    bucket = headings if row_is_blank else line_items
    bucket.append(label)

  return headings, line_items

In [6]:
def get_table_labels(stype, data=None, verbose=True):
  """Collect the distinct heading and line-item labels for one statement type.

  Walks every ticker and year in ``data``; wherever an entry for ``stype``
  exists, its ``'table'`` is classified with ``get_line_items`` and the
  resulting labels are accumulated.

  Args:
    stype: Statement key looked up under each ticker/year (e.g. ``'cash'``).
    data: Nested mapping indexed as ``data[ticker][yr][stype]['table']``.
      Defaults to the notebook-level ``clean_master_data``.
    verbose: When true (default), print each ticker as it is processed.

  Returns:
    Dict with keys ``'headings'`` and ``'line_items'``; each value is a list
    of unique labels in first-seen order, so repeated runs give the same order.

  Raises:
    RuntimeError: If a table cannot be processed. The message names the
      ticker and year, and the original exception is chained as the cause.
  """
  if data is None:
    data = clean_master_data

  # Dicts serve as insertion-ordered sets: de-duplication happens while
  # accumulating, without repeatedly copying ever-growing lists.
  unique_headings = {}
  unique_line_items = {}

  for ticker in list(data.keys()):

    if verbose:
      print(ticker)

    for yr in data[ticker]:
      statements = data[ticker][yr]

      # The statement may be missing for a given ticker and year.
      if stype not in statements:
        continue

      try:
        headings, line_items = get_line_items(statements[stype]['table'])
      except Exception as err:
        # Catch Exception rather than using a bare except (which would also
        # trap KeyboardInterrupt), and re-raise with context instead of
        # calling exit() so the underlying error stays visible.
        raise RuntimeError("Error : {} {}".format(ticker, yr)) from err

      unique_headings.update(dict.fromkeys(headings))
      unique_line_items.update(dict.fromkeys(line_items))

  return {'headings' : list(unique_headings),
          'line_items' : list(unique_line_items)}


In [7]:
# Gather the heading / line-item labels of every 'cash' statement.
table_labels = get_table_labels(stype='cash')

AA
AAL
AAP
AAPL
ABBV
ABNB
ABT
ACGL
ACHC
ACI
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
ADT
AEE
AEP
AES
AFG
AFL
AFRM
AGCO
AGL
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGM
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AM
AMAT
AMC
AMCR
AMD
AME
AMED
AMG
AMGN
AMP
AMZN
AN
ANET
ANSS
AON
AOS
APA
APD
APH
APO
APP
APTV
AR
ARES
ARMK
ARW
ASH
ATO
ATR
ATUS
ATVI
AVGO
AVT
AVTR
AWI
AWK
AXON
AXS
AXTA
AYI
AYX
AZEK
AZO
AZTA
BA
BAC
BAH
BALL
BAX
BBWI
BBY
BC
BDX
BEN
BERY
BFAM
BG
BHF
BIIB
BILL
BIO
BJ
BK
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BSX
BSY
BURL
BWA
BWXT
BX
BYD
C
CABO
CACC
CAH
CAR
CARR
CAT
CB
CBOE
CBSH
CC
CCCS
CCK
CCL
CDAY
CDNS
CDW
CE
CEG
CERT
CF
CFG
CFLT
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHPT
CHRW
CHTR
CIEN
CINF
CL
CLH
CLVT
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNM
CNP
CNXC
COF
COIN
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CR
CRI
CRL
CRUS
CRWD
CSCO
CSL
CTLT
CTRA
CTSH
CTVA
CVNA
CVX
CW
CZR
D
DAL
DAR
DASH
DBX
DCI
DD
DDOG
DECK
DFS
DG
DGX
DHI
DHR
DINO
DIS
DISH
DKNG
DKS
DLB
DLTR
DNA
DOCS
DOCU
DOV
DOW
DPZ
DRVN
DT
DTE
DTM
DUK
DV
DVA
DVN
DXC
DX

In [8]:
# One flat list of labels (headings first, then line items) in a single-column
# frame, so the cells below can filter it with string matching.
all_table_labels = [*table_labels['headings'], *table_labels['line_items']]
df = pd.DataFrame(data=all_table_labels, columns=['line_item'])

### Net Income

In [16]:
# Labels mentioning both "Net Income" and "Adjustment", but not "Other".
mask = (
    df['line_item'].str.contains(r"Net Income", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Adjustment", case=True, regex=True, na=False)
    & ~df['line_item'].str.contains(r"Other", case=True, regex=True, na=False)
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,30,Adjustments To Reconcile Net Income To Net Cash And Cash Equivalents Provided By Used For Operat...
1,47,Adjustments To Reconcile Net Income Loss To Cash Provided From Used For Operations:
2,51,Adjustments To Reconcile Net Income To Total Net Cash From Operating Activities:
3,56,Adjustments To Reconcile Net Income Loss To Net Cash Flows Provided By Used For Operating Activi...
4,62,Adjustments To Reconcile Net Income Loss To Operating Cash Flows:
5,127,Adjustments To Reconcile Net Income To Net Cash Provided By Operating Activities Net Of Acquisit...
6,153,Adjustments To Reconcile Net Income After Extraordinary Items To Net Cash Provided By Operating ...
7,199,Adjustments To Reconcile Net Income To Net Cash Flows From Operating Activities:
8,209,Adjustments To Reconcile Net Income To Total Operating Activities:
9,235,Adjustments To Reconcile Consolidated Net Income To Net Cash Provided By Operating Activities:


In [19]:
# "Net Income" labels with none of the qualifier fragments listed below.
mask = (
    df['line_item'].str.contains(r"Net Income", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Adjustment|Non|Equity|Oper|Ins", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Attrib|Incl|Deduct|Tax|Tran|Comp", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Invest|Comm|Stock|Share|Nume", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Extr|Acq|Dives|Cons|Cont|Citi", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,283,Net Income
1,2633,Net Income Loss
2,11573,Net Income A
3,15288,Net Income / Loss
4,17038,Net Income Loss:
5,18387,Net Income
6,18660,Net Income:
7,25955,Net Income/-Loss
8,32422,Net Income Loss


In [21]:
# Labels containing all of "Net Income", "Adjustment", "Non" and "Other".
mask = (
    df['line_item'].str.contains(r"Net Income", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Adjustment", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Non", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Other", case=True, regex=True, na=False)
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,15889,"Other Non-Cash Adjustments To Net Income, Net"
1,24622,Other Non-Cash Adjustments To Net Income Loss
2,25213,Other Non-Cash Adjustments To Net Income


In [22]:
# Labels containing all of "Net Income", "Non", "Incl" and "Other".
mask = (
    df['line_item'].str.contains(r"Net Income", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Non", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Incl", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Other", case=True, regex=True, na=False)
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,12107,Other Non-Cash Amounts Included In Net Income
1,24511,"Other Non-Cash Amounts Included In Net Income Loss, Net"
2,27840,Other Non-Cash Amounts Included In Net Income Loss
3,34116,Other Non-Cash Items Included In Net Income


In [24]:
# Earnings/income labels about continuing operations that mention "Adjust",
# minus any label containing one of the excluded fragments.
mask = (
    df['line_item'].str.contains(r"Operations", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Continuing", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Earn|Income", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Adjust", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Tax|Discon|Part|Share", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Dil|Pro|Begin|Less|Other|Basic", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,267,Adjustments To Reconcile Earnings Loss From Continuing Operations To Net Cash From Operating Act...
1,361,Adjustments To Reconcile Earnings From Continuing Operations To Net Cash From Operations:
2,727,Adjustments To Reconcile Income Loss From Continuing Operations To Net Cash From Operating Activ...
3,771,Adjustments To Income From Continuing Operations
4,814,Adjustments To Reconcile Earnings From Continuing Operations To Net Cash From Operating Activities:
5,1341,Adjustments To Income From Continuing Operations:
6,1757,Adjustments To Reconcile Net Income Loss To Operating Cash Flows From Continuing Operations:
7,1843,Adjustments To Reconcile Income From Continuing Operations To Cash Flows From Operating Activities
8,1904,Adjustments To Income Loss From Continuing Operations
9,2077,Adjustments To Reconcile Net Income To Net Cash From Continuing Operations:


In [26]:
# Earnings/income labels about continuing operations that mention "Net",
# minus any label containing one of the excluded fragments.
mask = (
    df['line_item'].str.contains(r"Operations", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Continuing", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Earn|Income", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Net", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Adjust|Non|Attrib|Common", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Tax|Discon|Part|Share", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Dil|Pro|Begin|Less|Other|Basic", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,5856,Net Earnings Loss From Continuing Operations
1,24764,Net Income/-Loss From Continuing Operations
2,30130,"Income From Continuing Operations, Net"
3,30656,Net Loss Income From Continuing Operations
4,34965,Net Earnings From Continuing Operations
5,35360,Net Loss Earnings From Continuing Operations
6,36329,Net Income Loss From Continuing Operations
7,39588,Net Income From Continuing Operations


In [27]:
# Earnings/income labels about continuing operations that do NOT mention "Net",
# minus any label containing one of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Operations", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Continuing", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Earn|Income", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Net", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Adjust|Non|Attrib|Common", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Tax|Discon|Part|Share|Retain", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Dil|Pro|Begin|Less|Other|Basic", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,4949,Income From Continuing Operations:
1,6012,-Loss Income From Continuing Operations
2,7834,Income From Continuing Operations
3,20334,Earnings/-Loss From Continuing Operations
4,20871,Income/-Loss From Continuing Operations
5,22877,Income Loss From Continuing Operations
6,23474,Earnings From Continuing Operations
7,24302,Earnings Loss From Continuing Operations
8,39453,-Loss Earnings From Continuing Operations


### Cash Flow From Operating Activities

In [28]:
# "Cash?Flow" labels ("." matches any single character) that mention
# "Operating" and contain none of the excluded fragments (including "Total").
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Operating", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Total|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|Fin|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,282,Cash Flows From Used For Operating Activities:
1,309,Cash Flows – Operating Activities:
2,366,Cash Flows From Operating Activities Of Continuing Operations:
3,546,Cash Flows Used In Provided By Operating Activities:
4,555,Operating Cash Flows:
5,701,Cash Flows From Operating Activities: [Abstract]
6,784,Cash Flows Provided By Disbursed For Operating Activities:
7,787,Cash Flows – Operating Activities
8,851,Cash Flows From Operating Activities Of Continuing Operations
9,860,Cash Flows From Operating Activities From Continuing Operations:


### Total Cash Flows From Operating Activities

In [29]:
# Same operating cash-flow filter, but the label must also contain "Total".
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Operating", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|Fin|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,9979,Total Cash Flows Provided Used By Operating Activities From Continuing Operations
1,15570,Total Cash Flows Provided By Operating Activities
2,15824,Total Cash Flows From Operating Activities
3,21076,Total Cash Flows Provided By Used In Operating Activities
4,22753,Total Cash Flows Provided By Operating Activities From Continuing Operations
5,27773,Total Cash Flows Provided By Operating Activities Of Continuing Operations


### Cash Flow From Financing

In [15]:
# "Cash?Flow" labels that mention "Fina" and contain none of the excluded
# fragments (including "Total" and "Cont").
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Fina", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Total|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Attrib|Cont|Dis|Lease", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,36,Cash Flows From Financing Activities
1,103,Cash Flow Used In Provided By Financing Activities
2,109,Cash Flows Provided From Used By Financing Activities
3,116,Cash Flows For From Financing Activities:
4,255,Cash Flow Used In Financing Activities:
5,257,Cash Flows Used In Financing Activities:
6,403,Cash Flows Used In Provided By Financing Activities:
7,584,Cash Flows From Financing Activities-
8,619,Cash Flows From Used For Financing Activities:
9,644,Cash Flows Financing Activities


In [16]:
# Financing cash-flow labels that also contain "Total"; "Cont" stays excluded.
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Fina", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Attrib|Cont|Dis|Lease", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,9719,Total Cash Flows Used In Provided By Financing Activities
1,17038,Total Cash Flows From Financing Activities
2,30145,Total Cash Flows Used In Financing Activities
3,38305,Total Cash Flows Provided By Used In Financing Activities
4,38909,Total Cash Flows Provided Used By Financing Activities


In [22]:
# Financing cash-flow labels that contain both "Total" and "Cont".
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Fina", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Cont", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Dis|Lease", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,28522,Total Cash Flows Used In Provided By Financing Activities Of Continuing Operations
1,33283,Total Cash Flows Used In Financing Activities Of Continuing Operations
2,33641,Total Cash Flows Provided By Used In Financing Activities Of Continuing Operations
3,34669,Total Cash Flows Used By Financing Activities From Continuing Operations
4,37713,Total Cash Flows Provided Used By Financing Activities From Continuing Operations


In [24]:
# Financing cash-flow labels that contain "Cont" but not "Total".
mask = (
    df['line_item'].str.contains(r"Cash.Flow", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Fina", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Cont", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Working|Convert|Changes", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Dis|Lease|Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Addition|Increase|Decrease|Rel", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Adj|Reco|Net|Paid|Non|lea", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,295,Cash Flows From Financing Activities Continuing Operations:
1,1205,Cash Flows From Financing Activities Attributable To Continuing Operations:
2,1252,Cash Flows From Financing Activities Of Continuing Operations
3,1521,Cash Flows From Financing Activities Continuing Operations:
4,1771,Cash Flows From Financing Activities From Continuing Operations:
5,2209,Cash Flows From Financing Activities Of Continuing Operations:
6,2381,Cash Flows Provided Used By Financing Activities From Continuing Operations
7,23265,Financing Cash Flows From Used In Continuing Operations
8,24577,Financing Cash Flow From Continuing Operations
9,27499,Financing Cash Flows Used In Continuing Operations


### Cash and Cash Equivalents

In [26]:
# "Cash Equivalents" labels containing "End Of" but not "Total", minus the
# excluded fragments (the Increase/Decrease/Recon/Change test ignores case).
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"End Of", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Recon|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,777,Cash And Cash Equivalents At End Of Period
1,965,"Cash And Cash Equivalents, End Of Year"
2,1515,"Cash, Cash Equivalents, And Restricted Cash At End Of Period:"
3,1705,"Cash, Cash Equivalents And Restricted Cash At End Of Period"
4,1947,"Cash, Cash Equivalents And Restricted Cash At End Of Period:"
5,3160,"Cash, Cash Equivalents And Restricted Cash At End Of Year"
6,3836,"Cash, Cash Equivalents And Restricted Cash At End Of Year Includes 2 Million Of Restricted Cash ..."
7,3985,"Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents At End Of Period"
8,4252,"Cash, Cash Equivalents And Restricted Cash At End Of Fiscal Year"
9,4254,"Cash And Cash Equivalents And Restricted Cash, End Of Year"


In [27]:
# "Cash Equivalents" labels containing both "End Of" and "Total", minus the
# excluded fragments (the Increase/Decrease/Recon/Change test ignores case).
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"End Of", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Increase|Decrease|Recon|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,3785,"Total Cash, Cash Equivalents And Restricted Cash, End Of Period"
1,5082,"Total Cash, Cash Equivalents And Restricted Cash—End Of Year"
2,6958,"Total Cash And Cash Equivalents And Restricted Cash And Cash Equivalent, End Of Period"
3,8360,"Total Cash And Cash Equivalents And Restricted Cash And Cash Equivalents, End Of Period"
4,10120,"Total Cash And Cash Equivalents, End Of Period"
5,14142,"Total Cash, Cash Equivalents And Restricted Cash End Of Year"
6,27204,Total Cash And Cash Equivalents At End Of Period
7,31627,"Total Cash And Cash Equivalents, And Restricted Cash, End Of Period"
8,32664,"Total Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents, End Of Year"
9,40776,Total Cash And Cash Equivalents At End Of Period Including 326 Million Classified As Held For Sa...


In [48]:
# "Cash Equivalents" labels with no period/total/balance wording and none of
# the other excluded fragments (two of the exclusion tests ignore case).
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"End Of|End|Period", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Balance", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Ending|Jan|Dec|Of", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,73,"Cash, Cash Equivalents, And Restricted Cash"
1,206,Cash And Cash Equivalents Include:
2,207,"Cash, Cash Equivalents, Restricted Cash And Restricted Cash Equivalents [Abstract]"
3,301,Cash And Cash Equivalents [Abstract]
4,395,"Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents [Abstract]"
5,812,Cash And Cash Equivalents Including Restricted Cash
6,1045,"Cash, Cash Equivalents And Restricted Cash"
7,1330,"Cash And Cash Equivalents, Including Restricted Cash"
8,1349,"Cash And Cash Equivalents, And Restricted Cash And Cash Equivalents"
9,1760,Cash And Cash Equivalents And Restricted Cash


In [50]:
# "Cash Equivalents" labels that contain "Total" but no period/balance wording
# and none of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"End Of|End|Period", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Balance", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Ending|Jan|Dec|Of", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,12078,"Total Cash, Cash Equivalents And Restricted Cash"
1,15480,"Total Cash, Cash Equivalents, And Restricted Cash"
2,15573,"Cash, Cash Equivalents, Restricted Cash And Restricted Cash Equivalents, Total"
3,24095,Total Cash And Cash Equivalents And Restricted Cash And Restricted Cash Equivalents
4,24458,"Total Cash, Cash Equivalents, Restricted Cash And Restricted Cash Equivalents"
5,27487,"Cash And Cash Equivalents And Restricted Cash, Total"
6,28590,"Total Cash, Cash Equivalents Restricted Cash And Restricted Cash Equivalents"
7,30309,"Total Cash And Cash Equivalents, And Restricted Cash"
8,37162,"Total Cash, Cash Equivalents And Restricted Cash And Cash Equivalents"
9,38189,"Total Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents"


In [53]:
# "Cash Equivalents" labels with end/period wording, without "Total" or
# "Balance", and with none of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"End Of|End|Period", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Balance", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Jan|Dec", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,777,Cash And Cash Equivalents At End Of Period
1,965,"Cash And Cash Equivalents, End Of Year"
2,1515,"Cash, Cash Equivalents, And Restricted Cash At End Of Period:"
3,1705,"Cash, Cash Equivalents And Restricted Cash At End Of Period"
4,1947,"Cash, Cash Equivalents And Restricted Cash At End Of Period:"
5,3160,"Cash, Cash Equivalents And Restricted Cash At End Of Year"
6,3985,"Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents At End Of Period"
7,4252,"Cash, Cash Equivalents And Restricted Cash At End Of Fiscal Year"
8,4254,"Cash And Cash Equivalents And Restricted Cash, End Of Year"
9,4301,"Cash, Cash Equivalents And Restricted Cash End Of Year"


In [54]:
# "Cash Equivalents" labels with end/period wording that also contain
# "Balance", without "Total" and with none of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"End Of|End|Period", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Balance", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Jan|Dec", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Beg|Curr", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,706,Cash And Cash Equivalents Ending Balance Predecessor
1,10551,Cash And Cash Equivalents Balance At End Of Year
2,10804,"Cash, Cash Equivalents, Restricted Cash And Restricted Cash Equivalents, Ending Balance"
3,11047,"Cash, Cash Equivalents And Restricted Cash, Ending Balances"
4,11925,"Cash And Cash Equivalents Balance, End Of Year"
5,12421,"Cash, Cash Equivalents And Restricted Cash Ending Balance"
6,15067,"Cash, Cash Equivalents And Restricted Cash Ending Balance Successor"
7,17221,"Cash And Cash Equivalents, At Carrying Value, Ending Balance"
8,19051,Cash And Cash Equivalents Ending Balance Predecessor
9,20781,"Cash, Cash Equivalents And Restricted Cash, Ending Balance"


In [55]:
# "Cash Equivalents" labels containing "Beg", without "Total" or end wording
# and with none of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Beg", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"End Of|End", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Jan|Dec", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Curre", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,63,"Cash And Cash Equivalents, Beginning Of Year:"
1,129,Cash And Cash Equivalents Beginning Balance Predecessor
2,697,Cash And Cash Equivalents At Beginning Of Period
3,3453,"Cash, Cash Equivalents And Restricted Cash At Beginning Of Period"
4,3809,Cash And Cash Equivalents Beginning Of Period
5,3879,"Cash, Cash Equivalents And Restricted Cash Equivalents At Beginning Of Period"
6,4358,Beginning Cash And Cash Equivalents
7,4390,"Cash, Cash Equivalents, And Restricted Cash And Cash Equivalents, Beginning Of Year"
8,4655,"Cash, Cash Equivalents, Restricted Cash, And Restricted Cash Equivalents At Beginning Of Period"
9,4657,"Cash, Cash Equivalents And Restricted Cash—Beginning Of Period"


In [56]:
# "Cash Equivalents" labels containing both "Total" and "Beg", without end
# wording and with none of the other excluded fragments.
mask = (
    df['line_item'].str.contains(r"Cash Equivalents", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Total", case=True, regex=True, na=False)
    & df['line_item'].str.contains(r"Beg", case=True, regex=True, na=False)
    & ~(
        df['line_item'].str.contains(r"End Of|End", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Finan|Invest|Operat", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Jan|Dec", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Settlement|Sales|Cont|Recon", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Held|Consisted|Retain|vie|var", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Increase|Decrease|Reclass|Change", case=False, regex=True, na=False)
        | df['line_item'].str.contains(r"Disc|Exchange|Foreign|Curre", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Cust|Investment|Proceed", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"^Restricted|Decon|Use Of Rest", case=True, regex=True, na=False)
        | df['line_item'].str.contains(r"Acqui|Tran|Client|Less|Excl", case=True, regex=True, na=False)
    )
)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,9994,"Total Cash And Cash Equivalents, And Restricted Cash, Beginning Of Period"
1,17706,"Total Cash And Cash Equivalents And Restricted Cash And Cash Equivalents, Beginning Of Period"
2,27921,Total Cash And Cash Equivalents At Beginning Of Period


### Cash Flow From Investing

In [61]:
# Investing-activities labels that mention neither "Total" nor "Cont"
# (i.e. not the continuing-operations variants).
# "Cash.Flow" is a regex, so "." accepts any single character between the words.
item_text = df['line_item']

include_patterns = [r"Cash.Flow", r"Invest"]

exclude_patterns = [
    r"Cont",
    r"Lessee|Dis|Req|Other",
    r"Working|Total|Convert|Changes",
    r"Addition|Increase|Decrease|Rel",
    r"Disc|Adj|Reco|Net|Paid|Non|lea",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,205,Cash Flows For Investing Activities:
1,307,Cash Flows From Investing Activities-
2,435,Cash Flows Used In Provided By Investing Activities:
3,484,Cash Flows Investing Activities
4,492,Cash Flows Provided By Used For Investing Activities:
5,683,Cash Flows Provided By Used In Investing Activities
6,717,Cash Flows Used In Investing Activities:
7,769,Cash Flow From Investing Activities
8,811,Cash Flows From Investing Activities:
9,831,Cash Flows From/-Used In Investing Activities


In [62]:
# Investing-activities labels that also mention "Cont" (continuing operations)
# but not "Total".
# "Cash.Flow" is a regex, so "." accepts any single character between the words.
item_text = df['line_item']

include_patterns = [r"Cash.Flow", r"Invest", r"Cont"]

exclude_patterns = [
    r"Lessee|Dis|Req|Other",
    r"Working|Total|Convert|Changes",
    r"Addition|Increase|Decrease|Rel",
    r"Disc|Adj|Reco|Net|Paid|Non|lea",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,790,Cash Flows Provided Used By Investing Activities From Continuing Operations
1,881,Cash Flows From Investing Activities Attributable To Continuing Operations:
2,1416,Cash Flows From Investing Activities Of Continuing Operations:
3,2294,Cash Flows From Investing Activities From Continuing Operations:
4,2426,Cash Flows From Investing Activities Of Continuing Operations
5,3058,Cash Flows From Investing Activities Continuing Operations:
6,12685,Cash Flows From Investing Activities Continuing Operations
7,14375,Cash Flows From Investing Activities – Continuing Operations
8,28514,Investing Cash Flow From Continuing Operations
9,40114,Investing Cash Flows Used In Continuing Operations


In [63]:
# "Total" investing cash-flow labels that also mention "Cont"
# (continuing operations).
# "Cash.Flow" is a regex, so "." accepts any single character between the words.
item_text = df['line_item']

include_patterns = [r"Cash.Flow", r"Invest", r"Total", r"Cont"]

exclude_patterns = [
    r"Lessee|Dis|Req|Other",
    r"Working|Convert|Changes",
    r"Addition|Increase|Decrease|Rel",
    r"Disc|Adj|Reco|Net|Paid|Non|lea",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,31260,Total Cash Flows Used In Investing Activities Of Continuing Operations
1,32017,Total Cash Flows Provided By Investing Activities Of Continuing Operations
2,40853,Total Cash Flows Used By Investing Activities From Continuing Operations
3,42281,Total Cash Flows Provided By Used In Investing Activities Of Continuing Operations


In [64]:
# "Total" investing cash-flow labels that do NOT mention "Cont"
# (the complement of the continuing-operations totals).
# "Cash.Flow" is a regex, so "." accepts any single character between the words.
item_text = df['line_item']

include_patterns = [r"Cash.Flow", r"Invest", r"Total"]

exclude_patterns = [
    r"Cont",
    r"Lessee|Dis|Req|Other",
    r"Working|Convert|Changes",
    r"Addition|Increase|Decrease|Rel",
    r"Disc|Adj|Reco|Net|Paid|Non|lea",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,13810,Total Cash Flows Provided By Used In Investing Activities
1,14693,Total Cash Flows Used In Provided By Investing Activities
2,27792,Total Cash Flows Used In Investing Activities
3,42275,Total Cash Flows From Investing Activities


### Stock Based Compensation

In [70]:
# Stock/share-based compensation labels: must mention "Compensation" together
# with "Stock" or "Share", and must not hit any of the exclusion stems below.
# All matching is case-sensitive substring/regex matching.
item_text = df['line_item']

include_patterns = [r"Compensation", r"Stock|Share"]

exclude_patterns = [
    r"Director|Recog|Long|Settle|Adj",
    r"Ret|Withhold|Opt|Manage|Trustee",
    r"Proc|Grant|Excess|Cap|Cash",
    r"Non.Employee|Aff|Mod",
    r"Rel|Tax|Common|Inter|Amort|Issu",
    r"Acqui|Tran|Client",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,2100,Share-Based Compensation
1,2530,Stock-Based Compensation
2,3110,Stock Based Compensation
3,3973,"Share-Based Compensation Activities, Net"
4,4179,Employee Stock Compensation Expense
5,4305,Stock-Based Employee Compensation
6,4854,"Stock And Unit-Based Compensation, Net Of Forfeitures"
7,6051,Shared Based Compensation Expense
8,6453,Stock Compensation
9,6614,Share Based Payments In Satisfaction Of Accrued Compensation


### Dividends

In [73]:
# Dividend labels that refer to common or ordinary shares ("Common|Ord"),
# minus anything matching the exclusion stems below (case-sensitive).
# NOTE(review): "Cash" is among the exclusions, so labels such as
# "Cash Dividends Paid On Common Stock" are filtered out here - confirm
# this is intended and not a leftover from another filter.
item_text = df['line_item']

include_patterns = [r"Dividends", r"Common|Ord"]

exclude_patterns = [
    r"Ret|Withhold|Opt|Manage|Trustee",
    r"Proc|Grant|Excess|Cap|Cash",
    r"Liability|Increase|Specia",
    r"Kind|Wend|Dowd|Not Paid|Dec",
    r"Changes|Ren|Trad",
    r"Equi|Class|Vor|Ugi|Ppg",
    r"Fina|Member|Accr|Renn|Contra",
    r"Net Of Dividend|Other|Former",
    r"Reinvest|Part|Unvest|Not Yet",
    r"Non.Employee|Aff|Mod|Pref|Unpaid",
    r"Quarter|Invest|Subsid|Rec|From|Non",
    r"Acqui|Tran|Client|Less|Excl|Parent",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,5382,Dividends Paid On Toro Common Stock
1,8768,Dividends Paid To Ordinary Shareholders
2,10123,"Payments Of Dividends, Common Stock"
3,11308,Common Dividends Issued In Stock
4,12166,Dividends To Common Shareholders
5,12997,Dividends Paid On Common Stock
6,13914,Payment Of Common Stock Dividends
7,14729,Common Dividends Paid
8,14904,"Dividends, Common Stock, Stock"
9,15011,Common Stock Dividends Paid


In [75]:
# "Dividends ... Paid" labels that do NOT mention common/ordinary shares,
# minus anything matching the exclusion stems below (case-sensitive).
# NOTE(review): "Cash" is among the exclusions, so labels such as
# "Cash Dividends Paid" are filtered out here - confirm this is intended.
item_text = df['line_item']

include_patterns = [r"Dividends", r"Paid"]

exclude_patterns = [
    r"Common|Ord",
    r"Ret|Withhold|Opt|Manage|Trustee",
    r"Proc|Grant|Excess|Cap|Cash",
    r"Liability|Increase|Specia",
    r"Kind|Wend|Dowd|Not Paid|Dec",
    r"Changes|Ren|Trad",
    r"Equi|Class|Vor|Ugi|Ppg",
    r"Fina|Member|Accr|Renn|Contra",
    r"Net Of Dividend|Other|Former",
    r"Reinvest|Part|Unvest|Not Yet",
    r"Non.Employee|Aff|Mod|Pref|Unpaid",
    r"Quarter|Invest|Subsid|Rec|From|Non",
    r"Acqui|Tran|Client|Less|Excl|Parent",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,3004,Dividends Paid:
1,10621,Dividends Paid And Distributions
2,13437,Dividends Paid To Shareholders
3,14047,Dividends And Interest Paid
4,18328,Dividends Paid
5,21492,Dividends And Dividend Rights Paid
6,25391,Dividends Paid To Tfac
7,28928,Dividends Paid To Borgwarner Stockholders
8,29853,Dividends Paid To Lear Corporation Stockholders
9,33233,Dividends Paid To Lkq Stockholders


In [76]:
# Dividend labels flagged as "Not Yet" / "Not Paid", with a small set of
# excluded stems filtered out. All matching is case-sensitive.
item_text = df['line_item']

mentions_dividends = item_text.str.contains(r"Dividends", case=True, regex=True, na=False)
mentions_not_paid = item_text.str.contains(r"Not Yet|Not Paid", case=True, regex=True, na=False)
hits_exclusion = item_text.str.contains(r"Acqui|Tran|Client|Less|Excl|Parent", case=True, regex=True, na=False)

mask = mentions_dividends & mentions_not_paid & ~hits_exclusion

df[mask].reset_index()

Unnamed: 0,index,line_item
0,7441,"Cash Dividends Declared And Accrued, But Not Paid"
1,12630,Dividends Declared Not Yet Paid
2,12751,Common Stock Dividends Declared But Not Yet Paid
3,14361,"Dividends Declared, But Not Yet Paid"
4,15653,"Dividends Declared, Not Yet Paid"
5,15713,Dividends Declared But Not Yet Paid
6,17042,"Cash Dividends Declared On Common Stock, Not Yet Paid"
7,18396,"Dividends Declared, Not Paid"
8,21676,Dividends Accrued But Not Yet Paid
9,23651,"Cash Dividends Declared, Not Yet Paid"


### Capital Expenditure

In [80]:
# Capital-expenditure labels: must mention both "Expenditure" and "Capital",
# and must not mention "Total", "Not Paid", "Funding" or any other exclusion
# stem below. All matching is case-sensitive.
item_text = df['line_item']

include_patterns = [r"Expenditure", r"Capital"]

exclude_patterns = [
    r"Total",
    r"Not Paid",
    r"Funding",
    r"Repay|Project|Adj|Increa|Decre",
    r"Prop|Constr|Estate|Rel|Financed",
    r"Account|Disallow|Other|Addi|Fina",
    r"Unpaid|Accru|Rec|Discon|Not Yet",
    r"Acqui|Tran|Client|Less|Excl|Parent",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,963,"Capital Expenditures, Including Ltsa Prepayments"
1,1897,Capital Expenditures
2,3494,Capital Expenditures:
3,5436,"Capital Expenditures, Net Of Disposals"
4,6086,Expenditures For Capitalized Internal-Use Software
5,8512,Capital Expenditures To Be Reimbursed Under A Government Contract
6,9356,Capital Expenditures Including Capitalized Software
7,9915,"Capital Expenditures, Including Software"
8,10960,Capital And Software Expenditures
9,11142,Capitalized Software Expenditures


In [82]:
# Capital-expenditure labels that explicitly say "Not Paid": must mention
# "Expenditure", "Capital" and "Not Paid", and must not hit any exclusion
# stem below. All matching is case-sensitive.
item_text = df['line_item']

include_patterns = [r"Expenditure", r"Capital", r"Not Paid"]

exclude_patterns = [
    r"Total",
    r"Funding",
    r"Repay|Project|Adj|Increa|Decre",
    r"Prop|Constr|Estate|Rel|Financed",
    r"Account|Disallow|Other|Addi|Fina",
    r"Unpaid|Accru|Rec|Discon|Not Yet",
    r"Acqui|Tran|Client|Less|Excl|Parent",
]

mask = item_text.str.contains(include_patterns[0], case=True, regex=True, na=False)
for pat in include_patterns[1:]:
    mask = mask & item_text.str.contains(pat, case=True, regex=True, na=False)
for pat in exclude_patterns:
    mask = mask & ~item_text.str.contains(pat, case=True, regex=True, na=False)

df[mask].reset_index()

Unnamed: 0,index,line_item
0,22470,Capital Expenditures Incurred But Not Paid
1,24902,Capital Expenditures Not Paid For
2,31369,Capital Expenditures Not Paid
