In [1]:
import numpy as np
import pandas as pd
import os
import pickle

In [2]:
pd.set_option('display.max_rows', 700)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)
pd.options.display.float_format = '{:.2f}'.format

In [3]:
DATA_ROOT_PATH="/mnt/data/projects/MD1/data/R1000/reports/"
PROJ_ROOT_PATH="/home/priyesh/projects/MD1"

In [4]:
# Read from Pickle file

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','clean_master_data_stage2.pkl')
clean_master_data = pd.read_pickle(filepath)

In [5]:
clean_master_data['WMT']['15']['income']['table']

Unnamed: 0_level_0,"Jan. 31, 2015","Jan. 31, 2014","Jan. 31, 2013"
line_item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Revenues:,,,
Net Sales,482229.0,473076.0,465604.0
Membership And Other Income,3422.0,3218.0,3047.0
Total Revenue,485651.0,476294.0,468651.0
Costs And Expenses:,,,
Cost Of Sales,365086.0,358069.0,352297.0
SGA,93418.0,91353.0,88629.0
Operating Income,27147.0,26872.0,27725.0
Interest:,,,
Debt,2161.0,2072.0,1977.0


In [6]:
def get_line_items(t):
 
  headings = []
  line_items = []
    
  for index, row in t.iterrows():
    
    if row.replace('', np.nan).isna().all():
      # Blank row, so assume heading
    
      headings.append(index)
    else:    
      line_items.append(index)

  return headings, line_items

In [7]:
def get_table_labels(stype):

  master_headings = []
  master_line_items = []

  ticker_list = list(clean_master_data.keys())

  for ticker in ticker_list:
    
    print(ticker)
    
    for yr in clean_master_data[ticker]:
      
      # Allow for the likelihood that statement might be missing for given ticker and yr
        
      if stype in clean_master_data[ticker][yr]:
        try:
          headings, line_items = get_line_items(clean_master_data[ticker][yr][stype]['table'])
    
          master_headings = master_headings + headings
          master_line_items = master_line_items + line_items
        except:
          print("Error : {} {}".format(ticker, yr))
          exit() 

  # Remove duplicates from headings and line items

  unique_list = set(master_headings)
  headings = list(unique_list)

  unique_list = set(master_line_items)
  line_items = list(unique_list)

  return {'headings' : headings,
          'line_items' : line_items}


In [8]:
table_labels = get_table_labels('income')

AAL
AAP
AAPL
ABBV
ABT
ACGL
ACHC
ACM
ACN
ADBE
ADI
ADM
ADP
ADSK
AEE
AEP
AES
AFG
AFL
AGCO
AGNC
AGO
AGR
AIG
AIZ
AJG
AKAM
AL
ALB
ALGN
ALK
ALL
ALLE
ALLY
ALNY
ALSN
AMAT
AMC
AMD
AME
AMED
AMG
AMGN
AMH
AMP
AMT
AMZN
AN
ANET
ANSS
AON
AOS
APD
APH
APTV
AR
ARE
ARES
ARMK
ARW
ATO
ATR
ATVI
AVB
AVT
AWI
AWK
AXS
AXTA
AYI
AZO
BA
BAC
BAH
BALL
BAX
BBY
BC
BDX
BEN
BERY
BFAM
BG
BIIB
BIO
BK
BKI
BKNG
BLD
BLDR
BLK
BMRN
BMY
BOKF
BR
BRKR
BRO
BRX
BSX
BURL
BWA
BWXT
BX
BXP
BYD
C
CABO
CACC
CACI
CAG
CAH
CAR
CASY
CAT
CB
CBOE
CBRE
CBSH
CC
CCI
CCK
CCL
CDNS
CDW
CE
CF
CFG
CFR
CG
CGNX
CHD
CHDN
CHE
CHH
CHK
CHRW
CHTR
CIEN
CINF
CL
CLH
CLX
CMA
CMCSA
CME
CMG
CMI
CMS
CNA
CNC
CNP
COF
COLB
COLM
COO
COST
COTY
CPB
CPRI
CPRT
CPT
CR
CRI
CRL
CRUS
CSCO
CSGP
CSL
CSX
CTAS
CTLT
CTSH
CUBE
CUZ
CVS
CVX
CW
D
DAL
DAR
DCI
DECK
DEI
DFS
DG
DGX
DHI
DHR
DISH
DKS
DLB
DLR
DLTR
DOV
DPZ
DRI
DTE
DUK
DVA
DVN
DXCM
EA
EBAY
ECL
ED
EEFT
EFX
EGP
EHC
EIX
EL
ELS
EMN
EMR
ENPH
ENTG
EOG
EPAM
EPR
EQIX
EQR
EQT
ERIE
ES
ESI
ESS
ETN
ETR
EVA
EVR
EW
EWBC
EXAS
EXC
EXEL
EXP
EXPD

In [9]:
with open(PROJ_ROOT_PATH + '/pickle/temp_table_labels_stage3.pkl', 'wb') as f:
  pickle.dump(table_labels, f)

# Map Headings

In [10]:
# Read from Pickle file

filepath = os.path.join(PROJ_ROOT_PATH,'pickle','temp_table_labels_stage3.pkl')
table_labels = pd.read_pickle(filepath)

In [11]:
df = pd.DataFrame(table_labels['headings'],columns=['heading'])
df['type'] = 'X'
df = df.set_index('heading')
df

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Fixed Income And Data Services,X
Other Fees [Member],X
Operating Segments | Distribution | Total Gas Sales Revenues,X
Gross Revenues From Home Sales,X
Supplemental Revenue Member [Member],X
...,...
Diluted Earnings Loss Per Common Share:,X
Net Operating Revenues,X
Card Fees,X
Accumulated Translation Adjustment [Member],X


In [12]:
filter = df.index.str.contains(r"Revenue[s]?",case=False, regex=True, na=False) 
df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Operating Segments | Distribution | Total Gas Sales Revenues,X
Gross Revenues From Home Sales,X
Supplemental Revenue Member [Member],X
Account Servicing Revenue,X
Fee Revenue:,X
Other Sales And Revenues,X
Other Rental And Property Revenues,X
"Subscription, Licensing, And Other Revenues",X
Home Sale Revenues | Homebuilding,X
Other Collaboration | Collaboration Revenue,X


In [13]:
df[filter] = 'R'

In [14]:
filter = df.index.str.contains(r"Sales",case=False, regex=True, na=False) & \
           ~df.index.str.contains(r"Enum|Type",case=False, regex=True, na=False) & \
           ~df.index.str.contains(r"prof|member|per",case=False, regex=True, na=False)
df[filter]        

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Gross Revenues From Home Sales,R
Net Service Sales,X
Other Sales And Revenues,R
Company Sales,X
"Investment Gains On Sales, Net",X
Sales And Revenues:,R
"Gross Revenues From Home Sales, Brokered Resales And Ancillary Services",R
Net Sales,X
Total Cost Of Sales | Used Vehicle Sales,X
Gas Sales,X


In [15]:
df[filter] = 'R'

In [16]:
filter = df.index.str.contains(r"Cost Of Sale",case=False, regex=True, na=False) & \
           ~df.index.str.contains(r"Other|mem|enum|bus|type|mast|Veh",case=False, regex=True, na=False)
df[filter]    

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Cost Of Sales Excluding Deprec And Amort,R
Cost Of Sales:,R
Cost Of Sales And Operating Expenses:,X
Cost Of Sales,R
Cost Of Sales And Service Revenues,R


In [17]:
df[filter] = 'COS'

In [18]:
filter = df.index.str.contains(r"expenses[:]?$",case=False, regex=True, na=False) & \
           df.index.str.contains(r"^Cost",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"non|direct|property|rail|acq",case=False, regex=True, na=False)
df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Costs And Operation Expenses:,X
Costs And Operating Expenses,X
Costs And Expenses,X
Cost And Operating Expenses:,X
Cost Of Sales And Operating Expenses:,COS
Costs And Expenses:,X
Cost And Expenses,X
Costs And Operating Expenses:,X
Cost And Expenses:,X


In [19]:
df[filter] = 'OE'

In [20]:
filter = df.index.str.contains(r"Operating Expenses$",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"non|direct|property|rail|acq",case=False, regex=True, na=False)
df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Utility Operating Expenses,X
Costs And Operating Expenses,OE
Operating Expenses,X
Total Operating Expenses,X


In [21]:
df[filter] = 'OE'

In [22]:
filter = df.index.str.contains(r"Stock",case=True, regex=True, na=False) & \
           df.index.str.contains(r"Per Share",case=True, regex=True, na=False) & \
           df.index.str.contains(r"Dilute|Basic",case=True, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Earnings Per Share Attributable To Common Stock Diluted:,X
Earnings Per Share Of Common Stock Diluted:,X
Net Income Loss Per Share Of Class A And Class B Common Stock And Class C Capital Stock Diluted:,X
Net Income Attributable To Godaddy Inc. Per Share Of Class A Common Stock—Diluted:,X
Diluted Earnings Per Share-Common Stock:,X
Net Income Loss Per Share Of Common Stock Diluted:,X
Net Earnings Loss Per Share Attributable To Common Stockholders-Basic:,X
Basic Net Income Loss Per Share Attributable To Common Stockholders:,X
Diluted Net Income Loss Per Share Attributable To Common Stockholders:,X
Net Income Loss Per Share Of Common Stock Basic:,X


In [23]:
df[filter] = 'EXCL'

In [24]:
filter = df.index.str.contains(r"Per.*Share",case=True, regex=True, na=False)
      

df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Earnings Per Common Share Attributable To Charter Shareholders:,X
Net Income Per Share Omnicom Group Inc.:,X
Income Loss Per Share,X
-Loss Gain From Discontinued United Kingdom Operations Per Share:,X
Diluted Earnings Loss Per Share Attributable To Common Shares,X
...,...
Net Profit Loss Per Share:,X
"Basic Earnings Loss Per Share Attributable To Best Buy Co., Inc. Shareholders",X
Earnings Loss Per Share Attributable To Stockholders After Adjustment Of Redeemable Limited Partners Capital To Redemption Amount:,X
Basic Earnings Per Share Data:,X


In [25]:
df[filter] = 'EXCL'

In [26]:
filter = df.index.str.contains(r"Common Stock",case=True, regex=True, na=False) & \
          df.index.str.contains(r"Earn",case=True, regex=True, na=False)
      

df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
"Earnings Loss Per Share Of Common Stock, Continuing Operations:",EXCL
Net Earnings Loss Per Share Of Common Stock:,EXCL
Earnings Per Share Attributable To Common Stock Diluted:,EXCL
Earnings Per Share Of Common Stock Diluted:,EXCL
Diluted Earnings Loss Per Common Share Attributable To Common Stock,EXCL
Earnings Loss Per Common Share Attributable To Owens Corning Common Stockholders,EXCL
"Earnings Per Share Attributable To Intercontinental Exchange, Inc. Common Stockholders:",EXCL
Diluted Earnings Per Share-Common Stock:,EXCL
Net Earnings Loss Per Share Attributable To Common Stockholders-Basic:,EXCL
Earnings Per Share Class A And B Common Stock:,EXCL


In [27]:
df[filter] = 'EXCL'

In [28]:
filter = df.index.str.contains(r"Common.*Stock",case=True, regex=True, na=False) & \
          df.index.str.contains(r"Basic|Dilute",case=True, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Earnings Per Share Attributable To Common Stock Diluted:,EXCL
Earnings Per Share Of Common Stock Diluted:,EXCL
Diluted Earnings Loss Per Common Share Attributable To Common Stock,EXCL
Basic Net Income Per Common Share Attributable To Microchip Technology Stockholders,EXCL
Net Income Loss Per Share Of Class A And Class B Common Stock And Class C Capital Stock Diluted:,EXCL
Net Income Attributable To Godaddy Inc. Per Share Of Class A Common Stock—Diluted:,EXCL
Diluted Earnings Per Share-Common Stock:,EXCL
Net Income Loss Per Share Of Common Stock Diluted:,EXCL
Net Earnings Loss Per Share Attributable To Common Stockholders-Basic:,EXCL
Basic Net Income Loss Per Share Attributable To Common Stockholders:,EXCL


In [29]:
df[filter] = 'EXCL'

In [30]:
filter = df.index.str.contains(r"Diluted[]:]?$|Basic[:]?$",case=True, regex=True, na=False)
    
df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Earnings Loss Per Common Share Diluted:,EXCL
Earnings Per Weighted Average Common Share Attributable To Alliant Energy Common Shareowners Basic:,EXCL
Basic And Diluted:,X
"Earnings Per Common Share, Basic",EXCL
Earnings Losses Per Share-Diluted:,EXCL
Earnings Per Common Share Attributable To Common Shareholders Basic,EXCL
Earnings Per Share Attributable To Common Stock Diluted:,EXCL
Earnings Per Share Of Common Stock Diluted:,EXCL
Earnings Per Common Share-Fully Diluted:,EXCL
-Loss Earnings Per Share—Basic:,EXCL


In [31]:
df[filter] = 'EXCL'

In [32]:
filter = df.index.str.contains(r"Member\]$",case=True, regex=True, na=False)
df[filter]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Other Fees [Member],X
Supplemental Revenue Member [Member],R
Domestic Stores [Member] | U.S. Company-Owned Stores [Member],X
Rialto [Member],X
Entergy Texas [Member] | Electricity [Member],X
Pacific Gas And Electric Company [Member],X
Domestic Stores [Member] | Domestic Company Owned Stores [Member],X
Gains On Books Of Business Sales [Member],X
Automotive Leasing [Member],X
Rh Segment [Member],X


In [33]:
df[filter] = 'EXCL'

In [34]:
df_headings = df[df['type'] != 'X']
df_headings

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Other Fees [Member],EXCL
Operating Segments | Distribution | Total Gas Sales Revenues,R
Gross Revenues From Home Sales,R
Supplemental Revenue Member [Member],EXCL
Domestic Stores [Member] | U.S. Company-Owned Stores [Member],EXCL
...,...
Revenues And Other Income:,R
Basic Earnings Per Share Data:,EXCL
Diluted Earnings Loss Per Common Share:,EXCL
Net Operating Revenues,R


# Map Line Items

In [35]:
#Income Sheet Mappings:

#R      Revenue
#TR     Total Revenue
#TCR    Total Cost of Revenue
#GP     Gross Profit
#OE     Operating Expenses
#SGA    Selling, general administrative
#OI     Operating Income

In [36]:
df = pd.DataFrame(table_labels['line_items'],columns=['line_items'])
df['type'] = 'X'
df = df.set_index('line_items')
df

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Income Loss Attributable To Boyd Gaming Corporation,X
Net Income Loss Per Share Diluted In Usd Per Share,X
Net Sales And Other Operating Income,X
-Loss Gain On Derivatives,X
Programming And Production,X
...,...
Strategic Capital Expenses,X
Non-Operating Investment Income,X
Net Income Loss Attributable To United States Steel Corporation,X
Consolidated Income Before Provision Benefit For Income Taxes And Equity In Net Income Of Affiliates,X


In [37]:
filter = df.index.str.contains(r"Revenue[s]?",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Total",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Total Net Revenue,X
Total Rental Property Revenues,X
Total Revenues And Other Income,X
Total On-Premise Software Revenues,X
Total Other Expenses Revenues,X
Total Financing Revenue And Other Interest Income,X
Total Revenues And Non-Operating Income,X
"Total Subscription, Support And License Revenue",X
Total Rental Revenues,X
Total Maintenance And Subscription Revenue,X


In [38]:
df[filter] = 'TR'

In [39]:
filter = df.index.str.contains(r"Sales",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Total",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Total Net Sales And Revenue,TR
Total Sales And Revenues,TR
Total Sales,X
Total Service Revenues And Vehicle Sales,TR
"Sales, Total",X
Total Net Sales,X
Total Sales And Service Revenues,TR


In [40]:
df[filter] = 'TR'

In [41]:
filter = df.index.str.contains(r"Revenue[s]?",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Net",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"impair|fina|div|interest|mort|Fee|Prop",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Revenues Before Provision For Doubtful Accounts,X
Total Net Revenue,TR
Net Service Revenue,X
Revenues Before Reimbursements Net Revenues,X
Sales Revenue Net,X
"Revenues, Net Of Recourse And Reinsurance Expense",X
"Brokered Resale Revenues, Net",X
Net Operating Revenues Less Provision For Doubtful Accounts,X
Net Revenues From Services-Others,X
Resort Net Revenue,X


In [42]:
df[filter] = 'R'

In [43]:
filter = df.index.str.contains(r"Sales",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Net",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"Related|Gain|Asset|Marketing|Otti",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"impair|fina|div|interest|mort|Serv|Res|Total",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Sales And Other Operating Income,X
"Membership Sales Commissions, Deferred, Net",X
Sales Revenue Net,R
Net Consumer Product Sales,X
Net Sales Product,X
"Product Sales, Net",X
Net Sales From Products,X
Net Losses On Sales Of Facilities,X
"Sales Of Vacation Ownership Products, Net",X
Net Sales Of Tangible Products,X


In [44]:
df[filter] = 'R'

In [45]:
filter = df.index.str.contains(r"Revenue[s]?",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"impair|fina|div|interest|mort|Res|Total",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Trading Revenue,X
Special Revenue,X
"Subscription, Licensing, And Other Revenues",X
Wholesale Electric Revenues,X
Service Revenues And Sales,X
Branded Postpaid Revenues,X
Net Revenues Before Provision For Doubtful Accounts,R
Service Revenues And Other,X
Net Service Revenue,R
Fuel Surcharge Revenue,X


In [46]:
filter = df.index.str.contains(r"^Sales",case=True, regex=True, na=False) & \
   ~ df.index.str.contains(r"impair|fina|div|interest|mort|Res|Total|Taxes",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|gross share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Sales And Other Expenses,X
Sales Of Fuel Cell Systems And Related Infrastructure,X
Sales Revenue Net,R
Sales And Revenues,X
Sales And Marketing Expense,X
"Sales Of Natural Gas, Oil And Ngls",X
Sales To Related Parties,X
"Sales And Marketing, Gross",X
Sales And Other Realized Capital Gains And Losses,X
Sales And Service Revenues,X


In [47]:
df[filter] = 'R'

In [48]:
filter = df.index.str.contains(r"Revenue[s]?",case=False, regex=True, na=False) & \
         df.index.str.contains(r"Total Cost|^Cost",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"share|attributable|discontinued|equity|Less|Soft",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Cost Of Product Revenues,X
Cost Of License Revenues,X
Cost Of License And Other Revenue,X
Cost Of Revenue Including Amortization and Impairments,X
"Cost Of Revenues For Related Party Activity, See Note 17",X
Cost Of Royalty Revenues,X
Cost Of Subscriber Services And Formula 1 Revenue,X
Cost Of License And Subscription Revenue,X
Cost Of Subscription Revenue,X
Cost Of Revenues Exclusive Of Depreciation Below,X


In [49]:
df[filter] = 'CR'

In [50]:
filter = df.index.str.contains(r"Sales[s]?",case=False, regex=True, na=False) & \
           df.index.str.contains(r"Total Cost|^Cost",case=False, regex=True, na=False) & \
         ~ df.index.str.contains(r"share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]


Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Costs Of Sales,X
Cost Of Sales Excluding The Lower Of Cost Or Market Inventory Valuation Adjustment,X
Cost Of Contractor Supplies Sales,X
Cost Of Sales Excludes Amortization Of Intangible Assets,X
"Cost Of Sales,Including Warehouse And Delivery Expenses",X
Cost Of Sales Including Special Charges Of 93.9 In 2012 And 8.9 In 2011,X
Cost Of Services And Sales,X
Cost Of Vehicle Sales,X
Cost Of Sales And Operating Expenses,X
Cost Of Sales Including Special Charges A,X


In [51]:
df[filter] = 'CR'

In [52]:
filter = df.index.str.contains(r"Goods",case=False, regex=True, na=False) & \
           df.index.str.contains(r"Total Cost|^Cost",case=False, regex=True, na=False) & \
         ~ df.index.str.contains(r"Acc|Aff|Depr",case=False, regex=True, na=False) & \
         ~ df.index.str.contains(r"share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]


Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Costs Of Goods Sold,X
Costs Of Goods And Services Sold,X
Cost Of Consumer Products And Other Goods Sold,X
Cost Of Goods Sold,X
Cost Of Goods And Services,X
Cost Of Goods Sold And Of Collaboration And Contract Manufacturing,X
Cost Of Goods Sold 409A Expense,X
Total Costs Of Goods Sold,X
Cost Of Goods And Services Sold,X
"Cost Of Goods Sold, Excluding Items Below",X


In [53]:
df[filter] = 'CR'

In [54]:
#GP Gross Profit

filter = df.index.str.contains(r"Profit",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Gross",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"cost|share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Gross Profit,X
Gross Loss Profit,X
Total Gross Profit,X


In [55]:
df[filter] = 'GP'

In [56]:
filter = df.index.str.contains(r"profit",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"insu|proceed|Lease|Collab|Oper|Cons",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"gross|cost|share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Profit,X
Net Trading Profits,X
Profit Before Taxes,X
Profit Loss,X
Net Profit Loss,X
Net Trading Profit,X
Net Profit Loss Before Provision For Income Taxes,X
Profit Sharing,X
Trading Account Profits,X


In [57]:
df[filter] = 'P'

In [58]:
filter = df.index.str.contains(r"profit",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Operat",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"insu|proceed|Lease|Collab|Cons",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"gross|cost|share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Profit From Operations,X
Operating Profit From Continuing Operations,X
Operating Profit,X
Operating Loss Profit,X
Operating Profit / Loss,X
Operating Profit Loss,X
Profit Loss From Operations,X


In [59]:
df[filter] = 'OP'

In [60]:
#GP Gross Margin

filter = df.index.str.contains(r"Margin",case=False, regex=True, na=False) & \
     df.index.str.contains(r"Gross",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"-1|cost|share|attributable|discontinued|equity",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Gross Margin,X
Total Gross Margin,X
Gross Margin On Revenues,X


In [61]:
df[filter] = 'GM'

In [62]:
#OE     Operating Expenses

filter = df.index.str.contains(r"^operating expense[s]?",case=False, regex=True, na=False) & \
   ~ df.index.str.contains(r"revenues|income|admin|pension|attributable|discontinued|loss|gains|profit",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Operating Expenses Excl Dep And Amort,X
Operating Expenses,X
"Operating Expenses, Including 20,071 In 2015, 19,308 In 2014, And 19,152 In 2013 Of Related Party Expenses",X
"Operating Expenses Including 18,101 2012, 16,266 2011 And 14,320 2010 Paid To Related Parties",X
"Operating Expenses, Including 21,890 In 2016, 20,071 In 2015, And 19,308 In 2014 Of Related Party Expenses",X
"Operating Expenses, Including 17,823 In 2018, 21,400 In 2017, 21,890 In 2016 Of Related Party Expenses",X
"Operating Expenses, Including 21,400 In 2017, 21,890 In 2016, 20,071 In 2015 Of Related Party Expenses",X
"Operating Expenses Including 16,126 2011, 14,234 2010 And 14,882 2009 Paid To Affiliates",X
"Operating Expenses Including 18,728 2013, 17,274 2012 And 15,041 2011 Of Related Party Expenses",X
"Operating Expenses, Total",X


In [63]:
df[filter] = 'OE'

In [64]:
#SGA  Selling, general administrative

filter = df.index.str.contains(r"admin",case=False, regex=True, na=False) & \
             df.index.str.contains(r"general",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
General And Administration,X
"Other Selling, General And Administrative",X
"General And Administrative Expenses, Net Of Asset Dispositions",X
"Other Selling, General And Administrative Expenses",X
General And Admin Excluding Dep and Amort,X
General And Administrative Home Sales And Rentals,X
"Selling, General And Administrative Expense Including Net Gain/-Loss On Sale Of Divested Businesses Of 94, 0 And 1,774, Respectively",X
General And Administration Expense,X
"General, Administrative, And Other Indirect Expenses",X
"General, Administrative And Other Expenses",X


In [65]:
df[filter] = 'SGA'

In [66]:
filter = df.index.str.contains(r"sales",case=False, regex=True, na=False) & \
             df.index.str.contains(r"marketing$",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Sales, Distribution And Marketing",R
Sales And Marketing,R


In [67]:
df[filter] = 'SGA'

In [68]:
filter = df.index.str.contains(r"^Selling",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"Related",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Selling, Distribution And Administration",X
Selling And Service,X
Selling And Marketing Expenses,X
Selling Expenses,X
"Selling, General, And Admistrative",X
Selling & Administrative Expenses,X
"Selling And Administrative Expenses, Excluding Goodwill Impairment Charge",X
Selling And Marketing,X
Selling And Administrative,X
"Selling, Administrative And Other Expenses",X


In [69]:
df[filter] = 'SGA'

In [70]:
filter = df.index.str.contains(r"SGA",case=True, regex=True, na=False)
         

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
SGA,X
SGA Excluding Impairment Losses,X
SGA Excluding Dep and Amort,X
SGA Including Stock-Based Comp,X


In [71]:
df[filter] = 'SGA'

In [72]:
filter = df.index.str.contains(r"general",case=False, regex=True, na=False) & \
             df.index.str.contains(r"admin",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
General And Administration,SGA
"Other Selling, General And Administrative",SGA
"General And Administrative Expenses, Net Of Asset Dispositions",SGA
"Other Selling, General And Administrative Expenses",SGA
General And Admin Excluding Dep and Amort,SGA
General And Administrative Home Sales And Rentals,SGA
"Selling, General And Administrative Expense Including Net Gain/-Loss On Sale Of Divested Businesses Of 94, 0 And 1,774, Respectively",SGA
General And Administration Expense,SGA
"General, Administrative, And Other Indirect Expenses",SGA
"General, Administrative And Other Expenses",SGA


In [73]:
df[filter] = 'SGA'

In [74]:
filter = df.index.str.contains(r"selling",case=False, regex=True, na=False) & \
             df.index.str.contains(r"admin",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"general",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Selling, Distribution And Administration",SGA
Total Selling And Administrative Expense,X
Selling & Administrative Expenses,SGA
"Engineering, Selling And Administrative Expenses",X
"Selling And Administrative Expenses, Excluding Goodwill Impairment Charge",SGA
Selling And Administrative,SGA
"Selling, Administrative And Other Expenses",SGA
Other Selling And Administrative Expenses,X
"Selling, Marketing And Administrative Expenses",SGA
"Selling, Distribution And Administrative Expenses",SGA


In [75]:
df[filter] = 'SGA'

In [76]:
filter = df.index.str.contains(r"research",case=False, regex=True, na=False) & \
             df.index.str.contains(r"develop",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Research And Development Net Of Development Compensation Of 23,249 For The Year Ended December 31, 2009",X
In-Process Research And Development Impairment,X
Acquired In-Process And Collaborations Research And Development,X
"Research, Development And Engineering Expenses",X
"Research And Development, Including Stock-Based Compensation",X
Research And Development Revenue Under Collaborative Agreements,X
Acquired In-Process Research And Development,X
"Engineering, Research And Development",X
"Research And Development, Net",X
Research And Development R&D,X


In [77]:
df[filter] = 'RD'

In [78]:
filter = df.index.str.contains(r"Technology",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Develop",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Rev",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|income|loan|from|Amort",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Technology And Development,X


In [79]:
df[filter] = 'RD'

In [80]:
#OI Operating Income

filter = df.index.str.contains(r"income|earning",case=False, regex=True, na=False) & \
             df.index.str.contains(r"operat",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"lease|rent|affiliate|invest|interest",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"joint|vent|utility|share|benefit",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|Stock",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Operating Earnings,X
Earnings From Continuing Operations,X
Income From Continuing Operations Before Equity Income,X
Operating Expense Income,X
Operating Loss/Income,X
-Loss/Income From Continuing Operations Before Income Taxes,X
Loss From Continuing Operations Before Income Taxes And Equity In Earnings Of Subsidiaries,X
Operating Income From Continuing Operations,X
Basic Earnings From Continuing Operations,X
Earnings From Continuing Operations Before Income Tax,X


In [81]:
df[filter] = 'OI'

In [82]:
filter = df.index.str.contains(r"Loss",case=False, regex=True, na=False) & \
             df.index.str.contains(r"operat",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"cost|sharing|attributable|discontinued",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"lease|rent|affiliate|invest|interest",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"joint|vent|utility|from|share|benefit",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|Asset|Sale",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Operating Loss/Income,OI
Operating Losses,X
"-Gain Loss On Divestiture Of Hillerød, Denmark Manufacturing Operations",X
"Impairment/Loss On Operating Properties, Net Of Tax",X
"Loss On Divestiture Of Hillerød, Denmark Manufacturing Operations",X
Operating-Loss/Income,OI
Operating Loss Income,OI
Operating Loss,X
Financing Operations Loss Income,OI
-Loss Gain On Disposition Of Operations,X


In [83]:
df[filter] = 'OI'

In [84]:
filter = df.index.str.contains(r"Total",case=False, regex=True, na=False) & \
             df.index.str.contains(r"cost|expense",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Total Operating Expenses Net,X
Total Operating Expenses – Net,X
Total Expenses Excluding Cost Of Reimbursable Expense,X
"Total Operating Costs And Expenses, Net",X
"Total Operating Expenses, Net",X
Total Losses And Expenses,X
Total Benefits And Expenses,X
Total Consolidated Expenses,X
Total Expenses,X
Total Operating Costs,X


In [85]:
df[filter] = 'TOE'

In [86]:
filter = df.index.str.contains(r"Income",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Operat",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Cont",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Share|Equity|Invest|Relate|Disco",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Loss From Continuing Operations Before Income Tax Expense Benefit,X
"Income Loss From Continuing Operations Available To First Industrial Realty Trust, Inc.S Common Stockholders",X
-Loss/Income From Continuing Operations Before Income Taxes,OI
Operating Income From Continuing Operations,OI
Earnings From Continuing Operations Before Income Tax,OI
Income Taxes On Continuing Operations,OI
"Income From Continuing Operations, Diluted",OI
Earnings Loss From Continuing Operations Before Income Tax Provision Benefit,X
-Loss Income From Continuing Operations Before Gain On Sale Of Real Estate,OI
Diluted Income From Continuing Operations,OI


In [87]:
df[filter] = 'ICO'

In [88]:
filter = df.index.str.contains(r"Earn",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Operat",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Cont",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Share|Equity|Invest|Relate|Disco",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Earnings From Continuing Operations,OI
Earnings From Continuing Operations Common Stockholders,X
Basic Earnings From Continuing Operations,OI
Earnings From Continuing Operations Before Income Tax,ICO
Net Loss Earnings From Continuing Operations,OI
Net Earnings From Continuing Operations,OI
Earnings Loss From Continuing Operations Before Income Tax Provision Benefit,ICO
Ge Capital Earnings Loss From Continuing Operations,OI
-Loss Earnings From Continuing Operations Before Income Taxes,ICO
Earnings Loss From Continuing Operations,OI


In [89]:
df[filter] = 'ICO'

In [90]:
filter = df.index.str.contains(r"Income",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Net",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Cons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Invest|Decons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
"Income Of Consolidated Vies, Net",X
Consolidated Net Loss Income,X
Consolidated Net Income Loss,X
Consolidated Net Income/-Loss,X
Consolidated Net Income After Dividends On Preferred And Preference Stock Of Subsidiaries,X
Consolidated Net Income,X
Net Income From Consolidated Operations,OI


In [91]:
df[filter] = 'CNI'

In [92]:
filter = df.index.str.contains(r"Loss",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Net",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Cons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Invest|Decons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Consolidated Net Loss Income,CNI
Consolidated Net Loss,X
Consolidated Net Earnings Loss,X
Consolidated Net Income Loss,CNI
Consolidated Net Income/-Loss,CNI


In [93]:
df[filter] = 'CNI'

In [94]:
filter = df.index.str.contains(r"Earnings",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Net",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Cons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Invest|Decons",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"claims|interest|charge|Admin|revenue",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"services|prod|purc|segment|affil|acqui",case=False, regex=True, na=False) & \
           ~ df.index.str.contains(r"fees|shares|note|loan|non|other|goods",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Consolidated Net Earnings Loss,CNI
Consolidated Net Earnings,X


In [95]:
df[filter] = 'CNI'

In [96]:
filter = df.index.str.contains(r"Net Earnings",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Disco|Affi|Avail|Basic|Dil",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Amort|Includ|Beg|Other|Reclass|Noncon|Share",case=False, regex=True, na=False)

df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Earnings Loss Before Income Taxes,X
Net Earnings/-Loss,X
Net Earnings From Continuing Operations,ICO
Equity In Net Earnings Of Investees,X
Equity In Net Earnings Of Kindred At Home,X
Net Earnings Of The Group,X
Consolidated Net Earnings Loss,CNI
Net Earnings Note,X
Net Earnings From Above,X
Net Earnings Before Income Tax Expense,X


In [97]:
df[filter] = 'NI'

In [98]:
filter = df.index.str.contains(r"Net Income",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Disco|Affi|Avail|Basic|Dil|Appl",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Alloc|Invest|Member|Class",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Est|Adj|Acqui|Stock|Contr|Non|Gain|Equity",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Amort|Includ|Beg|Other|Reclass|Noncon|Share",case=False, regex=True, na=False)
         
df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Wells Fargo Net Income,X
Net Income Loss And Comprehensive Income Loss,X
-Net Income Loss,X
"Net Income Yum! Brands, Inc.",X
Net Income / Loss From Continuing Operations,ICO
Net Income Before Provision For Benefit From Income Taxes,X
Net Income From Continuing Operations,ICO
Net Income Loss,X
Net Income Before Provision For Income Taxes,X
Net Income Loss Before Income Tax Expense,X


In [99]:
df[filter] = 'NI'

In [100]:
filter = df.index.str.contains(r"Net Loss",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"On|From|Cons|Due|Repos|Asset|For",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Resid",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Attrib|Disco|Affi|Avail|Basic|Dil|Appl",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Alloc|Invest|Member|Class",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Est|Adj|Acqui|Stock|Contr|Non|Gain|Equity",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Amort|Includ|Beg|Other|Reclass|Noncon|Share",case=False, regex=True, na=False)
         
df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Loss Before Income Taxes,X
Net Loss Income,X
Net Loss/Income,X
Net Loss Before Income Tax Benefit,X
Net Loss,X
Net Loss Before Tax,X
Net Loss/Earnings,X
Iké Net Losses Note,X
Net Loss Before Taxes,X
Net Loss Earnings,X


In [101]:
df[filter] = 'NI'

In [102]:
filter = df.index.str.contains(r"Loss",case=True, regex=True, na=False) & \
             df.index.str.contains(r"Operat",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Divest|Invest|Prop|Lit|Capital|Fin|Aff",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Disc|Equity|Stock|Share|Before|Attrib|Sale",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Amort|Includ|Beg|Other|Reclass|Noncon|Share",case=False, regex=True, na=False)
         
df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Operating Loss/Income,OI
Net Loss Earnings From Continuing Operations,ICO
Loss Income From Operations,OI
Operating Losses,OI
"Income Loss From Continuing Operations, Net Of Tax",ICO
Net Income / Loss From Continuing Operations,NI
Operating Income Loss From Continuing Operations,ICO
Loss From Operations,X
Earnings Loss From Continuing Operations,ICO
Non-Operating Income Loss,X


In [103]:
df[filter] = 'LCO'

In [104]:
filter = df.index.str.contains(r"^Deprec",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Lease",case=True, regex=True, na=False) & \
           ~ df.index.str.contains(r"Share|Part|Rev|Sale|Intang|Cloud|Prop|Discon",case=True, regex=True, na=False)
         
df[filter]

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Depreciation-Sg&A,X
Depreciation And Amortization Expense,X
Depreciation – Sg&A,X
Depreciation On Real Estate And Other Costs,X
Depreciation And Depletion,X
Depreciation Expense,X
Depreciable Real Estate Reserves,X
Depreciation And Amortization Of Fixed Assets,X
Depreciation And Amortization Expenses,X
Depreciation On Corporate Assets,X


In [105]:
df[filter] = 'DEP'

In [106]:
df_line_items = df[df['type'] != 'X']
df_line_items

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Sales And Other Operating Income,R
General And Administration,SGA
Wells Fargo Net Income,NI
Operating Earnings,OI
Costs Of Sales,CR
...,...
Net Income Loss From Continuing Operations,LCO
"Earnings From Continuing Operations, Before Tax",ICO
Cost Of Revenues From Clean Coal Activities,CR
Sales And Other Operating Revenue,R


## Save Mappings

In [107]:
# Save mappings

mappings = {}

mappings['headings'] = df_headings
mappings['line_items'] = df_line_items

# Save to file

with open(PROJ_ROOT_PATH + '/pickle/mappings_income_stage3.pkl', 'wb') as f:
  pickle.dump(mappings, f)

In [111]:
df_line_items[df_line_items['type'] == 'R']

Unnamed: 0_level_0,type
line_items,Unnamed: 1_level_1
Net Sales And Other Operating Income,R
Net Revenues Before Provision For Doubtful Accounts,R
Total Net Revenue,R
Net Service Revenue,R
Sales And Other Expenses,R
Revenues Before Reimbursements Net Revenues,R
"Membership Sales Commissions, Deferred, Net",R
Sales Of Fuel Cell Systems And Related Infrastructure,R
Sales Revenue Net,R
"Revenues, Net Of Recourse And Reinsurance Expense",R
