In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import openpyexcel
from openpyexcel import workbook, load_workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyexcel.utils import get_column_letter
from itertools import islice
from os import listdir
import re
import csv
import datetime
import pickle

In [2]:
# Display settings: show up to 500 rows/columns, up to 100 characters per
# cell, and render floats without decimal places.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = 100
pd.set_option('display.float_format', '{:.0f}'.format)

# Map Line Items

In [14]:
# Line-item mapping table; the cells below use its 'line_item' and 'type' columns.
line_items_csv = 'CSV/income_map_line_items.csv'
df = pd.read_csv(line_items_csv)

In [15]:
# Use the line-item text as the row label so the rules below can match on df.index
df = df.set_index('line_item')


In [None]:
# Keep These lines

def tag_line_items(frame, pattern, excluded, type_code):
    """Set ``type`` to ``type_code`` for the rows whose label matches a rule.

    Args:
        frame: Frame indexed by line-item text; modified in place.
        pattern: Regular expression a label must contain to be tagged.
        excluded: Regular expression that vetoes the match when it is also
            found in the label.
        type_code: Value written to the ``type`` column of the matching rows.

    Both expressions are searched case-insensitively anywhere in the label,
    and missing labels never match.
    """
    wanted = frame.index.str.contains(pattern, case=False, regex=True, na=False)
    vetoed = frame.index.str.contains(excluded, case=False, regex=True, na=False)
    # Write the 'type' column only, so any other column keeps its values.
    frame.loc[wanted & ~vetoed, 'type'] = type_code


# (pattern, excluded, type code). The rules run top to bottom, so when a label
# satisfies more than one rule the later rule decides its type.
# The net-income pattern has no optional "consolidated" prefix: an optional
# prefix cannot change the result of a substring search, and writing it as a
# capturing group makes pandas warn that the pattern has match groups.
LINE_ITEM_RULES = [
    (r"Net Income",
     r"share|attributable|discontinued|equity",
     'NI'),
    (r"Total revenue",
     r"net|cost|gross|share|attributable|discontinued|equity",
     'TR'),
    (r"Total revenue.*(?:net|gross)",
     r"cost|share|attributable|discontinued|equity",
     'TR'),
    (r"gross.*profit",
     r"cost|share|attributable|discontinued|equity",
     'GP'),
    (r"profit",
     r"gross|cost|share|sharing|attributable|discontinued|equity",
     'OIL'),
    (r"admin",
     r"gross|cost|share|sharing|attributable|discontinued|equity",
     'SGA'),
    (r"operating",
     r"revenues|income|admin|pension|attributable|discontinued|loss|gains|profit",
     'OE'),
]

for pattern, excluded, type_code in LINE_ITEM_RULES:
    tag_line_items(df, pattern, excluded, type_code)


In [None]:
#Income Sheet Mappings:

#TR     Total Revenue
#TGR    Total Gross Revenue
#TCR    Total Cost of Revenue
#GP     Gross Profit
#OE     Operating Expenses
#SGA    Selling, general and administrative
#OIL    Operating Income or loss
#NI     Net Income


In [16]:
# Save to file

def save_line_items(df, path='CSV/income_map_line_items.csv'):
    """Write the line-item mapping frame to a CSV file.

    Args:
        df: Frame to save; its index is written out as the first column.
        path: Destination file. Defaults to the line-item mapping CSV used by
            this notebook, so existing ``save_line_items(df)`` calls keep
            writing to the same place.
    """
    df.to_csv(path)

In [17]:
# List the line items that still have no type assigned

df.loc[df['type'].isna()]

Unnamed: 0_level_0,type
line_item,Unnamed: 1_level_1
"Equity in net income (losses) of investees, net",
Income from Equity Method Investments,
Weighted average shares outstanding - assuming dilution,
Net income (loss) attributable to MGM Resorts International,
Gains on sales of investments in affiliates,
...,...
Sanofi collaboration revenue,
"Other (expense) income , net",
Total on-premise software revenues,
Cost of product revenues (including definite-lived intangible amortization),


In [19]:
# Keep only the rows whose type has been set

df = df[df['type'].notna()]

In [20]:
# Writes df to CSV/income_map_line_items.csv, the same file this section was
# loaded from. When the cells are run in order, df has already been reduced to
# typed rows, so rows without a type are not kept in the saved file.
save_line_items(df)

# Map Headings

In [23]:
# Heading mapping table; the cells below use its 'heading' column as the index.
headings_csv = 'CSV/income_map_headings.csv'
df = pd.read_csv(headings_csv)


In [24]:
# Label each row by its heading text, then display the frame
df = df.set_index('heading')
df

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
"Weighted average shares outstanding, basic:",X
COST OF SALES:,
Net income per share attributable to common stockholders:,
Weighted-average common shares outstanding:,
Diluted Earnings Per Common Share,
"Net income attributable to Wynn Resorts, Limited:",
Public Service Electric and Gas Company,
Income Statement [Abstract],
Weighted average outstanding shares:,
Expenses,


In [10]:
#Income Sheet Mappings for headings:

#SR   Sales Revenue
#OE   Operating Expenses


In [11]:
# Keep these lines - Used to set type in headings

def set_headings_type(df):
    """Tag cost and expense headings as operating expenses (``'OE'``).

    A heading is tagged when its index label contains "cost" or "expense"
    (case-insensitive) and none of the excluded keywords listed below.

    Args:
        df: Headings frame indexed by heading text. It is modified in place:
            the ``type`` column is written for the matching rows.

    Returns:
        The same ``df`` object, so the call can be displayed or chained.
    """
    is_cost_or_expense = df.index.str.contains(
        r"(?:cost|expense)", case=False, regex=True, na=False)
    is_excluded = df.index.str.contains(
        r"hardware|royalty|other|reimbursed|admin|contract|cloud|anti|collab|attributable|discontinued|loss|gains|profit",
        case=False, regex=True, na=False)
    # Write the 'type' column only, so any other column keeps its values.
    df.loc[is_cost_or_expense & ~is_excluded, 'type'] = 'OE'
    return df




In [70]:
# Mark headings that mention a cost or an expense as 'OE', unless the heading
# also contains one of the excluded keywords.
mentions_cost_or_expense = df.index.str.contains(
    r"(?:cost|expense)", case=False, regex=True, na=False)
has_excluded_keyword = df.index.str.contains(
    r"hardware|royalty|other|reimbursed|admin|contract|cloud|anti|collab|attributable|discontinued|loss|gains|profit",
    case=False, regex=True, na=False)
df[mentions_cost_or_expense & ~has_excluded_keyword] = 'OE'


In [25]:
# Save to file

def save_headings(df, path='CSV/income_map_headings.csv'):
    """Write the heading mapping frame to a CSV file.

    Args:
        df: Frame to save; its index is written out as the first column.
        path: Destination file. Defaults to the heading mapping CSV used by
            this notebook, so existing ``save_headings(df)`` calls keep
            writing to the same place.
    """
    df.to_csv(path)

In [26]:
# Writes df to CSV/income_map_headings.csv, the same path the headings table
# was read from, replacing that file's contents.
save_headings(df)

# Create Mapping for Heading and Line Items

In [27]:
# Heading code -> list of line-item codes.
# NOTE(review): 'CFOA' and 'CEY' do not appear in the type legends earlier in
# this notebook - confirm what they stand for.
income_table_map = dict([
    ('CFOA', ['NI']),
    ('No Heading', ['CEY']),
])
        

In [29]:
# Reload both saved mapping tables, each indexed by its label column
df_heading = pd.read_csv('CSV/income_map_headings.csv').set_index('heading')

df_line_item = pd.read_csv('CSV/income_map_line_items.csv').set_index('line_item')

In [32]:
# Flatten each mapping table into a plain {label: type} dictionary
income_heading_lookup = df_heading['type'].to_dict()
income_line_item_lookup = df_line_item['type'].to_dict()

# Only the last expression of a cell is displayed, so show the line-item lookup here
income_line_item_lookup

{'Net (income) loss attributable to noncontrolling interests and redeemable noncontrolling interests': 'X',
 'Administrative expenses': 'SGA',
 'Total net revenues': 'TNR',
 'Total operating costs': 'OE',
 'Other Cost and Expense, Operating': 'OE',
 'GM Financial interest, operating and other expenses': 'OE',
 'Consolidated net income (loss)': 'NI',
 'Net Income (Loss)': 'NI',
 'Total cost and expenses': 'OE',
 'OPERATING EXPENSES:': 'OE',
 'Operating costs and expenses (excluding depreciation and amortization)': 'OE',
 'Operating, general and administrative': 'SGA',
 'NET INCOME FROM CONTINUING OPERATIONS': 'NI',
 'Consolidated profit before taxes': 'OIL',
 'Other operating and administrative': 'SGA',
 'Selling, general and administrative': 'SGA',
 'Consolidated net income': 'NI',
 'Non-operating items from unconsolidated affiliates': 'OE',
 'Operating overhead expense': 'OE',
 'Consolidated profit before tax': 'OIL',
 'Operating expenses:': 'OE',
 'Net income (loss)': 'NI',
 'Selling

# Combine Lookups for heading and line items into one dictionary and save


In [33]:
# Bundle both lookups into one dictionary, keyed by the kind of label
income_lookup = dict(
    heading=income_heading_lookup,
    line_item=income_line_item_lookup,
)

In [35]:
# Save to file

# Serialise the combined lookup to a pickle file in the working directory
with open('income_map_lookup.pkl', mode='wb') as lookup_file:
    pickle.dump(income_lookup, lookup_file)
