In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import openpyexcel
from openpyexcel import workbook, load_workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyexcel.utils import get_column_letter
from itertools import islice
from os import listdir
import re
import csv
import datetime
import pickle

In [2]:
# Notebook-wide pandas display settings: show up to 500 rows and 500 columns,
# allow 100 characters per cell, and render floats with no decimal places.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
    'display.max_colwidth', 100,
    'display.float_format', '{:.0f}'.format,
)

# Map Line Items

In [3]:
# Load the saved line-item mapping from CSV into the notebook-level frame `df`.
df = pd.read_csv('CSV/cash_map_line_items.csv')

In [4]:
# Key the frame by the line-item label so the label-based rules and lookups
# can address rows through the index, then display it.
df = df.set_index('line_item')
df

Unnamed: 0_level_0,type
line_item,Unnamed: 1_level_1
Net cash generated from operating activities,NCO
Payment of dividends,D
Change in accounts and construction payables related to property and equipment,PPE
Cash dividends,D
Net cash used in operating activities,NCO
Net cash flows (used in) financing activities,NCF
Net cash provided by (used in) financing activities,NCF
Net Cash Used in Investing Activities,NCI
Net cash flows used in investing activities,NCI
Net cash provided by (used for) operating activities,NCO


In [8]:
# Keep These lines - Used to set type in line items

def set_type(df):
    """Fill the ``type`` column of a line-item frame from label patterns.

    Each rule is an (include, exclude) pair of case-insensitive regular
    expressions matched against the index labels.  A row receives the rule's
    code when its label matches ``include`` and does not match ``exclude``.
    Rules run in the order listed, so a later rule overrides an earlier one
    on rows that both select.

    Codes: NI net income, NCO/NCF/NCI net cash from operating / financing /
    investing activities, PPE property plant and equipment, CE capital
    expenditure, CEY cash at end of year, SBB stock buy backs, D dividends,
    SC stock based compensation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by line-item label.  Only the ``type`` column is
        written; it is created if missing.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    # Exclusion lists shared by several rules.
    total_exclude = r"other|non|interest|discontinued|equity"
    asset_exclude = (r"proceeds|loss|sale|depreciation|not yet paid|charges"
                     r"|disposal|lease")

    rules = [
        # Non-capturing group: a capturing group makes str.contains warn.
        ('NI', r"(?:consolidated\s+)?Net Income",
         r"other|share|attributable|discontinued"),
        ('NCO', r"Net cash.*operating activities", total_exclude),
        ('NCF', r"Net cash.*financing activities", total_exclude),
        # NOTE(review): NCI appears in the legend and in the saved mapping but
        # had no rule; this one mirrors NCO/NCF - confirm the exclusions.
        ('NCI', r"Net cash.*investing activities", total_exclude),
        ('PPE', r"property", asset_exclude),
        ('CE', r"capital.*expenditures", asset_exclude),
        ('CEY', r"cash.*end.*year", asset_exclude),
        ('SBB', r"re?purchase",
         r"note|tax|withholding|proceed|issuance|debt|interest|reverse"
         r"|employee|award|unsettled|change"),
        ('D', r"dividends",
         r"affiliates|equity|not paid|subsidiaries|change|increase|interest"),
        ('SC', r"compensation",
         r"affiliates|equity|not paid|subsidiaries|change|increase|interest"
         r"|tax|accrued|proceeds"),
    ]

    # An all-empty ``type`` column is read from CSV as float; make it
    # object-typed so string codes can be stored without a dtype conflict.
    if 'type' in df.columns and df['type'].dtype != object:
        df['type'] = df['type'].astype(object)

    for code, include, exclude in rules:
        wanted = df.index.str.contains(include, case=False, regex=True, na=False)
        unwanted = df.index.str.contains(exclude, case=False, regex=True, na=False)
        # Write only the ``type`` column, never the whole row.
        df.loc[wanted & ~unwanted, 'type'] = code


  after removing the cwd from sys.path.


In [None]:
#Cash Sheet Mappings for line items:

#NI     Net Income
#NCO    Net Cash Provided by Operating Activities
#SC     Stock based compensation
#D      Dividends
#SBB    Stock buy backs
#NCF    Net Cash used for financing purposes
#NCI    Net Cash Used for Investing
#CE     Capital Expenditure
#PPE    Property, Plant and Equipment
#CEY    Cash at end of year


In [90]:
# Save to file

def save_line_items(df, path='CSV/cash_map_line_items.csv'):
    """Write the line-item mapping frame to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save; its index is written as the first column.
    path : str, optional
        Destination file.  Defaults to the notebook's line-item mapping file.
    """
    df.to_csv(path)

In [91]:
# List the line items that still have no type assigned.

df.loc[df['type'].isna()]

Unnamed: 0_level_0,type
line_item,Unnamed: 1_level_1
"Purchases of property and equipment excluding capital leases, net of sales",
"Available-for-sale marketable securities, acquisitions",
Investments and acquisitions (net of cash acquired),
Cash and Cash Equivalents Held-for-sale,
Change in other operating assets and liabilities (Note 26),
...,...
Gain on sale of business/investment,
Payments to acquire ownership interest in subsidiary,
Decrease (increase) in restricted cash and investments,
Other Current Assets and Liabilities,


In [93]:
# Keep only the rows whose type has been set.

df = df[df['type'].notna()]

In [94]:
# Write the current line-item frame back to the mapping CSV.
save_line_items(df)

# Map Headings

In [6]:
# Load the saved heading mapping from CSV; this rebinds the notebook-level `df`.
df = pd.read_csv('CSV/cash_map_headings.csv')

In [7]:
# Key the frame by the heading text so rules and lookups can address rows
# through the index, then display it.
df = df.set_index('heading')
df

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Adjustments to reconcile consolidated net earnings to net cash provided by operating activities:,ARNI
Investing Activities,CIA
Cash flows from operating activities:,CFOA
Cash Flows from Financing Activities,CFA
Operating activities:,CFOA
Adjustments to arrive at cash provided by operating activities:,ARNI
CASH PROVIDED BY / (USED IN) OPERATING ACTIVITIES,CFOA
Significant Non-cash Investing and Financing Activity,CFA
Changes in Operating Assets and Liabilities [Abstract],COAL
Adjustments to reconcile net income (loss) to net cash flows from operating activities:,ARNI


In [7]:
#Cash Sheet Mappings for headings:

#CFOA    Cash flow from operating activities
#COAL    Changes in operating assets and liabilities
#ARNI    Adjustments to reconcile net income
#CIA     Cash flow from investing activities
#CFA     Cash flow from financing activities

In [6]:
# Keep these lines - Used to set type in headings

def set_headings(headings=None):
    """Fill the ``type`` column of a heading frame from label patterns.

    Each rule is an (include, exclude) pair of case-insensitive regular
    expressions matched against the index labels.  A row receives the rule's
    code when its label matches ``include`` and does not match ``exclude``.
    Rules run in the order listed, so a later rule overrides an earlier one
    on rows that both select (a heading naming both investing and financing
    ends up as CFA).

    Codes: CFOA cash flow from operating activities, COAL changes in
    operating assets and liabilities, ARNI adjustments to reconcile net
    income, CIA cash flow from investing activities, CFA cash flow from
    financing activities.

    Parameters
    ----------
    headings : pandas.DataFrame, optional
        Frame indexed by heading text.  When omitted, the notebook-level
        ``df`` is used, which is what the parameterless call always did.

    Returns
    -------
    None
        The frame is modified in place.
    """
    if headings is None:
        # Backward-compatible default: operate on the notebook's global frame.
        headings = df

    rules = [
        ('CFOA', r"operating", r"adjustments|discontinued|change|reconciliation"),
        ('COAL', r"change.*operating", r"discontinued|reconciliation"),
        ('ARNI', r"adjust.*operating", r"change|discontinued|reconciliation"),
        ('CIA', r"investing", r"supplemental|change|discontinued|reconciliation"),
        ('CFA', r"financing", r"supplemental|change|discontinued|reconciliation"),
    ]

    # An all-empty ``type`` column is read from CSV as float; make it
    # object-typed so string codes can be stored without a dtype conflict.
    if 'type' in headings.columns and headings['type'].dtype != object:
        headings['type'] = headings['type'].astype(object)

    for code, include, exclude in rules:
        wanted = headings.index.str.contains(include, case=False, regex=True, na=False)
        unwanted = headings.index.str.contains(exclude, case=False, regex=True, na=False)
        # Write only the ``type`` column, never the whole row.
        headings.loc[wanted & ~unwanted, 'type'] = code

In [27]:
# Save to file

def save_headings(df, path='CSV/cash_map_headings.csv'):
    """Write the heading mapping frame to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save; its index is written as the first column.
    path : str, optional
        Destination file.  Defaults to the notebook's heading mapping file.
    """
    df.to_csv(path)

In [40]:
# Check rows which are not set to type

df[df['type'].isna()]

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Supplemental Disclosures of Cash Flow Information:,
Statement Of Cash Flows [Abstract],
Discontinued Operations [Member],
Cash and cash equivalents:,
"Cash and cash equivalents, and restricted cash and cash equivalents:",
Adjustments to reconcile net income to net cash provided (used) by operations:,
Supplemental Disclosure of Cash Flow Information,
"Short-term borrowings, by original maturity",
Supplemental cash flows information:,
Power [Member],


In [42]:
# Keep only the rows whose type has been set.

df = df[df['type'].notna()]

In [43]:
# Write the current heading frame back to the mapping CSV.
save_headings(df)

In [95]:
#Cash Sheet Mappings for headings:

#CFOA    Cash flow from operating activities
#ARNI    Adjustments to reconcile net income
#COAL    Changes in operating assets and liabilities
#CIA     Cash flow from investing activities
#CFA     Cash flow from financing activities

#Cash Sheet Mappings for line items:

#NI     Net Income
#NCO    Net Cash Provided by Operating Activities
#SC     Stock based compensation
#D      Dividends
#SBB    Stock buy backs
#NCF    Net Cash used for financing purposes
#NCI    Net Cash Used for Investing
#CE     Capital Expenditure
#PPE    Property, Plant and Equipment
#CEY    Cash at end of year


# Create mapping for Headings and Line Items

In [None]:
# Heading code -> list of line-item codes associated with that heading.
# NOTE(review): 'No Heading' presumably groups items that sit outside every
# heading (here only cash at end of year) - confirm against the consumer.
cash_table_map = {
    'CFOA': ['NI'],
    'ARNI': ['SC'],
    'COAL': ['NCO'],
    'CIA': ['PPE', 'CE', 'NCI'],
    'CFA': ['SBB', 'D', 'NCF'],
    'No Heading': ['CEY'],
}
        

# Create lookup for Headings

In [104]:
# Display the frame the heading lookup is built from.
df

Unnamed: 0_level_0,type
heading,Unnamed: 1_level_1
Adjustments to reconcile consolidated net earnings to net cash provided by operating activities:,ARNI
Investing Activities,CIA
Cash flows from operating activities:,CFOA
Cash Flows from Financing Activities,CFA
Operating activities:,CFOA
Adjustments to arrive at cash provided by operating activities:,ARNI
CASH PROVIDED BY / (USED IN) OPERATING ACTIVITIES,CFOA
Significant Non-cash Investing and Financing Activity,CFA
Changes in Operating Assets and Liabilities [Abstract],COAL
Adjustments to reconcile net income (loss) to net cash flows from operating activities:,ARNI


In [106]:
# DataFrame.to_dict() yields {column -> {index label -> value}}; picking the
# 'type' column leaves a plain {heading text -> heading code} dictionary.
x = df.to_dict()
cash_heading_lookup = x['type']
cash_heading_lookup

{'Adjustments to reconcile consolidated net earnings to net cash provided by operating activities:': 'ARNI',
 'Investing Activities': 'CIA',
 'Cash flows from operating activities:': 'CFOA',
 'Cash Flows from Financing Activities': 'CFA',
 'Operating activities:': 'CFOA',
 'Adjustments to arrive at cash provided by operating activities:': 'ARNI',
 'CASH PROVIDED BY / (USED IN) OPERATING ACTIVITIES': 'CFOA',
 'Significant Non-cash Investing and Financing Activity': 'CFA',
 'Changes in Operating Assets and Liabilities [Abstract]': 'COAL',
 'Adjustments to reconcile net income (loss) to net cash flows from operating activities:': 'ARNI',
 'Changes in operating assets and liabilities net of effects from mergers and disposals of businesses:': 'COAL',
 'Cash flows used in investing activities:': 'CIA',
 'Cash Flows from Operating Activities': 'CFOA',
 'Adjustments to reconcile net income to net cash from operating activities:': 'ARNI',
 'Changes in operating assets and liabilities, net of ac

In [107]:
# Spot-check one key of the heading lookup.
cash_heading_lookup['Significant Non-cash Investing and Financing Activity']

'CFA'

# Create Lookup for Line Items


In [108]:
# Reload the line-item mapping from the file that save_line_items() writes.
# The path previously lacked the 'CSV/' prefix, so this cell could pick up a
# stale copy from the working directory instead of the saved mapping.
df = pd.read_csv('CSV/cash_map_line_items.csv').set_index('line_item')
df

Unnamed: 0_level_0,type
line_item,Unnamed: 1_level_1
Net cash generated from operating activities,NCO
Payment of dividends,D
Change in accounts and construction payables related to property and equipment,PPE
Cash dividends,D
Net cash used in operating activities,NCO
Net cash flows (used in) financing activities,NCF
Net cash provided by (used in) financing activities,NCF
Net Cash Used in Investing Activities,NCI
Net cash flows used in investing activities,NCI
Net cash provided by (used for) operating activities,NCO


In [109]:
# DataFrame.to_dict() yields {column -> {index label -> value}}; picking the
# 'type' column leaves a plain {line-item text -> line-item code} dictionary.
x = df.to_dict()
cash_line_item_lookup = x['type']
cash_line_item_lookup

{'Net cash generated from operating activities': 'NCO',
 'Payment of dividends': 'D',
 'Change in accounts and construction payables related to property and equipment': 'PPE',
 'Cash dividends': 'D',
 'Net cash used in operating activities': 'NCO',
 'Net cash flows (used in) financing activities': 'NCF',
 'Net cash provided by (used in) financing activities': 'NCF',
 'Net Cash Used in Investing Activities': 'NCI',
 'Net cash flows used in investing activities': 'NCI',
 'Net cash provided by (used for) operating activities': 'NCO',
 'Net cash used for financing activities from continuing operations': 'NCF',
 'Net cash provided by (used for) investing activities from continuing operations': 'NCI',
 'Excess tax benefit from share-based compensation': 'D',
 'Non-cash property additions': 'PPE',
 'Net cash used in investing activities from continuing operations': 'NCI',
 'Acquisitions of property and equipment included in liabilities': 'PPE',
 'Net Cash (Used for)/Provided by Investing Acti

In [110]:
# Spot-check one key of the line-item lookup.
cash_line_item_lookup['Net cash used in operating activities']

'NCO'

# Combine Lookups for heading and line items into one dictionary and save

In [115]:
# Bundle both lookups in one dictionary, keyed by the kind of label.
cash_lookup = {
    'heading': cash_heading_lookup,
    'line_item': cash_line_item_lookup,
}

In [116]:
# Save to file

# Serialise the combined lookup to a pickle file in the working directory.
with open('cash_map_lookup.pkl', 'wb') as lookup_file:
    pickle.dump(cash_lookup, lookup_file)
