In [1]:
import pandas as pd
import numpy as np
import os, glob
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import re

pd.set_option('max_rows',200)

**Which tickers did not get consolidated?**

In [2]:
currents, consolidated = set([os.path.basename(f) for f in glob.glob('currents/*.pkl')]), set(set([os.path.basename(f) for f in glob.glob('consolidated/*.pkl')]))

In [3]:
currents-consolidated

set()

**How many tickers have we conslidated?**

In [4]:
len(consolidated)

502

**Which elements are within "isStartEndPeriod"**

Combine all consolidated dataframes together

In [5]:
from multiprocessing import Pool

In [6]:
%%time

def get_df(f):
    try:
        df_t = pd.read_pickle(f)
        return df_t
    except Exception as e:
        print(e)
        return

p = Pool(3)
dfs = p.map(get_df, glob.glob('consolidated/*.pkl'))

CPU times: user 3.6 s, sys: 2.8 s, total: 6.4 s
Wall time: 12.5 s


In [7]:
df = pd.concat(dfs, ignore_index=True, sort=False)

In [8]:
# df.to_pickle('consolidated_all.pkl')

In [9]:
period_accounts = df[df.isStartEndPeriod & (df.Account.str.contains('us-gaap:'))].Account.value_counts()

In [10]:
len(period_accounts)

3898

In [11]:
period_accounts.head(100)

us-gaap:EarningsPerShareDiluted                                                                                        19284
us-gaap:IncomeTaxExpenseBenefit                                                                                        19237
us-gaap:EarningsPerShareBasic                                                                                          19204
us-gaap:WeightedAverageNumberOfDilutedSharesOutstanding                                                                18375
us-gaap:WeightedAverageNumberOfSharesOutstandingBasic                                                                  18333
us-gaap:NetIncomeLoss                                                                                                  17730
us-gaap:CashAndCashEquivalentsPeriodIncreaseDecrease                                                                   17689
us-gaap:NetCashProvidedByUsedInInvestingActivities                                                                     16033


In [12]:
period_accounts.quantile(np.linspace(0,1,11))

0.0        1.0
0.1        4.0
0.2       13.0
0.3       26.0
0.4       44.0
0.5       70.0
0.6      117.0
0.7      195.9
0.8      377.0
0.9      953.6
1.0    19284.0
Name: Account, dtype: float64

**Let's look at Ford**

In [13]:
F = pd.read_pickle('consolidated/F.pkl')

In [14]:
F[F.Value2==3009000000]

Unnamed: 0,Value,Account,Category,Period,ContextID,isStartEndPeriod,isInstantPeriod,isForeverPeriod,startDateTime,endDateTime,...,cik,ReportDate,ReportType,Days,endDateTime2,to_consolidate,startDateTime2,Value2,Days2,Period2
1640,3009000000.0,us-gaap:FinancialServicesRevenue,False,2018-04-012018-06-30,FD2018Q2QTD,True,False,False,2018-04-01,2018-07-01,...,37996,2018-07-26,10-Q,91.0,2018-06-30,False,2018-04-01,3009000000.0,91.0,2018-04-012018-06-30


In [15]:
F.query("endDateTime2=='2017-06-30' and isStartEndPeriod and Account in ('us-gaap:SalesRevenueNet','us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest','us-gaap:ProfitLoss','us-gaap:NonoperatingIncomeExpense','us-gaap:ComprehensiveIncomeNetOfTax','us-gaap:IncomeTaxExpenseBenefit','us-gaap:CostOfGoodsAndServicesSold','us-gaap:InterestExpense','us-gaap:InvestmentIncomeInterest','us-gaap:SellingGeneralAndAdministrativeExpense','us-gaap:CostsAndExpenses')")

Unnamed: 0,Value,Account,Category,Period,ContextID,isStartEndPeriod,isInstantPeriod,isForeverPeriod,startDateTime,endDateTime,...,cik,ReportDate,ReportType,Days,endDateTime2,to_consolidate,startDateTime2,Value2,Days2,Period2
973,2260000000.0,us-gaap:ComprehensiveIncomeNetOfTax,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,2260000000.0,91.0,2017-04-012017-06-30
1099,33342000000.0,us-gaap:CostOfGoodsAndServicesSold,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,33342000000.0,91.0,2017-04-012017-06-30
1146,38301000000.0,us-gaap:CostsAndExpenses,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,38301000000.0,91.0,2017-04-012017-06-30
1788,2266000000.0,us-gaap:IncomeLossFromContinuingOperationsBefo...,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,2266000000.0,91.0,2017-04-012017-06-30
2071,211000000.0,us-gaap:IncomeTaxExpenseBenefit,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,211000000.0,91.0,2017-04-012017-06-30
2152,277000000.0,us-gaap:InterestExpense,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,277000000.0,91.0,2017-04-012017-06-30
2174,109000000.0,us-gaap:InvestmentIncomeInterest,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,109000000.0,91.0,2017-04-012017-06-30
2501,732000000.0,us-gaap:NonoperatingIncomeExpense,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,732000000.0,91.0,2017-04-012017-06-30
3473,2055000000.0,us-gaap:ProfitLoss,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,2055000000.0,91.0,2017-04-012017-06-30
3691,2756000000.0,us-gaap:SellingGeneralAndAdministrativeExpense,False,2017-04-012017-06-30,FD2017Q2QTD,True,False,False,2017-04-01,2017-07-01,...,37996,2018-07-26,10-Q,91.0,2017-06-30,False,2017-04-01,2756000000.0,91.0,2017-04-012017-06-30


In [16]:
F.query("endDateTime2=='2017-06-30' and isStartEndPeriod").Account.value_counts().index.tolist()

['f:GainLossonInvestment',
 'us-gaap:WeightedAverageNumberOfSharesOutstandingBasic',
 'us-gaap:ComprehensiveIncomeNetOfTax',
 'us-gaap:AvailableForSaleSecuritiesGrossRealizedGains',
 'us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic',
 'us-gaap:ProfitLoss',
 'us-gaap:ProceedsFromOtherDebt',
 'us-gaap:CommonStockDividendsPerShareDeclared',
 'us-gaap:PaymentsOfDividendsCommonStock',
 'us-gaap:FinancialServicesCosts',
 'us-gaap:EarningsPerShareDiluted',
 'us-gaap:FinancingReceivableAllowanceForCreditLossesRecovery',
 'us-gaap:NetIncomeLossAttributableToNoncontrollingInterest',
 'us-gaap:ProceedsFromRepaymentsOfShortTermDebt',
 'us-gaap:EffectOfExchangeRateOnCashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents',
 'f:UnrecognizedTaxBenefitsInterestIncome',
 'us-gaap:NetCashProvidedByUsedInFinancingActivities',
 'us-gaap:ProvisionForLoanLeaseAndOtherLosses',
 'us-gaap:PaymentsRelatedToTaxWithholdingForShareBasedCompensation',
 'us-gaap:Revenues',
 'us-gaap:NetCashProvidedByUs

# Analysis

**INCOME STATEMENT**

In [2]:
def get_startend_valuecounts(ticker):
    df = pd.read_pickle('consolidated/{}.pkl'.format(ticker))
    return df.query("isStartEndPeriod").Account.value_counts()

In [3]:
def get_income_statements(ticker):
    df = pd.read_pickle('consolidated/{}.pkl'.format(ticker))
    valuecounts = df.query("isStartEndPeriod").Account.value_counts()
    revenue = valuecounts[[c for c in valuecounts.index if 'revenue' in c.lower()]]
    revenue = revenue[revenue==revenue.max()]
    revenue = sorted(revenue.index, key=lambda x: len(x))[:2]
    
    accounts = [
     'us-gaap:Revenues',
     'us-gaap:FinancialServicesRevenue',
     'us-gaap:SalesRevenueNet',
     'us-gaap:OilAndGasRevenue',
     'us-gaap:CostOfGoodsAndServicesSold',
     'us-gaap:CostOfGoodsSold',
     'us-gaap:CostOfRevenue',
     'us-gaap:GrossProfit',
     'us-gaap:FinancialServicesCosts',
     'us-gaap:SellingGeneralAndAdministrativeExpense',
     'us-gaap:CostsAndExpenses',
     'us-gaap:OperatingIncomeLoss',
     'us-gaap:InterestExpense',
     'us-gaap:OtherNonoperatingIncomeExpense',
     'us-gaap:IncomeLossFromEquityMethodInvestments',
     'us-gaap:NonoperatingIncomeExpense',
     'us-gaap:NonOperatingIncomeLoss',
     'us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest',
     'us-gaap:IncomeTaxExpenseBenefit',
     'us-gaap:ProfitLoss',
     'us-gaap:NetIncomeLoss']
    
    accounts = pd.Series(revenue+accounts).drop_duplicates().tolist()
    
    df['Months2'] = df.endDateTime2.dt.month - df.startDateTime2.dt.month + 1
    income_pivot = df.query('Account in @accounts').pivot_table('Value2',['Months2','startDateTime2','endDateTime2'],'Account')
    income_pivot.columns.name = None
    income_cleaned = income_pivot[[c for c in accounts if c in income_pivot.columns]]
    income_cleaned.reset_index(inplace=True)
    income_cleaned = income_cleaned.query("Months2==3")
#     income_cleaned = income_cleaned.dropna(1,thresh=income_cleaned.shape[0]-income_cleaned.shape[0]//5)
    income_cleaned.drop(['Months2','startDateTime2'], 1, inplace=True)
    income_cleaned.set_index('endDateTime2', inplace=True)
    income_cleaned.columns = [re.sub('^.*:','',c) for c in income_cleaned]
    
    return income_cleaned

In [None]:
income = get_income_statements('XOM')

fig,ax = plt.subplots(figsize=(12,9))
income.plot(ax=ax, marker='o')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


<matplotlib.axes._subplots.AxesSubplot at 0x7f446504ab70>

In [9]:
get_startend_valuecounts('FB')

us-gaap:AmortizationOfIntangibleAssets                                                                                                               31
us-gaap:WeightedAverageNumberOfDilutedSharesOutstanding                                                                                              31
us-gaap:OperatingLeasesRentExpenseNet                                                                                                                31
us-gaap:ShareBasedCompensation                                                                                                                       31
us-gaap:OtherComprehensiveIncomeLossForeignCurrencyTransactionAndTranslationAdjustmentNetOfTax                                                       31
us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest                                                  31
us-gaap:EarningsPerShareBasic                                                           