In [1]:
import json

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the raw JSON text of the AAPL 10-K export from disk.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on the
# platform-dependent default encoding used by open().
with open('AAPL_10K_2021.txt', encoding='utf-8') as f:
    aapl_txt = f.read()

# Parse the JSON text into a nested dictionary (statement name -> line items).
aapl_dict = json.loads(aapl_txt)

### Extracting values from the Income Statement

In [3]:
# List the line-item tags available in the parsed AAPL income statement.
aapl_dict['StatementsOfIncome'].keys()

dict_keys(['RevenueFromContractWithCustomerExcludingAssessedTax', 'CostOfGoodsAndServicesSold', 'GrossProfit', 'ResearchAndDevelopmentExpense', 'SellingGeneralAndAdministrativeExpense', 'OperatingExpenses', 'OperatingIncomeLoss', 'NonoperatingIncomeExpense', 'IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest', 'IncomeTaxExpenseBenefit', 'NetIncomeLoss', 'EarningsPerShareBasic', 'EarningsPerShareDiluted', 'WeightedAverageNumberOfSharesOutstandingBasic', 'WeightedAverageNumberOfDilutedSharesOutstanding'])

Note that the *Statement of Income* reports each line item three times, once for each of the last three fiscal years.

For AAPL, the Revenue seems to be broken down by product rather than a sum total. But notice that the third dictionary in the list does NOT include a "segment" key, and it gives the full total Revenue under "value" so I have to systematically extract this somehow.

#### Revenue

In [4]:
# All facts filed under the revenue tag: the same tag is reported once per
# product/service segment and once as a consolidated total (no 'segment' key).
revenue_facts = aapl_dict['StatementsOfIncome']['RevenueFromContractWithCustomerExcludingAssessedTax']

# Preview every third entry (indices 0, 3, 6, ...); in this filing each fact is
# repeated for three fiscal years, so this shows one entry per segment.
for fact in revenue_facts[::3]:
    print(fact)

# Pick the consolidated total by its structure (the first entry without a
# 'segment' breakdown) rather than by a hard-coded list position.
total_revenue_fact = next((fact for fact in revenue_facts if 'segment' not in fact), None)
if total_revenue_fact is None:
    raise ValueError("No consolidated (segment-less) revenue entry found")

print(f"\nFinal Revenue value is: {total_revenue_fact['value']}")

{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'us-gaap:ProductMember'}, 'value': '297392000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'us-gaap:ServiceMember'}, 'value': '68425000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'value': '365817000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'aapl:IPhoneMember'}, 'value': '191973000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'aapl:MacMember'}, 'value': '35190000000'}
{'decimals': '-6', 'unitRef': '

#### Cost of Goods and Services Sold (COGS)
COGS similarly follows the rule of three, and is broken down into Product and Service COGS before finally being summed

In [5]:
# All facts filed under the COGS tag: reported per product/service segment and
# once as a consolidated total (the entry without a 'segment' key).
cogs_facts = aapl_dict['StatementsOfIncome']['CostOfGoodsAndServicesSold']

# Preview every third entry (indices 0, 3, 6, ...); in this filing each fact is
# repeated for three fiscal years, so this shows one entry per segment.
for fact in cogs_facts[::3]:
    print(fact)

# Pick the consolidated total by its structure (the first entry without a
# 'segment' breakdown) rather than by a hard-coded list position.
total_cogs_fact = next((fact for fact in cogs_facts if 'segment' not in fact), None)
if total_cogs_fact is None:
    raise ValueError("No consolidated (segment-less) COGS entry found")

print(f"\nFinal COGS value is: {total_cogs_fact['value']}")


{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'us-gaap:ProductMember'}, 'value': '192266000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'srt:ProductOrServiceAxis', 'value': 'us-gaap:ServiceMember'}, 'value': '20715000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'value': '212981000000'}

Final COGS value is: 212981000000


#### Gross Profit
The Gross Profit value is easy to extract: it is the first entry under the tag.

In [20]:
# Amounts are stored as strings in the filing, so convert the first GrossProfit entry to int.
int(aapl_dict['StatementsOfIncome']['GrossProfit'][0]['value'])

152836000000

#### Research and Development Expense
R&D is easy too

In [21]:
# First ResearchAndDevelopmentExpense entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['ResearchAndDevelopmentExpense'][0]['value'])

21914000000

#### Selling, General & Administrative Expense (SG&A)
Same for SG&A

In [22]:
# First SellingGeneralAndAdministrativeExpense entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['SellingGeneralAndAdministrativeExpense'][0]['value'])

21973000000

#### Operating Expenses
And OpEx

In [23]:
# First OperatingExpenses entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['OperatingExpenses'][0]['value'])

43887000000

#### Operating Income Loss
Operating Income Loss

In [24]:
# All facts filed under the OperatingIncomeLoss tag of the income statement.
operating_income_facts = aapl_dict['StatementsOfIncome']['OperatingIncomeLoss']

# Show every third entry (indices 0, 3, 6, ...).
for fact in operating_income_facts[::3]:
    print(fact)

# The first entry carries the value reported below, converted from string to int.
print(f"\nOperating Income Loss: {int(operating_income_facts[0]['value'])}")

{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'value': '108949000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'us-gaap:StatementBusinessSegmentsAxis', 'value': 'aapl:AmericasSegmentMember'}, 'value': '53382000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'us-gaap:StatementBusinessSegmentsAxis', 'value': 'aapl:EuropeSegmentMember'}, 'value': '32505000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'us-gaap:StatementBusinessSegmentsAxis', 'value': 'aapl:GreaterChinaSegmentMember'}, 'value': '28504000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'us-gaap:StatementBusinessSegmentsAxis', 'value': 'aa

#### Nonoperating Income Expense

In [25]:
# First NonoperatingIncomeExpense entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['NonoperatingIncomeExpense'][0]['value'])

258000000

#### Income before provision for income taxes (EBT)

In [26]:
# First pre-tax income entry (income before the provision for income taxes), as int.
int(aapl_dict['StatementsOfIncome']['IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest'][0]['value'])

109207000000

#### Income Tax
Provision for income tax expense (benefit)

In [27]:
# First IncomeTaxExpenseBenefit entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['IncomeTaxExpenseBenefit'][0]['value'])

14527000000

#### Net Income/Earnings
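The `NetIncomeLoss` tag appears twice: once as the consolidated figure and once with a *retained earnings* segment (`us-gaap:RetainedEarningsMember`). Both entries carry the same value, so the first, segment-less entry is used.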

In [28]:
# All facts filed under the NetIncomeLoss tag of the income statement.
net_income_facts = aapl_dict['StatementsOfIncome']['NetIncomeLoss']

# Show every third entry (indices 0, 3, 6, ...).
for fact in net_income_facts[::3]:
    print(fact)

# The first entry carries the value reported below, converted from string to int.
print(f"\nNet Income: {int(net_income_facts[0]['value'])}")

{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'value': '94680000000'}
{'decimals': '-6', 'unitRef': 'usd', 'period': {'startDate': '2020-09-27', 'endDate': '2021-09-25'}, 'segment': {'dimension': 'us-gaap:StatementEquityComponentsAxis', 'value': 'us-gaap:RetainedEarningsMember'}, 'value': '94680000000'}

Net Income: 94680000000


#### Basic EPS
Be sure to use *float* rather than *int*

In [31]:
# Per-share amounts are fractional, so the string is parsed with float() rather than int().
float(aapl_dict['StatementsOfIncome']['EarningsPerShareBasic'][0]['value'])

5.67

#### Diluted EPS
Be sure to use *float* rather than *int*

In [32]:
# Per-share amounts are fractional, so the string is parsed with float() rather than int().
float(aapl_dict['StatementsOfIncome']['EarningsPerShareDiluted'][0]['value'])

5.61

#### Basic Shares Outstanding

In [33]:
# First weighted-average basic share count entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['WeightedAverageNumberOfSharesOutstandingBasic'][0]['value'])

16701272000

#### Diluted Shares Outstanding

In [34]:
# First weighted-average diluted share count entry, converted from its string form to int.
int(aapl_dict['StatementsOfIncome']['WeightedAverageNumberOfDilutedSharesOutstanding'][0]['value'])

16864919000

In [35]:
import json

In [36]:
# Read the raw JSON text of the INTC 10-K export from disk.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on the
# platform-dependent default encoding used by open().
with open('INTC_10K_2021.txt', encoding='utf-8') as f:
    intc_txt = f.read()

# Parse the JSON text into a nested dictionary (statement name -> line items).
intc_dict = json.loads(intc_txt)

The INTC Income Statement has two fields that the AAPL Income Statement doesn't have:
* GainLossOnInvestments
* RestructuringSettlementAndImpairmentProvisions

In [58]:
# Income-statement field names of each company, kept as dict key views so they
# can be compared with set operators.
keys_intc_is, keys_aapl_is = (
    filing['StatementsOfIncome'].keys() for filing in (intc_dict, aapl_dict)
)

In [60]:
# Fields present in both income statements; `&` on key views already yields a set.
keys_aapl_is & keys_intc_is

{'CostOfGoodsAndServicesSold',
 'EarningsPerShareBasic',
 'EarningsPerShareDiluted',
 'GrossProfit',
 'IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest',
 'IncomeTaxExpenseBenefit',
 'NetIncomeLoss',
 'NonoperatingIncomeExpense',
 'OperatingExpenses',
 'OperatingIncomeLoss',
 'ResearchAndDevelopmentExpense',
 'RevenueFromContractWithCustomerExcludingAssessedTax',
 'SellingGeneralAndAdministrativeExpense',
 'WeightedAverageNumberOfDilutedSharesOutstanding',
 'WeightedAverageNumberOfSharesOutstandingBasic'}

In [61]:
# Fields unique to AAPL first, then fields unique to INTC.
aapl_vs_intc = (
    set(keys_aapl_is - keys_intc_is),
    set(keys_intc_is - keys_aapl_is),
)
for exclusive_fields in aapl_vs_intc:
    print(exclusive_fields)

set()
{'GainLossOnInvestments', 'RestructuringSettlementAndImpairmentProvisions'}


In [62]:
# Read the raw JSON text of the MSFT 10-K export from disk.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on the
# platform-dependent default encoding used by open().
with open('10K_datasets/MSFT_10K_2022.txt', encoding='utf-8') as f:
    msft_txt = f.read()

# Parse the JSON text into a nested dictionary (statement name -> line items).
msft_dict = json.loads(msft_txt)

In [63]:
# Income-statement field names of all three companies, kept as dict key views
# so they can be compared with set operators.
keys_intc_is, keys_aapl_is, keys_msft_is = (
    filing['StatementsOfIncome'].keys()
    for filing in (intc_dict, aapl_dict, msft_dict)
)

In [90]:
# Fields shared by all three income statements.
set(keys_aapl_is).intersection(keys_intc_is, keys_msft_is)

{'CostOfGoodsAndServicesSold',
 'EarningsPerShareBasic',
 'EarningsPerShareDiluted',
 'GrossProfit',
 'IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest',
 'IncomeTaxExpenseBenefit',
 'NetIncomeLoss',
 'NonoperatingIncomeExpense',
 'OperatingIncomeLoss',
 'ResearchAndDevelopmentExpense',
 'RevenueFromContractWithCustomerExcludingAssessedTax',
 'WeightedAverageNumberOfDilutedSharesOutstanding',
 'WeightedAverageNumberOfSharesOutstandingBasic'}

In [88]:
# Fields that appear in exactly one company's income statement.
keys_only_aapl_is = keys_aapl_is - set(keys_intc_is | keys_msft_is)
keys_only_intc_is = keys_intc_is - set(keys_aapl_is | keys_msft_is)
keys_only_msft_is = keys_msft_is - set(keys_aapl_is | keys_intc_is)

# Report each company's exclusive fields, separating the groups with blank lines.
exclusive_by_ticker = [
    ("AAPL", keys_only_aapl_is),
    ("INTC", keys_only_intc_is),
    ("MSFT", keys_only_msft_is),
]
for position, (ticker, exclusive_fields) in enumerate(exclusive_by_ticker):
    if position > 0:
        print('\n')
    for field in exclusive_fields:
        print(f"{ticker} only: {field}")



INTC only: GainLossOnInvestments
INTC only: RestructuringSettlementAndImpairmentProvisions


MSFT only: SellingAndMarketingExpense
MSFT only: GeneralAndAdministrativeExpense


In [89]:
# Fields reported by at least one of the other two companies but missing from this one.
keys_not_aapl_is = set(keys_intc_is | keys_msft_is) - keys_aapl_is
keys_not_intc_is = set(keys_aapl_is | keys_msft_is) - keys_intc_is 
keys_not_msft_is = set(keys_aapl_is | keys_intc_is) - keys_msft_is

# Report each company's missing fields, separating the groups with blank lines.
missing_by_ticker = [
    ("AAPL", keys_not_aapl_is),
    ("INTC", keys_not_intc_is),
    ("MSFT", keys_not_msft_is),
]
for position, (ticker, missing_fields) in enumerate(missing_by_ticker):
    if position > 0:
        print('\n')
    for field in missing_fields:
        print(f"Not {ticker}: {field}")

Not AAPL: GainLossOnInvestments
Not AAPL: GeneralAndAdministrativeExpense
Not AAPL: RestructuringSettlementAndImpairmentProvisions
Not AAPL: SellingAndMarketingExpense


Not INTC: GeneralAndAdministrativeExpense
Not INTC: SellingAndMarketingExpense


Not MSFT: GainLossOnInvestments
Not MSFT: SellingGeneralAndAdministrativeExpense
Not MSFT: RestructuringSettlementAndImpairmentProvisions
Not MSFT: OperatingExpenses
