In [41]:
from enum import Enum
import pandas as pd
import os
from pathlib import Path
import numpy as np

In [42]:
class Sectors(Enum):
    """Market sectors covered by this notebook.

    Each member's value is the sector's display name; the processing cells use
    it verbatim as a directory name under ``../sec-new`` and
    ``../aggregated_financial_data``.
    """

    COMMUNICATION_SERVICES = 'Communication Services'
    CONSUMER_DISCRETIONARY = 'Consumer Discretionary'
    CONSUMER_STAPLES       = 'Consumer Staples'
    ENERGY                 = 'Energy'
    FINANCIALS             = 'Financials'
    HEALTH_CARE            = 'Health Care'
    INDUSTRIALS            = 'Industrials'
    INFORMATION_TECHNOLOGY = 'Information Technology'
    MATERIALS              = 'Materials'
    REAL_ESTATE            = 'Real Estate'
    UTILITIES              = 'Utilities'

In [43]:
class Facts(Enum):
    """Financial facts the notebook extracts from each filing.

    Each value is the tag string that is matched against the ``fact`` column
    of the input CSV files.

    Two member names were originally misspelled (``NON_CURRENT_LIABILITES``,
    ``CURRENT_LIABILTIES_LOANS``). They are kept as the canonical members so
    existing references and ``member.name`` are unchanged; correctly spelled
    aliases are declared at the bottom. Aliases share the value of the
    original member and do not appear when iterating over ``Facts``.
    """

    # Income statement
    TOTAL_NET_SALES = 'RevenueFromContractWithCustomerExcludingAssessedTax'
    GROSS_PROFIT = 'GrossProfit'
    OPERATING_PROFIT_LOSS = 'OperatingIncomeLoss'
    EARNINGS_PER_SHARE_DILUTED = 'EarningsPerShareDiluted'

    # Balance sheet totals
    CURRENT_ASSETS = 'AssetsCurrent'
    NON_CURRENT_ASSETS = 'AssetsNoncurrent'
    CURRENT_LIABILITIES = 'LiabilitiesCurrent'
    NON_CURRENT_LIABILITES = 'LiabilitiesNoncurrent'  # original (misspelled) name

    DIVIDENDS = 'CommonStockDividendsPerShareDeclared'
    CASH_GENERATED_BY_OPERATING_ACTIVITIES = 'NetCashProvidedByUsedInOperatingActivities'
    COST_OF_SALES = 'CostOfGoodsAndServicesSold'

    # Balance sheet breakdown
    CURRENT_ASSETS_INVENTORIES = 'InventoryNet'
    CURRENT_ASSETS_DEBTORS = 'AccountsReceivableNetCurrent'
    CURRENT_ASSETS_CASH = 'CashAndCashEquivalentsAtCarryingValue'
    CURRENT_ASSETS_OTHERS = 'OtherAssetsCurrent'
    CURRENT_LIABILTIES_LOANS = 'LongTermDebtCurrent'  # original (misspelled) name
    CURRENT_LIABILITIES_CREDITORS = 'AccountsPayableCurrent'
    CURRENT_LIABILITIES_OTHERS = 'OtherLiabilitiesCurrent'
    NON_CURRENT_LOANS = 'LongTermDebtNoncurrent'

    # Correctly spelled aliases. They must come after the originals so that
    # the original names remain the canonical ones.
    NON_CURRENT_LIABILITIES = 'LiabilitiesNoncurrent'
    CURRENT_LIABILITIES_LOANS = 'LongTermDebtCurrent'

In [45]:
def _split_ticker_and_year(filename):
    """Return ``(ticker, year)`` parsed from a ``<ticker>-<year>.csv`` file name.

    Only the last whitespace-separated token of ``filename`` is considered;
    everything from the first ``.csv`` onwards is discarded and the remainder
    is split on ``-``. Raises ``IndexError`` when there is no ``-``.

    NOTE(review): the second component is stored in the ``year`` column, but
    the manual-entry code in this notebook writes file numbers (0-9) there --
    presumably it is a file index rather than a calendar year; confirm.
    """
    stem = filename.split()[-1].split('.csv')[0]
    parts = stem.split('-')
    return parts[0], parts[1]


def _latest_annual_rows(rows):
    """Keep, per fact, the longest-period rows that start in the latest year.

    Step 1 keeps a row when ``end_date - start_date`` is at least 360 days or
    equals the longest period found for that row's fact.
    Step 2 keeps, among the survivors, the rows whose ``start_date`` year is
    the latest one for that fact.

    Rows whose period cannot be computed (missing start or end date) compare
    false in step 1 and are dropped. The result is ordered by fact, keeping
    the input order within each fact, with a fresh 0..n-1 index.
    """
    # Per-row comparison against a per-fact maximum; transform('max') avoids
    # groupby.apply, whose handling of the grouping column is deprecated.
    period_days = (rows['end_date'] - rows['start_date']).dt.days
    longest_period = period_days.groupby(rows['fact']).transform('max')
    rows = rows[(period_days >= 360) | (period_days == longest_period)]

    start_year = rows['start_date'].dt.year
    latest_year = start_year.groupby(rows['fact']).transform('max')
    rows = rows[start_year == latest_year]

    return rows.sort_values('fact', kind='stable').reset_index(drop=True)


wanted_facts = [fact.value for fact in Facts]

for sector in Sectors:
    input_dir = Path('../sec-new') / sector.value
    output_dir = Path('../aggregated_financial_data') / sector.value
    # Create the output directory once per sector, even if the sector has no
    # input files, so later cells can always list it.
    output_dir.mkdir(parents=True, exist_ok=True)

    for filename in sorted(os.listdir(input_dir)):
        # Skip anything that is not a data file (e.g. .ipynb_checkpoints, .DS_Store).
        if not filename.endswith('.csv'):
            continue
        ticker, year = _split_ticker_and_year(filename)

        # The first column of the input is a saved index and is discarded.
        raw = pd.read_csv(input_dir / filename, parse_dates=['start_date', 'end_date']).iloc[:, 1:]

        # Only retrieve the net / overall values. E.g. Net sales has a few
        # sub values which are described under 'dimensions'.
        is_wanted = raw['fact'].isin(wanted_facts) & raw['dimensions'].isna()

        aggregated = (
            _latest_annual_rows(raw[is_wanted])
            .drop(columns=['dimensions'])
            # assign() returns a new frame, so no write happens on a filtered slice.
            .assign(stock_id=ticker, year=year)
            .drop_duplicates()
        )
        # The index is written as the first column; readers drop it with .iloc[:, 1:].
        aggregated.to_csv(output_dir / filename)

In [47]:
# Tag strings of every wanted fact, in declaration order (enum aliases are not iterated).
facts = [member.value for member in Facts]

In [48]:
# Retrieve missing facts for each file and output a list.
# For every aggregated CSV, record which wanted facts are absent from the file
# (plus any facts in the file that are not in the wanted list), then write the
# whole report once.
aggregated_root = Path('../aggregated_financial_data')

missing_records = []
for sector in Sectors:
    sector_dir = aggregated_root / sector.value
    for filename in os.listdir(sector_dir):
        # Skip anything that is not a data file (e.g. .ipynb_checkpoints).
        if not filename.endswith('.csv'):
            continue

        # Only the 'fact' column is needed for the comparison.
        facts_in_file = pd.read_csv(sector_dir / filename, usecols=['fact'])['fact'].unique().tolist()

        absent_from_file = np.setdiff1d(facts, facts_in_file)
        unexpected_in_file = np.setdiff1d(facts_in_file, facts)
        missing_records.append({
            'sector': sector.value,
            'stock': filename,
            'facts': np.concatenate((absent_from_file, unexpected_in_file)),
        })

# Build the frame in one go instead of growing it row by row, and write the
# report a single time rather than once per processed file.
missing_data = pd.DataFrame(missing_records, columns=['sector', 'stock', 'facts'])
missing_data.to_csv(aggregated_root / 'missing_data.csv')

In [50]:
class Information_Technology(Enum):
    """Tickers that receive manually entered values in this notebook.

    Every value equals its member name and is used as the ticker part of the
    ``<ticker>-<file number>.csv`` file names in the Information Technology
    sector directory.
    """

    AAPL  = 'AAPL'
    GOOGL = 'GOOGL'
    META  = 'META'
    MSFT  = 'MSFT'
    NVDA  = 'NVDA'

In [51]:
class Units_Group(Enum):
    """Scale factors for hand-entered figures.

    The member value is the multiplier that turns a figure quoted in that
    unit group into a plain amount (the manual-entry code stores
    ``value * unitsGroup``).
    """

    MILLIONS = 1_000_000
    THOUSANDS = 1_000

In [52]:
class Financial_Data:
    """Handle on one aggregated CSV file, used to add manually sourced facts.

    The file is ``<base_dir>/<sector>/<stock>-<file_no>.csv``.

    Parameters
    ----------
    stock : str
        Ticker part of the file name; also written to the row's stock column.
    file_no : int or str
        Number part of the file name; also written to the row's last column.
    sector : str, optional
        Sector directory name. Defaults to the Information Technology sector
        (``Sectors.INFORMATION_TECHNOLOGY.value``), which is the only sector
        the notebook patches by hand.
    base_dir : str or Path, optional
        Root directory of the aggregated files.
    """

    def __init__(self, stock, file_no, sector=None, base_dir='../aggregated_financial_data'):
        self.stock = stock
        self.file_no = file_no
        # Resolved here (not in the signature) so the default is looked up only when needed.
        self.sector = Sectors.INFORMATION_TECHNOLOGY.value if sector is None else sector
        self.base_dir = Path(base_dir)

    def _csv_path(self):
        """Return the path of the CSV file this object reads and rewrites."""
        return self.base_dir / self.sector / f"{self.stock}-{self.file_no}.csv"

    def add_missing_data(self, fact, value, units, unitsGroup, start_date, end_date):
        """Add one fact row to the file, replacing an earlier row for the same fact and period.

        Parameters
        ----------
        fact : str
            Fact tag to record.
        value : int or float
            Figure expressed in ``unitsGroup`` units; ``value * unitsGroup`` is stored.
        units : str
            Unit label stored with the row (e.g. ``'USD'``).
        unitsGroup : int
            Multiplier applied to ``value`` (e.g. ``Units_Group.MILLIONS.value``).
        start_date, end_date :
            Period of the fact, stored as given.

        The file is read, modified and written back in full. Re-running the
        same call leaves exactly one such row in the file, so the notebook
        cell that calls this method can be executed repeatedly.

        Raises
        ------
        FileNotFoundError
            If the CSV file does not exist.
        ValueError
            If the file does not have exactly eight data columns.
        """
        csv_path = self._csv_path()
        # The first column is the index saved by a previous to_csv call.
        df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date']).iloc[:, 1:]

        # Drop any row previously added for this fact and period so re-runs do not duplicate it.
        same_fact_and_period = (
            (df['fact'] == fact)
            & (pd.to_datetime(df['start_date'], errors='coerce') == pd.to_datetime(start_date))
            & (pd.to_datetime(df['end_date'], errors='coerce') == pd.to_datetime(end_date))
        )
        df = df[~same_fact_and_period].reset_index(drop=True)

        # Positional row: taxonomy, fact, value, units, start, end, stock, file number.
        df.loc[len(df.index)] = ['us-gaap', fact, value * unitsGroup, units, start_date, end_date, self.stock, self.file_no]
        df.to_csv(csv_path)

In [53]:
# Manually insert missing financial values
#
# One entry per aggregated file:
#   (company, file number, start_date, end_date, {fact: value in USD millions})
# Facts are written in the order listed.
#
# Changes from the original one-call-per-row version:
#   * GOOGL files 0 and 1: three of the four rows had end dates of 2022-09-26 /
#     2021-09-26, which precede their start dates. They now use the same
#     31 December date as the AssetsNoncurrent row of the same file.
#
# NOTE(review): outside AAPL, every entry uses the same date for start and end,
#   including for facts such as GrossProfit, revenue, cost of sales and
#   operating cash flow. AAPL entries use a full-year range. Confirm which
#   convention downstream code expects.
# NOTE(review): every NVDA entry is dated 29 January -- confirm against the
#   actual period end of each filing.
MANUAL_FACT_ENTRIES = [
    # AAPL
    (Information_Technology.AAPL, 5, '2017-10-01', '2018-09-29', {
        'RevenueFromContractWithCustomerExcludingAssessedTax': 265595,
    }),
    (Information_Technology.AAPL, 6, '2016-09-25', '2017-09-30', {
        'RevenueFromContractWithCustomerExcludingAssessedTax': 229234,
        'AssetsNoncurrent': 246674,
        'LiabilitiesNoncurrent': 140458,
        'OtherLiabilitiesCurrent': 0,
    }),
    (Information_Technology.AAPL, 7, '2015-09-27', '2016-09-24', {
        'RevenueFromContractWithCustomerExcludingAssessedTax': 215639,
        'AssetsNoncurrent': 214817,
        'LiabilitiesNoncurrent': 114431,
        'NetCashProvidedByUsedInOperatingActivities': 65824,
        'OtherLiabilitiesCurrent': 0,
    }),
    (Information_Technology.AAPL, 8, '2014-09-28', '2015-09-26', {
        'RevenueFromContractWithCustomerExcludingAssessedTax': 233715,
        'AssetsNoncurrent': 201101,
        'LiabilitiesNoncurrent': 90514,
        'NetCashProvidedByUsedInOperatingActivities': 81266,
        'OtherLiabilitiesCurrent': 0,
    }),
    (Information_Technology.AAPL, 9, '2013-09-29', '2014-09-27', {
        'RevenueFromContractWithCustomerExcludingAssessedTax': 182795,
        'AssetsNoncurrent': 163308,
        'LiabilitiesNoncurrent': 56844,
        'NetCashProvidedByUsedInOperatingActivities': 59713,
        'LongTermDebtNoncurrent': 28987,
        'OtherLiabilitiesCurrent': 0,
        'LongTermDebtCurrent': 0,
    }),

    # GOOGL
    # The original left these fact names as a bare comment after the file-0
    # rows (presumably still to be filled in -- confirm):
    #   CostOfGoodsAndServicesSold, LongTermDebtCurrent, OtherLiabilitiesCurrent
    (Information_Technology.GOOGL, 0, '2022-12-31', '2022-12-31', {
        'AssetsNoncurrent': 200469,
        'LiabilitiesNoncurrent': 39820,
        'GrossProfit': 156633,
        'LongTermDebtNoncurrent': 14701,
    }),
    (Information_Technology.GOOGL, 1, '2021-12-31', '2021-12-31', {
        'AssetsNoncurrent': 171125,
        'LiabilitiesNoncurrent': 43379,
        'GrossProfit': 146698,
        'LongTermDebtNoncurrent': 14817,
    }),
    (Information_Technology.GOOGL, 2, '2020-12-31', '2020-12-31', {
        'AssetsNoncurrent': 145320,
        'LiabilitiesNoncurrent': 40238,
        'GrossProfit': 97795,
        'LongTermDebtNoncurrent': 13932,
    }),
    (Information_Technology.GOOGL, 3, '2019-12-31', '2019-12-31', {
        'LiabilitiesNoncurrent': 29246,
        'GrossProfit': 89961,
    }),
    (Information_Technology.GOOGL, 4, '2018-12-31', '2018-12-31', {
        'LiabilitiesNoncurrent': 20544,
        'GrossProfit': 77270,
    }),
    (Information_Technology.GOOGL, 5, '2017-12-31', '2017-12-31', {
        'LiabilitiesNoncurrent': 20610,
        'GrossProfit': 65272,
    }),
    (Information_Technology.GOOGL, 6, '2016-12-31', '2016-12-31', {
        'LiabilitiesNoncurrent': 11705,
        'GrossProfit': 55134,
    }),
    (Information_Technology.GOOGL, 7, '2015-12-31', '2015-12-31', {
        'LiabilitiesNoncurrent': 7820,
        'GrossProfit': 46825,
    }),

    # META
    (Information_Technology.META, 0, '2022-12-31', '2022-12-31', {
        'AssetsNoncurrent': 126178,
        'LiabilitiesNoncurrent': 32988,
        'GrossProfit': 91360,
    }),
    (Information_Technology.META, 1, '2021-12-31', '2021-12-31', {
        'AssetsNoncurrent': 99321,
        'LiabilitiesNoncurrent': 19973,
        'GrossProfit': 95280,
    }),
    (Information_Technology.META, 2, '2020-12-31', '2020-12-31', {
        'AssetsNoncurrent': 83646,
        'LiabilitiesNoncurrent': 16045,
        'GrossProfit': 69273,
    }),
    (Information_Technology.META, 3, '2019-12-31', '2019-12-31', {
        'AssetsNoncurrent': 67151,
        'LiabilitiesNoncurrent': 17269,
        'GrossProfit': 57927,
    }),
    (Information_Technology.META, 4, '2018-12-31', '2018-12-31', {
        'AssetsNoncurrent': 46854,
        'LiabilitiesNoncurrent': 6190,
        'GrossProfit': 46483,
    }),
    (Information_Technology.META, 5, '2017-12-31', '2017-12-31', {
        'AssetsNoncurrent': 35961,
        'LiabilitiesNoncurrent': 6417,
        'GrossProfit': 35199,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 40653,
    }),
    (Information_Technology.META, 6, '2016-12-31', '2016-12-31', {
        'AssetsNoncurrent': 30560,
        'LiabilitiesNoncurrent': 2892,
        'GrossProfit': 23849,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 27638,
    }),
    (Information_Technology.META, 7, '2015-12-31', '2015-12-31', {
        'AssetsNoncurrent': 27755,
        'LiabilitiesNoncurrent': 3264,
        'GrossProfit': 15061,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 17928,
    }),
    (Information_Technology.META, 8, '2014-12-31', '2014-12-31', {
        'AssetsNoncurrent': 26576,
        'LiabilitiesNoncurrent': 2664,
        'GrossProfit': 10313,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 12466,
    }),
    (Information_Technology.META, 9, '2013-12-31', '2013-12-31', {
        'AssetsNoncurrent': 4825,
        'LiabilitiesNoncurrent': 1325,
        'GrossProfit': 5997,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 7872,
    }),

    # MSFT
    (Information_Technology.MSFT, 0, '2023-06-30', '2023-06-30', {
        'AssetsNoncurrent': 227719,
        'LiabilitiesNoncurrent': 101604,
    }),
    (Information_Technology.MSFT, 1, '2022-06-30', '2022-06-30', {
        'AssetsNoncurrent': 195156,
        'LiabilitiesNoncurrent': 103216,
    }),
    (Information_Technology.MSFT, 2, '2021-06-30', '2021-06-30', {
        'AssetsNoncurrent': 149373,
        'LiabilitiesNoncurrent': 103134,
    }),
    (Information_Technology.MSFT, 3, '2020-06-30', '2020-06-30', {
        'AssetsNoncurrent': 119396,
        'LiabilitiesNoncurrent': 110697,
    }),
    (Information_Technology.MSFT, 4, '2019-06-30', '2019-06-30', {
        'AssetsNoncurrent': 111004,
        'LiabilitiesNoncurrent': 114806,
    }),
    (Information_Technology.MSFT, 5, '2018-06-30', '2018-06-30', {
        'AssetsNoncurrent': 89186,
        'LiabilitiesNoncurrent': 117642,
    }),
    (Information_Technology.MSFT, 6, '2017-06-30', '2017-06-30', {
        'AssetsNoncurrent': 87616,
        'LiabilitiesNoncurrent': 106856,
    }),
    (Information_Technology.MSFT, 7, '2016-06-30', '2016-06-30', {
        'AssetsNoncurrent': 54034,
        'LiabilitiesNoncurrent': 62340,
        'NetCashProvidedByUsedInOperatingActivities': 33325,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 85320,
    }),
    (Information_Technology.MSFT, 8, '2015-06-30', '2015-06-30', {
        'AssetsNoncurrent': 51675,
        'LiabilitiesNoncurrent': 44742,
        'NetCashProvidedByUsedInOperatingActivities': 29080,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 93580,
    }),
    (Information_Technology.MSFT, 9, '2014-06-30', '2014-06-30', {
        'AssetsNoncurrent': 58138,
        'LiabilitiesNoncurrent': 36975,
        'NetCashProvidedByUsedInOperatingActivities': 32231,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 86833,
    }),

    # NVDA
    (Information_Technology.NVDA, 0, '2023-01-29', '2023-01-29', {
        'AssetsNoncurrent': 18109,
        'LiabilitiesNoncurrent': 12518,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 26974,
        'CostOfGoodsAndServicesSold': 11618,
        'OtherLiabilitiesCurrent': 4120,
        'OtherAssetsCurrent': 791,
    }),
    (Information_Technology.NVDA, 1, '2022-01-29', '2022-01-29', {
        'AssetsNoncurrent': 15358,
        'LiabilitiesNoncurrent': 13240,
        'CostOfGoodsAndServicesSold': 9439,
        'OtherLiabilitiesCurrent': 2552,
        'OtherAssetsCurrent': 366,
    }),
    (Information_Technology.NVDA, 2, '2021-01-29', '2021-01-29', {
        'AssetsNoncurrent': 12736,
        'LiabilitiesNoncurrent': 7973,
        'OtherLiabilitiesCurrent': 1725,
        'OtherAssetsCurrent': 239,
    }),
    (Information_Technology.NVDA, 3, '2020-01-29', '2020-01-29', {
        'AssetsNoncurrent': 3625,
        'LiabilitiesNoncurrent': 3327,
        'OtherLiabilitiesCurrent': 1097,
        'OtherAssetsCurrent': 157,
        'LongTermDebtCurrent': 0,
        'LongTermDebtNoncurrent': 5964,
    }),
    (Information_Technology.NVDA, 4, '2019-01-29', '2019-01-29', {
        'AssetsNoncurrent': 2735,
        'LiabilitiesNoncurrent': 2621,
        'OtherLiabilitiesCurrent': 818,
        'OtherAssetsCurrent': 136,
        'LongTermDebtCurrent': 0,
        'LongTermDebtNoncurrent': 1988,
    }),
    (Information_Technology.NVDA, 5, '2018-01-29', '2018-01-29', {
        'AssetsNoncurrent': 1986,
        'LiabilitiesNoncurrent': 2617,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 9714,
        'CostOfGoodsAndServicesSold': 3892,
        'OtherLiabilitiesCurrent': 542,
        'OtherAssetsCurrent': 86,
        'LongTermDebtCurrent': 15,
        'LongTermDebtNoncurrent': 1985,
    }),
    (Information_Technology.NVDA, 6, '2017-01-29', '2017-01-29', {
        'AssetsNoncurrent': 1305,
        'LiabilitiesNoncurrent': 2260,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 6910,
        'CostOfGoodsAndServicesSold': 2847,
        'OtherLiabilitiesCurrent': 507,
        'OtherAssetsCurrent': 118,
        'LongTermDebtCurrent': 796,
        'LongTermDebtNoncurrent': 1983,
    }),
    (Information_Technology.NVDA, 7, '2016-01-29', '2016-01-29', {
        'AssetsNoncurrent': 1317,
        'LiabilitiesNoncurrent': 463,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 5010,
        'CostOfGoodsAndServicesSold': 2199,
        'OtherLiabilitiesCurrent': 642,
        'OtherAssetsCurrent': 93,
        'LongTermDebtCurrent': 1413,
        'LongTermDebtNoncurrent': 0,
    }),
    (Information_Technology.NVDA, 8, '2015-01-29', '2015-01-29', {
        'AssetsNoncurrent': 1488,
        'LiabilitiesNoncurrent': 1887,
        'RevenueFromContractWithCustomerExcludingAssessedTax': 4682,
        'CostOfGoodsAndServicesSold': 2082.03,
        'OtherLiabilitiesCurrent': 602.807,
        'OtherAssetsCurrent': 70.174,
        'LongTermDebtCurrent': 0,
        'LongTermDebtNoncurrent': 1384.342,
    }),
    (Information_Technology.NVDA, 9, '2014-01-29', '2014-01-29', {
        'AssetsNoncurrent': 1626.183,
        'LiabilitiesNoncurrent': 1849,
        # NOTE(review): this revenue figure is identical to LiabilitiesNoncurrent
        # above and lower than CostOfGoodsAndServicesSold below -- it looks like
        # a copy-paste slip; verify against the filing before relying on it.
        'RevenueFromContractWithCustomerExcludingAssessedTax': 1849,
        'CostOfGoodsAndServicesSold': 1862.399,
        'OtherLiabilitiesCurrent': 621.105,
        'OtherAssetsCurrent': 70.285,
        'LongTermDebtCurrent': 0,
        'LongTermDebtNoncurrent': 1356.375,
    }),
]

for company, file_no, period_start, period_end, values_in_millions in MANUAL_FACT_ENTRIES:
    filing = Financial_Data(company.value, file_no)
    for fact_name, amount_in_millions in values_in_millions.items():
        filing.add_missing_data(
            fact_name,
            amount_in_millions,
            'USD',
            Units_Group.MILLIONS.value,
            pd.to_datetime(period_start),
            pd.to_datetime(period_end),
        )
