# P4 - Calculate Price-to-Book-Value Ratio

## Overview:

This project calculates the price-to-book-value (P/B) ratio for the S&P 500 constituents. P/B is defined as the month-end price divided by the latest reported book value per share.
Techniques used in the project:

1. Processing data exceptions and outliers.  
2. Working with missing data. 

In [22]:
import yfinance as yf
import pandas as pd
import requests
import json
import numpy as np
import pickle
import copy
import datetime as dt
import os

# Relative paths to the download and generate dataset folders; both end with '/'
# so file names can be appended by plain string concatenation.
data_folder_download = '../datasets/download/'
data_folder_generate = '../datasets/generate/'

# Restore variables previously saved with IPython's %store magic.
%store -r removed_tickers_list
%store -r sp500_tickers

## Step 1: Read in all the necessary data files 

In [23]:
# Load the pickled object in 'sp500_financial_data_v2.0.pkl'; it is later passed
# to calculate_bvps, which reads it as ticker -> label -> DataFrame with a 'val' column.
with open(data_folder_generate + 'sp500_financial_data_v2.0.pkl', 'rb') as f: 
    current_sp500_financial_data = pickle.load(f)

In [24]:
# Load the pickled object in 'sp500_df_v2.0.pkl' into sp500_df.
# NOTE(review): sp500_df is not referenced again in this part of the notebook.
with open(data_folder_generate + 'sp500_df_v2.0.pkl', 'rb') as f: 
    sp500_df = pickle.load(f)

In [25]:
# Load the pickled object in 'removed_sp500_financial_data_v2.1.pkl'; Step 6.4
# deep-copies it before adjusting share counts.
with open(data_folder_generate + 'removed_sp500_financial_data_v2.1.pkl', 'rb') as f: 
    removed_sp500_financial_data = pickle.load(f)

In [26]:
# Load the pickled object in 'sp500_df_v1.0.pkl' into current_sp500_df.
# NOTE(review): current_sp500_df is not referenced again in this part of the notebook.
with open(data_folder_generate + 'sp500_df_v1.0.pkl', 'rb') as f: 
    current_sp500_df = pickle.load(f)

In [44]:
# Reload the monthly price table from 'price_df.pkl' (the file written at the
# end of Step 6.1) so the price CSVs do not have to be re-read.
with open(data_folder_generate + 'price_df.pkl', 'rb') as f: 
    price_df = pickle.load(f)

## Step 2: Calculate book_value_per_share

In [27]:
# Share-count labels in tie-breaking order: when several labels have the same
# number of non-null values, the one listed first is used.
_SHARE_LABELS_BY_PRIORITY = (
    'WeightedAverageNumberOfDilutedSharesOutstanding',
    'WeightedAverageNumberOfSharesOutstandingBasic',
    'CommonStockSharesOutstanding',
)


def _select_book_value(facts):
    """Return the 'val' series used as book value for one ticker's facts."""
    equity = facts['StockholdersEquity']
    equity_incl_nci = facts[
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest'
    ]

    # Prefer plain stockholders' equity only when it is strictly more complete.
    if equity['val'].notnull().sum() > equity_incl_nci['val'].notnull().sum():
        return equity['val']
    if not equity_incl_nci.empty:
        return equity_incl_nci['val']

    # Neither equity label has data: derive equity as total minus liabilities.
    total = facts['LiabilitiesAndStockholdersEquity']['val']
    if not facts['Liabilities'].empty:
        return total - facts['Liabilities']['val']
    return (
        total
        - facts['LiabilitiesCurrent']['val']
        - facts['LiabilitiesNoncurrent']['val']
    )


def _select_share_label(facts):
    """Return the share-count label with the most non-null 'val' entries."""
    counts = {
        label: facts[label]['val'].notnull().sum()
        for label in _SHARE_LABELS_BY_PRIORITY
    }
    # max() returns the first maximal item, so ties follow the priority order
    # and a label is always selected.
    return max(_SHARE_LABELS_BY_PRIORITY, key=counts.get)


def calculate_bvps(sp500_financial_data_BS, start_date='2013-03-31'):
    """Compute book value per share (BVPS) for every ticker.

    Book value is taken from 'StockholdersEquity' when it has strictly more
    non-null values than the equity label that includes noncontrolling
    interest; otherwise from that second label; and when that label's frame
    is empty, from 'LiabilitiesAndStockholdersEquity' minus liabilities.

    The share count comes from whichever of the diluted, basic or common
    share labels has the most non-null values. Ties are resolved in that
    order. Previously a tie between basic and common that beat diluted
    matched no branch, so the ticker either raised UnboundLocalError or
    silently received the previous ticker's result.

    Args:
        sp500_financial_data_BS: mapping of ticker -> mapping of label ->
            DataFrame with a 'val' column. The index must support label
            slicing from ``start_date`` (assumed to be a sorted date index).
        start_date: first index label kept in the result. Defaults to the
            previously hard-coded '2013-03-31'.

    Returns:
        dict mapping each ticker to its BVPS series (book value divided by
        share count, aligned on the index).
    """
    book_value_per_share_dict = {}  # Collect results in a dictionary first

    for ticker, facts in sp500_financial_data_BS.items():
        book_value = _select_book_value(facts)
        share_label = _select_share_label(facts)

        book_value_per_share_dict[ticker] = book_value.loc[start_date:].divide(
            facts[share_label].loc[start_date:, 'val']
        )

    return book_value_per_share_dict

In [34]:
# Compute book value per share for every ticker in current_sp500_financial_data.
current_book_value_per_share_dict = calculate_bvps(current_sp500_financial_data)

In [31]:
# Count the non-null BVPS observations for each ticker in removed_tickers_list.
# NOTE(review): removed_book_value_per_share_dict is not created anywhere in the
# visible notebook; presumably calculate_bvps(removed_sp500_financial_data) — confirm.
check_result = check_accuracy(removed_book_value_per_share_dict, removed_tickers_list)

In [30]:
def check_accuracy(book_value_per_share_dict, sp500_tickers):
    """Report how many BVPS observations each requested ticker has.

    Args:
        book_value_per_share_dict: mapping of ticker -> object exposing a
            no-argument ``count()`` (expected to be a pandas Series, whose
            ``count()`` returns the number of non-null values).
        sp500_tickers: iterable of tickers to look up; each must be a key of
            ``book_value_per_share_dict``.

    Returns:
        dict mapping each ticker to its ``count()`` result, in lookup order.
    """
    return {
        symbol: book_value_per_share_dict[symbol].count()
        for symbol in sp500_tickers
    }

In [32]:
# Order tickers by ascending observation count so the least complete series
# appear first; the bare name on the last line displays the result.
sorted_dict = dict(sorted(check_result.items(), key=lambda item: item[1]))
sorted_dict

{'STI': 5,
 'SE': 7,
 'SAI': 8,
 'IGT': 11,
 'MBC': 11,
 'ADT': 12,
 'NE': 14,
 'S': 17,
 'PLL': 18,
 'BEAM': 19,
 'OGN': 19,
 'VNT': 20,
 'DNB': 23,
 'XRX': 25,
 'ALTR': 29,
 'BHF': 29,
 'ZION': 29,
 'FL': 30,
 'FTI': 32,
 'DXC': 34,
 'DELL': 35,
 'AA': 36,
 'SEDG': 40,
 'SWN': 42,
 'ANF': 43,
 'GME': 43,
 'UA': 43,
 'UAA': 43,
 'BTU': 44,
 'PRGO': 44,
 'SEE': 44,
 'AYI': 45,
 'BIG': 45,
 'COTY': 45,
 'CPRI': 45,
 'FOSL': 45,
 'GPS': 45,
 'HBI': 45,
 'HRB': 45,
 'ILMN': 45,
 'KSS': 45,
 'PDCO': 45,
 'VFC': 45,
 'AAP': 46,
 'AIV': 46,
 'ALK': 46,
 'AMG': 46,
 'AN': 46,
 'ATI': 46,
 'CHK': 46,
 'CLF': 46,
 'CMA': 46,
 'CNX': 46,
 'DO': 46,
 'FHN': 46,
 'FLR': 46,
 'FLS': 46,
 'GHC': 46,
 'GNW': 46,
 'GT': 46,
 'HOG': 46,
 'HP': 46,
 'IPGP': 46,
 'JEF': 46,
 'JWN': 46,
 'LEG': 46,
 'LNC': 46,
 'LUMN': 46,
 'M': 46,
 'MAC': 46,
 'MAT': 46,
 'MUR': 46,
 'NAVI': 46,
 'NBR': 46,
 'NKTR': 46,
 'NOV': 46,
 'NWL': 46,
 'OI': 46,
 'PBI': 46,
 'PENN': 46,
 'PVH': 46,
 'R': 46,
 'RHI': 46,
 'RIG':

## Step 3: Uncover the reasons for the outliers. 
'A': 47,
 'ADI': 47,
 'AMAT': 47,
 'ADSK': 47,
 'BBWI': 47,
 'BF.B': 47,
 'COO': 47,
 'DG': 47,
 'HD': 47,
 'INTU': 47,
 'LDOS': 47,
 'LULU': 47,
 'NTAP': 47,
 'NDSN': 47,
 'NVDA': 47,
 'SJM': 47,
 'ULTA': 47,
 'WMT': 47,
 'JCI': 48,
 'KDP': 48,
 'DE': 49,
 'GPN': 50

#### Step 3.1 Handling exceptions: 

In [116]:

# current_sp500_financial_data['DE']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2015-11-01','2016-10-30', '2016-05-01'], inplace = True)
# current_sp500_financial_data['DE']['StockholdersEquity'].drop(['2016-05-01','2016-10-30'], inplace = True)
# current_sp500_financial_data['DE']['StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest'].drop(['2014-11-02', '2015-11-01', '2016-05-01', '2016-10-30'], inplace = True)

# current_sp500_financial_data['GPN']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2016-09-30','2016-03-31', '2016-06-30', '2016-12-31'], inplace = True)
# current_sp500_financial_data['GPN']['StockholdersEquity'].drop(['2016-11-30'], inplace = True)
# current_sp500_financial_data['GPN']['StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest'].drop(['2015-12-31', '2016-03-31', '2016-06-30', '2016-09-30', '2016-12-31'], inplace = True)

# current_sp500_financial_data['KDP']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2016-03-02','2016-09-24', '2016-12-24'], inplace = True)
# current_sp500_financial_data['KDP']['StockholdersEquity'].drop(['2015-09-26', '2016-03-02', '2016-09-24'], inplace = True)
# current_sp500_financial_data['JCI']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2016-03-25','2016-06-24'], inplace = True)
# current_sp500_financial_data['JCI']['StockholdersEquity'].drop(['2013-09-30', '2014-09-30', '2015-09-30'], inplace = True)

# removed_sp500_financial_data['ATI']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2022-01-02', '2023-01-01', '2023-04-02', '2023-07-02'], inplace = True)
# removed_sp500_financial_data['ATI']['StockholdersEquity'].drop(['2023-01-01'], inplace = True)
# removed_sp500_financial_data['ATI']['StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest'].drop(['2021-01-03', '2022-01-02', '2023-01-01', '2023-04-02', '2023-07-02'], inplace = True)

# removed_sp500_financial_data['HRB']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2020-06-30', '2020-09-30', '2020-12-31', '2021-03-31', '2021-12-31'], inplace = True)
# removed_sp500_financial_data['HRB']['StockholdersEquity'].drop(['2020-05-01', '2020-06-30', '2020-09-30', '2020-12-31', '2021-03-31', '2021-05-01', '2021-12-31'], inplace = True)

# removed_sp500_financial_data['DO']['StockholdersEquity'].drop(['2021-04-23', '2021-04-24'], inplace = True)
# removed_sp500_financial_data['CHK']['WeightedAverageNumberOfDilutedSharesOutstanding'].drop(['2021-02-09'], inplace = True)
# removed_sp500_financial_data['CHK']['StockholdersEquity'].drop(['2021-02-09'], inplace = True)
#No shares outstanding data available on SEC: STZ and V

In [105]:
# Save current_sp500_financial_data as 'sp500_financial_data_v2.1.pkl'.
# NOTE(review): Step 1 loads the v2.0 file, not this v2.1 one — confirm the versioning.
with open(data_folder_generate + 'sp500_financial_data_v2.1.pkl', 'wb') as f: 
    pickle.dump(current_sp500_financial_data, f)

In [117]:
# Save removed_sp500_financial_data back to 'removed_sp500_financial_data_v2.1.pkl',
# the same file Step 1 loads it from.
with open(data_folder_generate + 'removed_sp500_financial_data_v2.1.pkl', 'wb') as f: 
    pickle.dump(removed_sp500_financial_data, f)

## Step 4: Analyze financial data set available among the S&P 500 tickers on SEC

In [347]:
# For every ticker, collect the us-gaap fact labels whose name contains
# 'liabilities' (case-insensitive). Despite its name, item_list is a dict
# keyed by ticker; each value is a list of label names.
# NOTE(review): get_facts is not defined in this part of the notebook; presumably
# it returns the SEC company-facts JSON for a ticker — confirm.
item_list = {}
for ticker in sp500_tickers: 
    abt_fact= get_facts(ticker)
    item_list[ticker] = [item for item in abt_fact['facts']['us-gaap'].keys() if 'liabilities' in item.lower()]

In [None]:
# Rank the collected labels by how many tickers report them, to find the
# most widely available label.
from collections import Counter

# Flatten the per-ticker label lists into a single list
all_items = [item for sublist in item_list.values() for item in sublist]

# Count how many times each label occurs across all tickers
item_counts = Counter(all_items)

# most_common() with no argument returns every (label, count) pair,
# ordered from most to least frequent
most_common_items = item_counts.most_common()

# Print one "label: count" line per label, most frequent first
for item, count in most_common_items:
    print(f"{item}: {count}")

In [402]:
# Record the length of each ticker's BVPS series (NaN entries included).
# Despite its name, length_list is a dict keyed by ticker.
# NOTE(review): book_value_per_share_dict is not defined in the visible notebook — confirm its source.
length_list = {}
for key in book_value_per_share_dict.keys(): 
    length_list[key] = len(book_value_per_share_dict[key].values)

In [None]:
# Replace each ticker's BVPS series with a copy that has its NaN entries removed.
for key in book_value_per_share_dict.keys(): 
    book_value_per_share_dict[key] = book_value_per_share_dict[key].dropna(axis = 0)

### Availability Summary of the financial data on SEC

for shareholder's equity: 
- LiabilitiesAndStockholdersEquity: 503
- StockholdersEquity: 495
- StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest: 402

for earnings per share: 
- EarningsPerShareDiluted: 498 
- EarningsPerShareBasic: 498

for shares_outstanding data: 
- CommonStockSharesOutstanding: 397
- WeightedAverageNumberOfDilutedSharesOutstanding: 498
- WeightedAverageNumberOfSharesOutstandingBasic: 497

for assets and liabilities: 
- LiabilitiesAndStockholdersEquity: 503
- LiabilitiesCurrent: 424
- Liabilities: 367
- LiabilitiesNoncurrent: 897

## Step 5: Combining current and removed tickers bvps results

In [35]:
# Number of tickers with a BVPS entry in the current-constituents result.
len(current_book_value_per_share_dict)

503

In [36]:
# Number of tickers with a BVPS entry in the removed-tickers result.
len(removed_book_value_per_share_dict)

100

In [37]:
# Merge both BVPS dictionaries into combined_bvps_dict. If a ticker appears in
# both, the entry from removed_book_value_per_share_dict wins because it is
# unpacked last.
combined_bvps_dict = {**current_book_value_per_share_dict, **removed_book_value_per_share_dict}

In [125]:
# Display the tickers present in the merged dictionary.
combined_bvps_dict.keys()

dict_keys(['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BK', 'BBWI', 'BAX', 'BDX', 'BRK.B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BX', 'BA', 'BKNG', 'BWA', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CRWD', 'CCI', 'CSX'

## Step 6: Calculate Price-to-Book-Value

### Step 6.1: Create monthly price table for all the tickers

In [4]:
# Build a dict of month-end price frames, one entry per file in the
# company price folder, keyed by the file name without its extension.
price = {}
folder = '../datasets/download/companyprice/'
path = [f for f in os.listdir(folder) if not f.startswith('.')]  # Exclude hidden files
for filename in path:
    # Strip the extension with splitext rather than a fixed 4-character slice,
    # so the ticker is correct whatever the extension length.
    ticker = os.path.splitext(filename)[0]
    df = pd.read_csv(os.path.join(folder, filename), index_col='Date', parse_dates=True)
    # Kept in case parse_dates left the index unparsed.
    df.index = pd.to_datetime(df.index)
    # Keep the last observation of each calendar month.
    # NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME'.
    price[ticker] = df.resample('M').last()

In [5]:
# Put all tickers side by side; keys= adds the ticker as the outer column level.
price_df = pd.concat(price.values(), axis = 1, keys = price.keys())
# Drop the inner column level so columns are labelled by ticker only
# (assumes each CSV contributes a single value column — TODO confirm).
price_df.columns = price_df.columns.droplevel(1)
# Discard the first two monthly rows; presumably so the table starts at
# 2013-03-31 like the BVPS data — confirm.
price_df = price_df.iloc[2: ]

In [6]:
# Use '.' instead of '-' in ticker names (e.g. 'BRK-B' -> 'BRK.B') so the price
# columns match the ticker spelling used as keys in the BVPS data.
price_df.columns = price_df.columns.str.replace('-', '.')

In [7]:
# Remove, in place, every ticker column that contains no price at all.
price_df.dropna(how = 'all', axis = 1, inplace = True)

In [14]:
# Cache the monthly price table as 'price_df.pkl'; Step 1 reloads it from there.
with open(data_folder_generate + 'price_df.pkl', 'wb') as f:
    pickle.dump(price_df, f)

In [15]:
# Preview the first rows of the monthly price table.
price_df.head()

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,ABBV,ABNB,ABT,ACGL,ACN,...,XEL,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-03-31,27.278889,19.016108,15.998322,74.5616,13.54761,25.616907,,28.342796,17.523333,61.554775,...,20.615463,55.978901,38.445984,12.640043,23.703783,41.507812,66.672211,47.130001,19.427004,30.671997
2013-04-30,26.934406,18.971474,15.932325,75.671204,13.551282,29.195095,,29.738949,17.686666,66.695747,...,22.066179,55.283142,38.36446,12.610646,23.867195,39.501877,67.762436,46.650002,19.139362,30.383537
2013-05-31,29.540754,19.035858,16.563971,73.542175,13.855092,27.064905,,29.537575,17.07,67.244415,...,19.935219,56.591373,37.829987,12.919297,24.300058,39.287334,69.58831,45.66,21.852919,29.444981
2013-06-30,27.869995,17.512991,15.479815,73.278946,12.21613,26.209026,,28.095737,17.136667,58.93203,...,19.860083,56.516281,37.163445,13.417597,23.263811,40.209366,66.601692,43.439999,22.514654,28.423599
2013-07-31,29.153996,17.804117,18.24205,74.470634,13.941354,29.098808,,29.624743,18.056667,60.447117,...,20.988346,58.643066,38.905491,14.349576,21.528093,42.480286,74.19146,46.169998,23.075186,27.489218


### Step 6.2: Forward Fill the bvps file on a monthly basis.

In [38]:
# Replace each ticker's BVPS series with a month-end version of itself, so it
# can be aligned with the monthly price table.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME'.
for ticker in combined_bvps_dict.keys():
    # Forward fill quarterly data to align with monthly price data
    combined_bvps_dict[ticker] = combined_bvps_dict[ticker].resample('M').ffill()

In [39]:
# Assemble one table with a column per ticker; dates missing for a ticker are NaN.
combined_bvps_df = pd.concat(combined_bvps_dict.values(), axis = 1, keys = combined_bvps_dict.keys())

In [41]:
# Preview the first rows of the combined BVPS table.
combined_bvps_df.head()

Unnamed: 0_level_0,MMM,AOS,ABT,ABBV,ACN,ADBE,AMD,AES,AFL,A,...,URBN,VFC,VNO,VNT,WHR,WU,X,XRAY,XRX,ZION
end,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-03-31,33.395818,,12.953337,1.669678,,,,10.856942,27.406419,,...,,,42.769972,,56.827757,1.610672,24.045222,15.640315,,34.579255
2013-04-30,33.395818,,12.953337,1.669678,,,,10.856942,27.406419,18.230241,...,9.582729,,42.769972,,56.827757,1.610672,24.045222,15.640315,,34.579255
2013-05-31,33.395818,,12.953337,1.669678,9.140954,15.346261,,10.856942,27.406419,18.230241,...,9.582729,,42.769972,,56.827757,1.610672,24.045222,15.640315,,34.579255
2013-06-30,33.019106,,12.807359,2.009034,9.140954,15.346261,0.219304,10.949509,24.162106,18.230241,...,9.582729,11.625636,42.197033,,57.435265,1.663263,24.044018,15.765512,,37.271854
2013-07-31,33.019106,,12.807359,2.009034,9.140954,15.346261,0.219304,10.949509,24.162106,16.450172,...,10.170538,11.625636,42.197033,,57.435265,1.663263,24.044018,15.765512,,37.271854


In [36]:
# Restore pandas' default limit on the number of rows displayed.
pd.reset_option('display.max_rows')

In [42]:
# Count the NaNs per column, looking only at the last four rows
na_count = combined_bvps_df.iloc[-4:].isna().sum()

# Keep the columns with fewer than 4 NaNs there, i.e. at least one value in
# the last four rows (assuming the table has at least four rows)
cols_to_fill = na_count[na_count < 4].index

# Forward fill those columns. ffill() runs over the whole column, so earlier
# gaps in the selected columns are filled as well as the trailing NaNs.
combined_bvps_df[cols_to_fill] = combined_bvps_df[cols_to_fill].ffill()

### Step 6.3: Divide price by bvps

In [45]:
# Element-wise price / BVPS. pandas aligns both tables on index and column
# labels, so a ticker or date missing from either side yields NaN.
price_book_value_df = price_df / combined_bvps_df

In [46]:
# Preview the first rows of the price-to-book table.
price_book_value_df.head()

Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,ABBV,ABNB,ABT,ACGL,ACN,...,XEL,XOM,XRAY,XRX,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-03-31,,,-1.375791,,,15.342422,,2.188069,1.253278,,...,1.114088,1.450599,2.458134,,2.82979,,2.408995,,0.561811,19.318274
2013-04-30,1.477458,,-1.370115,4.396082,,17.485463,,2.295852,1.26496,,...,1.192487,1.43257,2.452921,,2.849299,,2.448387,,0.553493,19.136593
2013-05-31,1.620426,,-1.424434,4.272397,,16.209654,,2.280306,1.220856,7.356389,...,1.077327,1.46647,2.418748,,2.900974,,2.51436,,0.631966,18.545457
2013-06-30,1.528778,,-1.357125,4.257105,1.519978,13.045583,,2.193718,1.249323,6.447033,...,1.061943,1.460106,2.357262,,2.761024,5.173135,2.334045,2.511219,0.604066,15.748677
2013-07-31,1.772261,,-1.599292,3.989762,1.734636,14.483977,,2.313103,1.316394,6.61278,...,1.122273,1.515052,2.467759,,2.555024,5.4653,2.600027,2.669037,0.619105,15.230964


In [47]:
# Save the price-to-book table as 'price_book_value_df_v1.1.pkl'.
with open(data_folder_generate + 'price_book_value_df_v1.1.pkl', 'wb') as f:
    pickle.dump(price_book_value_df, f) # reflected on the updated weighting. 

In [3]:
# Reload the price-to-book table from 'price_book_value_df_v1.1.pkl'.
with open(data_folder_generate + 'price_book_value_df_v1.1.pkl', 'rb') as f:
    price_book_value_df = pickle.load(f) # reflected on the updated weighting. 

### Step 6.4 Adjust for shares outstanding

In [50]:
# Work on a deep copy so the share-count overwrite in the next cell leaves
# removed_sp500_financial_data untouched.
sp_500_financial2 = copy.deepcopy(removed_sp500_financial_data)

In [109]:
# Share-count labels in the order they are tried for each ticker.
share_labels = (
    'WeightedAverageNumberOfDilutedSharesOutstanding',
    'WeightedAverageNumberOfSharesOutstandingBasic',
    'CommonStockSharesOutstanding',
)

for ticker in sp_500_financial2.keys():
    for label in share_labels:
        df = sp_500_financial2[ticker].get(label)

        # Skip labels that are missing or have no rows; try the next one.
        if df is None or df.empty:
            continue

        # Overwrite the entire 'val' column with the value from the last row,
        # then stop: only the first usable label is adjusted.
        last_value = df.iloc[-1]['val']
        sp_500_financial2[ticker][label]['val'] = last_value
        break
    else:
        # None of the three labels had data for this ticker.
        print(f"No relevant data for {ticker}") 

In [87]:
# Save the share-adjusted data as 'sp500_financial_data_v2.0.pkl'.
# NOTE(review): in this section sp_500_financial2 is a deep copy of
# removed_sp500_financial_data, yet this file is the one Step 1 loads into
# current_sp500_financial_data. Run top to bottom, this cell would replace the
# current-ticker data with removed-ticker data — confirm which dataset
# sp_500_financial2 held when this cell was executed before re-running it.
with open(data_folder_generate + 'sp500_financial_data_v2.0.pkl', 'wb') as f: 
    pickle.dump(sp_500_financial2, f)

In [110]:
# Save the share-adjusted copy as 'removed_sp500_financial_data_v2.0.pkl'.
# NOTE(review): Step 1 loads the v2.1 file, not this v2.0 one — confirm the versioning.
with open(data_folder_generate + 'removed_sp500_financial_data_v2.0.pkl', 'wb') as f: 
    pickle.dump(sp_500_financial2, f)