# Fundamentals using Compustat
**I have decided not to use Compustat data for fundamentals because it has many missing values.** This is surprising, as Compustat is research-grade data. Maybe I did something wrong in my code, or Polygon's CIK data is bad.

In [1]:
from datetime import datetime, date, time, timedelta
from times import get_market_dates, get_market_calendar, last_trading_date_before
from data import get_data
from tickers import get_tickers
from polygon.rest import RESTClient
import json
import numpy as np
import ast
import pandas as pd

### Calculating market cap 

In [19]:
# Add a `marketcap_M` column, keep only USD-denominated rows, and write the
# result back over the same CSV (the next cell reads it from this path).
# NOTE(review): presumably cshoq is shares outstanding (millions) and prccq the
# quarter-end close, which is what the `_M` suffix suggests -- confirm against
# the Compustat data dictionary.
compustat_path = '../data/other/compustat.csv'
fundamentals = pd.read_csv(compustat_path)
fundamentals = fundamentals.assign(
    marketcap_M=fundamentals['cshoq'] * fundamentals['prccq']
)
is_usd = fundamentals['curcdq'] == 'USD'
fundamentals = fundamentals.loc[is_usd]
fundamentals.to_csv(compustat_path, index=False)

In [21]:
# Reload the Compustat table keyed by CIK so each company's rows can be
# fetched with `.loc[cik]`.
fundamentals = pd.read_csv('../data/other/compustat.csv', index_col='cik')

# Rows without a CIK get the placeholder key 0 so the index can be cast to int.
cik_index = fundamentals.index.fillna(0)
fundamentals.index = cik_index.astype(int)

# Store plain `datetime.date` objects rather than pandas timestamps.
parsed_dates = pd.to_datetime(fundamentals['datadate'])
fundamentals['datadate'] = parsed_dates.dt.date

# Sanity check: first three rows for CIK 320193.
fundamentals.loc[320193].head(3)

Unnamed: 0_level_0,gvkey,datadate,tic,cusip,conm,curcdq,cshoq,epsfxq,exchg,costat,fic,prccq,loc,marketcap_M
cik,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
320193,1690,2000-03-31,AAPL,37833100,APPLE INC,USD,162.68,1.28,14.0,A,USA,135.8125,USA,22093.9775
320193,1690,2000-06-30,AAPL,37833100,APPLE INC,USD,324.826,0.55,14.0,A,USA,52.375,USA,17012.76175
320193,1690,2000-09-30,AAPL,37833100,APPLE INC,USD,335.677,0.47,14.0,A,USA,25.75,USA,8643.68275


In [None]:
# Build one long table of market caps keyed by ticker ID.
#
# For every ticker with a CIK, pull its rows from `fundamentals` (expected to
# be indexed by integer CIK), keep only the rows dated within the ticker's
# [start_data, end_data] window, and tag them with the ticker's ID, country
# and SIC code. The per-ticker frames are concatenated once at the end and
# written to compustat_processed.csv.
rows = []  # one DataFrame per ticker that has a Compustat match
tickers = get_tickers(4, types=['CS', 'ADRC'])
for index, row in tickers.iterrows():
    ticker_id = row['ID']
    cik = row['cik']

    # Without a CIK there is no key to join on.
    if pd.isna(cik):
        continue

    try:
        # Look up with an int key to match the integer CIK index. Indexing
        # with a list always returns a DataFrame, even for one matching row,
        # so no Series special case is needed.
        stock_fundamentals = fundamentals.loc[[int(cik)]]
    except (KeyError, ValueError):
        # KeyError: CIK not present in Compustat.
        # ValueError: CIK is not numeric, so it cannot match the index either.
        continue

    # Keep only observations inside the ticker's data window (inclusive).
    in_window = (stock_fundamentals['datadate'] >= row['start_data']) & \
        (stock_fundamentals['datadate'] <= row['end_data'])

    stock_fundamentals = (
        stock_fundamentals.loc[in_window, ['datadate', 'marketcap_M']]
        .rename(columns={'datadate': 'date'})
        .assign(country=row['country'], sic=row['sic'])
    )
    # Re-key by ticker ID; the CIK index is no longer needed downstream.
    stock_fundamentals.index = [ticker_id] * len(stock_fundamentals)

    rows.append(stock_fundamentals)
    print(index)

market_cap_df = pd.concat(rows)
market_cap_df.index.name = 'ID'
market_cap_df.to_csv('../data/other/compustat_processed.csv')

In [5]:
# Load the processed table under its own name. Rebinding `fundamentals` here
# would replace the CIK-indexed Compustat frame with a differently shaped one
# and break any re-run of the cell that builds compustat_processed.csv.
market_cap_processed = pd.read_csv('../data/other/compustat_processed.csv')

# Last non-null market cap per ticker ID; IDs with no market cap at all are dropped.
grouped_by_marketcap = (
    market_cap_processed
    .groupby('ID')
    .agg({'marketcap_M': 'last'})
    .dropna()
)

# `tickers` is the frame returned by get_tickers(...) in the cell that builds
# compustat_processed.csv; that cell must have been run in this kernel.
print(f'Amount of tickers: {len(tickers)}')
print(f"Amount of stocks with marketcap: {len(grouped_by_marketcap)}")

Amount of tickers: 17647
Amount of stocks with marketcap: 12654


In [6]:
# List every ticker that ended up without a market cap and save it for inspection.
#
# The ticker table is fetched once and used for both the membership test and
# the final selection, so the two can never refer to different ticker sets.
tickers = get_tickers(4, types=['CS', 'ADRC'])

# True for tickers whose ID has no entry in the per-ID market-cap table.
has_no_market_cap = ~tickers['ID'].isin(grouped_by_marketcap.index)
no_market_cap_tickers = tickers.loc[has_no_market_cap, 'ID'].tolist()

no_market_cap = tickers.loc[
    has_no_market_cap,
    ['ID', 'name', 'start_data', 'end_data', 'type', 'cik', 'composite_figi'],
]
no_market_cap.to_csv('../data/other/COMPUSTAT_no_marketcap.csv')

I have decided not to use Compustat data. (And I will lose access anyway.)