# Fundamentals using Sharadar
Sharadar does include recycled tickers (although not all of them; see AAC for an example).

In [1]:
from datetime import datetime, date, time, timedelta
from times import get_market_dates, get_market_calendar, last_trading_date_before
from data import get_data
from tickers import get_tickers
from polygon.rest import RESTClient
import json
import numpy as np
import ast
import pandas as pd

In [34]:
# Load the Sharadar SF1 table and reduce `datekey` to plain `datetime.date` values.
sf1_path = '../data/other/sharadar_SF1.parquet'
fundamentals = pd.read_parquet(sf1_path)
fundamentals = fundamentals.assign(
    datekey=pd.to_datetime(fundamentals['datekey']).dt.date
)


Create a ticker-to-CIK mapping so that the fundamentals can be looked up by CIK.

In [35]:
# Read the Sharadar tickers table and keep only the two columns needed to
# translate a Sharadar ticker into its CIK.
sharadar_tickers = pd.read_csv(
    '../data/other/sharadar_tickers.csv',
    index_col=0,
)
ticker_cik_map = sharadar_tickers.loc[:, ['ticker', 'cik']]
ticker_cik_map.head(3)

Unnamed: 0_level_0,ticker,cik
None,Unnamed: 1_level_1,Unnamed: 2_level_1
0,PODC,1940177
1,SWIN,1959224
2,IVP,1939365


In [36]:
# Attach the CIK to every fundamentals row, discard rows whose ticker has no
# CIK, and re-index the frame by (integer) CIK, keeping only the columns used
# further down.
fundamentals = (
    fundamentals
    .merge(ticker_cik_map, how='left', on='ticker')
    .dropna(subset=['cik'])
    .astype({'cik': int})
    .set_index('cik')
    .loc[:, ['datekey', 'ticker', 'marketcap']]
)

In [37]:
# Number of fundamentals rows that have a CIK.
fundamentals.shape[0]

2886167

In [38]:
# Number of rows without a market cap.
int(fundamentals['marketcap'].isna().sum())

276333

In [39]:
# Keep only rows with a market cap, order them by ticker and filing date,
# and collapse exact duplicates.
# NOTE: drop_duplicates compares column values only; the `cik` index is ignored.
fundamentals = (
    fundamentals
    .dropna(subset=['marketcap'])
    .sort_values(by=['ticker', 'datekey'])
    .drop_duplicates()
)
# Market cap expressed in millions.
fundamentals = fundamentals.assign(
    marketcap_M=fundamentals['marketcap'] / 1_000_000
)
fundamentals.head(3)

Unnamed: 0_level_0,datekey,ticker,marketcap
cik,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1090872,2000-01-25,A,30595880000.0
1090872,2000-01-31,A,29917880000.0
1090872,2000-03-15,A,51663600000.0


In [None]:
# Build a long table of Sharadar market caps keyed by our own ticker ID.
# For every ticker row: look up the fundamentals through its CIK, keep only the
# rows whose datekey lies inside the ticker's [start_data, end_data] window,
# and tag them with the ticker's country and SIC code. The result is written to
# ../data/other/sharadar_processed.csv.
tickers = get_tickers(4, types=['CS', 'ADRC'])
total_tickers = len(tickers)

rows = []  # One DataFrame per ticker ID that has at least one matching row
for position, (_, row) in enumerate(tickers.iterrows(), start=1):
    # Report progress occasionally instead of printing every row index.
    if position % 1000 == 0:
        print(f'{position}/{total_tickers}')

    ticker_id = row['ID']  # not named `id`, to avoid shadowing the builtin
    cik = row['cik']
    if pd.isna(cik):
        continue

    try:
        # A list key makes .loc return a DataFrame even when a single row
        # matches, so no Series special case is needed. int() aligns the key
        # with the integer CIK index of `fundamentals`.
        stock_fundamentals = fundamentals.loc[[int(cik)]]
    except (KeyError, ValueError):
        # KeyError: CIK not present in Sharadar. ValueError: CIK is not numeric.
        continue

    in_window = (stock_fundamentals['datekey'] >= row['start_data']) & \
        (stock_fundamentals['datekey'] <= row['end_data'])

    stock_fundamentals = (
        stock_fundamentals.loc[in_window, ['datekey', 'marketcap_M']]
        .rename(columns={'datekey': 'date'})
        # `fundamentals` is ordered by (ticker, datekey), so a CIK that covers
        # several Sharadar tickers is not chronological. Sort by date so that
        # order-dependent consumers (e.g. a 'last' aggregation) see the most
        # recent value last.
        .sort_values('date', kind='stable')
        .assign(country=row['country'], sic=row['sic'])
    )
    if stock_fundamentals.empty:
        # Nothing inside the ticker's window; an empty frame adds no rows.
        continue

    stock_fundamentals.index = [ticker_id] * len(stock_fundamentals)
    rows.append(stock_fundamentals)

if not rows:
    raise ValueError('No Sharadar fundamentals matched any ticker; nothing to write.')

market_cap_df = pd.concat(rows)
market_cap_df.index.names = ['ID']
market_cap_df.to_csv('../data/other/sharadar_processed.csv')

In [4]:
# Load the ticker table, requesting the 'CS' and 'ADRC' types.
# NOTE(review): the meaning of the positional argument 4 is defined by get_tickers — confirm.
tickers = get_tickers(4, types=['CS', 'ADRC'])

In [5]:
# Load the processed per-ID market caps and keep the most recent non-null
# market cap for every ID.
# NOTE: this rebinds `fundamentals` to the processed table.
fundamentals = pd.read_csv('../data/other/sharadar_processed.csv')
grouped_by_marketcap = (
    fundamentals
    # 'last' depends on row order, so order chronologically first instead of
    # relying on the order the CSV was written in. `date` is read back as an
    # ISO 'YYYY-MM-DD' string, which sorts chronologically.
    .sort_values('date', kind='stable')
    .groupby('ID')
    .agg({'marketcap_M': 'last'})
    .dropna()
)

print(f'Amount of tickers: {len(tickers)}')
print(f"Amount of stocks with marketcap: {len(grouped_by_marketcap)}")

Amount of tickers: 17638
Amount of stocks with marketcap: 14334


In [6]:
# Collect the tickers for which no market cap was found and export them for
# manual inspection / alternative matching.
# A vectorised membership test replaces the row-by-row loop and reuses the
# `tickers` frame already loaded instead of calling get_tickers a second time.
has_no_market_cap = ~tickers['ID'].isin(grouped_by_marketcap.index)
no_market_cap_tickers = tickers.loc[has_no_market_cap, 'ID'].tolist()

no_market_cap = tickers.loc[
    has_no_market_cap,
    ['ID', 'name', 'start_data', 'end_data', 'type', 'cik', 'composite_figi'],
]
no_market_cap.to_csv('../data/other/SHARADAR_no_marketcap.csv')

Tickers that could not be matched by CIK should instead be matched by ticker symbol (taking ticker changes into account) or by company name.