# Ingest SEC DERA data into Trino pipeline

Copyright (C) 2021 OS-Climate

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Contributed by Michael Tiemann (Github: MichaelTiemannOSC)

Run the following in a notebook cell if you need to install these packages into your notebook environment

```python
%%capture pipoutput
# The 'capture' cell magic above keeps long pip output from spamming your notebook (a cell magic must be the first line of its cell)

# For loading predefined environment variables from files
# Typically used to load sensitive access credentials
%pip install python-dotenv

# Standard python package for interacting with S3 buckets
%pip install boto3

# Interacting with Trino and using Trino with sqlalchemy
%pip install trino sqlalchemy sqlalchemy-trino

# Pandas and parquet file i/o
%pip install pandas pyarrow fastparquet

# OS-Climate utilities to make data ingest easier
%pip install osc-ingest-tools
```

In [None]:
from dotenv import dotenv_values, load_dotenv
from osc_ingest_trino import *
import os
import pathlib

Load Environment Variables

In [None]:
# Directory holding credentials.env: CREDENTIAL_DOTENV_DIR wins, then PWD,
# then the default application root.
fallback_dir = os.environ.get('PWD', '/opt/app-root/src')
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', fallback_dir)
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'

# Only load the file when it is actually there; values in the file replace
# any variables already present in the environment (override=True).
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path, override=True)

In [None]:
import trino
from sqlalchemy.engine import create_engine

env_var_prefix = 'TRINO'

# Connection coordinates come from <prefix>_USER / _HOST / _PORT.
trino_user = os.environ[f'{env_var_prefix}_USER']
trino_host = os.environ[f'{env_var_prefix}_HOST']
trino_port = os.environ[f'{env_var_prefix}_PORT']
sqlstring = f'trino://{trino_user}@{trino_host}:{trino_port}/'

# The token stored in <prefix>_PASSWD is handed to Trino's JWT authentication.
sqlargs = dict(
    auth=trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    http_scheme='https',
    catalog='osc_datacommons_dev',
)

engine = create_engine(sqlstring, connect_args=sqlargs)
connection = engine.connect()

In [None]:
# The OS-Climate ingest helpers were already star-imported in the first cell;
# the import is repeated here so this cell can be run on its own.
from osc_ingest_trino import *

# Handle to the bucket the parquet files are uploaded to.
# NOTE(review): presumably "S3_DEV" is the prefix of the environment variables
# holding the bucket credentials -- confirm against osc-ingest-tools.
trino_bucket = attach_s3_bucket("S3_DEV")

In [None]:
import boto3

# Endpoint and credentials of the landing (source) bucket, read from the environment.
landing_s3_settings = {
    'service_name': "s3",
    'endpoint_url': os.environ['S3_LANDING_ENDPOINT'],
    'aws_access_key_id': os.environ['S3_LANDING_ACCESS_KEY'],
    'aws_secret_access_key': os.environ['S3_LANDING_SECRET_KEY'],
}
s3_source = boto3.resource(**landing_s3_settings)

# Bucket the raw SEC DERA files are read from.
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

Open a Trino connection using JWT for authentication

In [None]:
# Catalog and schema that every table and view in this notebook is created in.
# The catalog name matches the default catalog passed to create_engine above.
ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'sec_dera'

In [None]:
# List the schemas visible in the ingest catalog as a quick check that the
# Trino connection is configured correctly
for schema_row in engine.execute(f'show schemas in {ingest_catalog}').fetchall():
    print(schema_row)

Enter the Pandas!

In [None]:
import pandas as pd
import io

Drop previous tables and schema to start with a fresh slate

In [None]:
# Print the tables currently present in the ingest schema before dropping anything
sql = f"show tables in {ingest_schema}"
print(sql)
print(engine.execute(sql).fetchall())

In [None]:
# Derived views built on top of the DERA tables
views_to_drop = [
    'assets_by_adsh_ddate', 'assets_usd_by_adsh_ddate', 'assets_xyz_by_adsh_ddate',
    'assets_by_lei', 'assets_usd_by_lei', 'assets_xyz_by_lei',
    'cash_by_adsh_ddate', 'cash_by_lei', 'cash_usd_by_lei', 'cash_xyz_by_lei',
    'debt_by_adsh_ddate', 'debt_by_lei', 'debt_usd_by_lei', 'debt_xyz_by_lei',
    'financials_by_lei',
    'float_by_adsh_ddate', 'float_by_lei', 'float_usd_by_lei', 'float_xyz_by_lei',
    'fy_revenue_by_lei', 'fy_revenue_usd_by_lei', 'fy_revenue_xyz_by_lei',
    'fy_income_by_lei', 'fy_income_usd_by_lei', 'fy_income_xyz_by_lei',
]

# Base tables and the intermediate t_* tables
tables_to_drop = [
    'sub', 'num', 'tag', 'ticker', 'average_fx', 'closing_fx', 'sic_isic',
    't_a', 't_c', 't_d', 't_f', 't_r', 't_i',
]

# Views are dropped first, then the tables, one statement per relation
for relation_kind, relation_names in (('view', views_to_drop), ('table', tables_to_drop)):
    for relation_name in relation_names:
        sql = f"""
drop {relation_kind} if exists {ingest_catalog}.{ingest_schema}.{relation_name}
"""
        qres = engine.execute(sql)

In [None]:
# First show what is left in the schema, then drop the schema itself
schema_statements = [
    f"show tables in {ingest_schema}",
    f"""
drop schema if exists {ingest_catalog}.{ingest_schema}
""",
]
for sql in schema_statements:
    print(sql)
    qres = engine.execute(sql)
    print(qres.fetchall())

In [None]:
# make sure schema exists, or table creation below will fail in weird ways
sql = f"""
create schema {ingest_catalog}.{ingest_schema}
"""
print(sql)
qres = engine.execute(sql)
print(qres.fetchall())

For now, create SIC -> ISIC crosswalk by hand

In [None]:
# Hand-maintained crosswalk: SIC code -> ISIC code
sic_isic = {
    2911: 1920, # Petroleum refining
    3714: 2910, # Motor Vehicle Manufacturing
    3312: 2410, # Steel
    4911: 4010, # Electricity Generation
}

# One row per mapping, with the SIC key and the ISIC value as explicit columns
df = pd.DataFrame(list(sic_isic.items()), columns=['sic', 'isic'])
df

In [None]:
# Serialize the crosswalk to parquet in memory and upload it under the
# directory that the external table below points at
buf = io.BytesIO()
df.to_parquet(path=buf)
buf.seek(0)
trino_bucket.upload_fileobj(Fileobj=buf,
                      Key=f'trino/{ingest_schema}/sic_isic/data.parquet')

# The table name is fully qualified with the catalog, like every other DDL
# statement in this notebook, so it does not depend on the session default.
# NOTE(review): pandas writes these columns as int64 by default while the table
# declares `integer` -- confirm Trino reads the parquet file as intended.
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.sic_isic;
create table {ingest_catalog}.{ingest_schema}.sic_isic(
    sic integer,
    isic integer
) with (
    format = 'parquet',
    external_location = 's3a://{trino_bucket.name}/trino/{ingest_schema}/sic_isic/'
)
"""
# Trino takes one statement per call, so the script is split on ';'
for sql_stmt in sql.split(';'):
    print(sql_stmt)
    qres = engine.execute(sql_stmt)
    print(qres.fetchall())

Load `ticker` file (updated sporadically from https://www.sec.gov/include/ticker.txt)

In [None]:
# Fetch the ticker file from the landing bucket into a local scratch file
ticker_local_path = '/tmp/dera-ticker.txt'
ticker_file = s3_source.Object(os.environ['S3_LANDING_BUCKET'], 'SEC-DERA/ticker.txt')
ticker_file.download_file(ticker_local_path)

# The file is tab-separated with no header row: ticker name first, then CIK
ticker_df = pd.read_csv(
    ticker_local_path,
    names=['tname', 'cik'],
    header=None,
    sep='\t',
    dtype={'tname': 'string', 'cik': 'int64'},
    engine='c',
)

# CIK -> ticker name lookup
ticker_dict = dict(zip(ticker_df.cik, ticker_df.tname))

In [None]:
# Bare expression: the notebook renders the ticker frame as the cell output
ticker_df

In [None]:
# Write the ticker frame to an in-memory parquet file and push it to the
# directory backing the external table
parquet_buf = io.BytesIO()
ticker_df.to_parquet(path=parquet_buf)
parquet_buf.seek(0)
trino_bucket.upload_fileobj(
    Fileobj=parquet_buf,
    Key=f'trino/{ingest_schema}/ticker/data.parquet',
)

# Recreate the table, then run two quick queries to confirm it is readable
ticker_table = f'{ingest_catalog}.{ingest_schema}.ticker'
sql = f"""
drop table if exists {ticker_table};
create table {ticker_table}(
    cik bigint,
    tname varchar
) with (
    format = 'parquet',
    external_location = 's3a://{trino_bucket.name}/trino/{ingest_schema}/ticker/'
);
select count (*) from {ticker_table};
select * from {ticker_table} limit 10
"""
# One statement per execute call
for sql_stmt in sql.split(';'):
    print(sql_stmt)
    qres = engine.execute(sql_stmt)
    print(qres.fetchall())

Prepare GLEIF matching data

In [None]:
# Fetch the DERA/GLEIF match file from the landing bucket into a local scratch file
gleif_local_path = '/tmp/dera-gleif.csv'
gleif_file = s3_source.Object(os.environ['S3_LANDING_BUCKET'], 'mtiemann-GLEIF/DERA-matches.csv')
gleif_file.download_file(gleif_local_path)

# Comma-separated with a header row; every column is kept as a plain string
gleif_df = pd.read_csv(
    gleif_local_path,
    header=0,
    sep=',',
    dtype=str,
    engine='c',
)

# Company name -> LEI lookup
gleif_dict = dict(zip(gleif_df.name, gleif_df.LEI))

Load the SUB, NUM, and TAG tables into Trino

In [None]:
import re
import uuid

# Borrowed/stole this definition from SEC Corp Financials notebook...
# Tags that report a public float directly; filings carrying any of them
# need no inferred float.
float_tags = [
    'EntityPublicFloat',
    'FreeFloat',
    'PublicFloat',
    'PublicFloatValue',
]

# Tags giving a number of shares.
# These are in priority preference order
share_tags = [
    'EntityCommonStockSharesOutstanding',
    'CommonStockSharesOutstanding',
    'SharesOutstanding',
    'WeightedAverageNumberOfDilutedSharesOutstanding',
    'WeightedAverageNumberOfSharesOutstandingBasic',
]

# Tags treated as a price per share when inferring a float
shareprice_tags = [
    'SharePrice',
    'PerSharePrice',
    'CommonStockValueOne',
    'MarketValuePerShare',
    'SaleOfStockPricePerShare',
    'CashPricePerOrdinaryShare',
    'TreasurySharesValuePerShare',
    'SharesOutstandingPricePerShare',
]

# Tags giving a number of treasury shares
treasury_share_tags = [
    'TreasuryStockShares',
    # 'TreasuryStockShares1',
]

# Tags giving the total value of treasury shares; divided by the treasury
# share count they yield a fallback price per share
treasury_value_tags = [
    'TreasurySharesMarketValue',
    'FairValueOfTreasuryShares',
    'MarketValueOfTreasuryShares',
]

ambiguous_tags = [
    # For AES (and about 10% of others), CommonStockValue is a share price
    # For Exelon (and about 90% of others), it's the total value of outstanding common shares
    'CommonStockValue',
    'CommonStockValueOutstanding',
]

# Every tag that is pulled from the NUM data as input to the float inference
all_float_helper_tags = share_tags + shareprice_tags + treasury_share_tags + treasury_value_tags + ambiguous_tags

# Matches an optional space, a slash and everything after it to the end of the
# string; used to strip that suffix from company names
dera_regex = re.compile(r' ?/.*$')
# Most recently read dataframe for each DERA table, keyed by table name
dera_df = {}

from math import floor

def generate_intermediate_ddate_value(df_price, df_shares):
    """Build a one-row frame holding a value dated at a calendar year end.

    DF_SHARES supplies the data points; only its first two rows are used, and
    the caller in this file passes it sorted by DDATE in descending order.
    DF_PRICE is accepted but never read by this function.

    With a single row in DF_SHARES, a copy of that row is returned unchanged.
    Otherwise the result is a copy of the first row with DDATE set to
    December 31 of the floored average of the two rows' years, VALUE blended
    from the two rows, and VERSION set to the first row's ADSH.
    """
    if len(df_shares)==1:
        # A single data point leaves nothing to blend
        df = df_shares.iloc[[0]].copy()
        return df
    year1 = df_shares.iloc[0].ddate
    year2 = df_shares.iloc[1].ddate
    # Diagnostic only: report pairs more than 731 days apart, then carry on
    if (year1-year2).days > 731:
        print("gap years")
        print(df_shares.iloc[0:2])
    df = df_shares.iloc[[0]].copy()
    # December 31 of the floored average of the two years, as a UTC timestamp
    year_end = pd.to_datetime(f"{floor((year1.year+year2.year)/2.0)}1231", format='%Y%m%d', utc=True)
    df.ddate = year_end
    if year1.year==year2.year:
        # Both points fall in the same calendar year: use the plain average
        print("same years")
        print(df_shares.iloc[0:2])
        df.value = (df_shares.iloc[0].value + df_shares.iloc[1].value)/2.0
    else:
        # Day-weighted blend of the two values over a 365-day denominator.
        # NOTE(review): the two weights do not generally sum to 365, so this is
        # not a normalized linear interpolation -- confirm the intended formula.
        df.value = ((365.0-(year1-year_end).days)*df.value + (year_end-year2).days*df_shares.iloc[1].value)/365.0
    # Record the filing the value was derived from in the VERSION column
    df.version = df_shares.iloc[0].adsh
    print(f"adding fact ({df.tag})")
    # `display` is not defined in this block; presumably the notebook builtin
    display(df)
    return df

# When this function is called, we already know that we have no matches in FLOAT_TAGS.
# GROUPED_DF is grouped by ADSH and only for annual reports.  DDATE can be anything (because many reports look back 1-5 years)
# We are working these annual reports quarter by quarter for the quarter in which they are reported

def infer_float(grouped_df):
    """Compute a float fact for each filing group as price per share * shares.

    GROUPED_DF yields (key, frame) pairs, one frame per filing; in this file it
    is a groupby on ADSH.  For each frame the share count comes from the
    highest-priority tag in SHARE_TAGS that has a positive value.  The price
    per share comes from a SHAREPRICE_TAGS fact (result tagged
    'ComputedMarketFloat') or, when no such fact exists, from treasury value
    divided by treasury shares (result tagged 'ComputedTreasuryFloat').
    Groups for which no share count or no price can be found are skipped.

    Returns a DataFrame with one row per inferred float, cast to the dtypes of
    the input columns, or an empty DataFrame when nothing could be inferred.
    """
    inferred = []
    # Iterating the grouping already yields each group's frame, so there is no
    # need to look it up again by key.
    for _, df in grouped_df:
        # We have no overall float value.  Build from shares outstanding * price
        df_shares = df[df.tag.isin(share_tags) & (df.value>0)]
        if df_shares.empty:
            continue
        # share_tags is in priority order: keep only rows of the first tag present
        for share_tag in share_tags:
            preferred = df_shares[df_shares.tag==share_tag]
            if not preferred.empty:
                df_shares = preferred
                break
        df_shares = df_shares.sort_values('ddate', ascending=False)

        df_prices = df[df.tag.isin(shareprice_tags)]
        if df_prices.empty:
            # We have no overall price.  Build from price derived from treasury valuation
            df_treasury_shares = df[df.tag.isin(treasury_share_tags)]
            df_treasury_value = df[df.tag.isin(treasury_value_tags)]
            if df_treasury_shares.empty or df_treasury_value.empty:
                continue
            df_svp = df_treasury_value.merge(df_treasury_shares, on=['adsh', 'ddate', 'coreg'])
            if df_svp.empty:
                print(f"{df.adsh.iat[0]}: merge failed (1)")
                continue
            # Pick latest date / largest number of shares as basis
            df_float = df_svp.sort_values(['ddate', 'value_y'], ascending=False).iloc[[0]].copy()
            # value_x is the treasury value, value_y the treasury share count
            price_per_share = df_float.value_x.squeeze() / df_float.value_y.squeeze()
            # The mapping must be a dict; a set here raises TypeError in rename
            df_float = df_float.rename(columns={'uom_x': 'uom'})
            tag = 'ComputedTreasuryFloat'
        else:
            # We derive a price from market reports
            df_svp = df_prices.merge(df_shares, on=['adsh', 'ddate', 'coreg'])
            if not df_svp.empty:
                # Pick latest date / largest number of shares as basis
                df_float = df_svp.sort_values(['ddate', 'value_y'], ascending=False).iloc[[0]].copy()
                price_per_share = df_float.value_x.squeeze() # value_x is a price in this case
                df_float = df_float.rename(columns={'uom_x': 'uom'})
            elif len(df_prices)==1:
                # No share fact shares a date with the single price: derive an
                # intermediate share value and pair it with that price
                df_shares = generate_intermediate_ddate_value(df_prices, df_shares.sort_values('ddate', ascending=False))
                df_float = df_shares
                price_per_share = df_prices.value.squeeze()
            else:
                print(f"{df.adsh.iat[0]}: merge failed (2)")
                print(f"len(df_prices) = {len(df_prices)}")
                print(f"len(df_shares) = {len(df_shares)}")
                display(df_prices)
                display(df_shares)
                continue
            tag = 'ComputedMarketFloat'

        df_float = df_float[['adsh', 'ddate', 'uom', 'coreg']].copy()
        df_float['tag'] = tag
        # TODO: should connect price ddate with total shares ddate
        total_shares = df_shares.iloc[0].value
        df_float['value'] = price_per_share * total_shares
        df_float['qtrs'] = 0
        df_float['srcdir'] = 'computed'
        df_float['version'] = df_float['footnote'] = pd.NA
        # Match the input dtypes; 'fp' is a helper column the result does not carry
        df_float = df_float.astype(df.drop(columns=['fp'], errors='ignore').dtypes.to_dict())
        inferred.append(df_float)

    if not inferred:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(inferred)

def _dera_flag_to_bool(flags):
    """Convert a 0/1 flag column that was read as strings into numpy booleans."""
    # astype('bool') on strings is truthiness-based, so '0' would come out True;
    # treat only '' and '0' as False instead.
    return (~flags.isin(['', '0'])).astype('bool')


def read_dera_table(zf, fy_qtr, tbl):
    """From a local file ZF, read data for the period FY_QTR for the DERA table TBL.

    Every column is first read as a string; a SRCDIR column holding FY_QTR is
    added, and the columns of the 'sub', 'num' and 'tag' tables are then
    converted to their working dtypes.  Any other TBL is returned as read.

    For 'num', known bad dates are patched, the rows are restricted to filings
    marked FY among the 10-K/20-F/40-F submissions in dera_df['sub'] (which
    must already be loaded), and inferred float facts are appended for filings
    that report no float tag.

    Return the Dataframe created so that when it is time to create the actual Trino table
    we know what the shape of the data should look like.  The returned DF has all the data
    of the specific ingestion, not all the data of all the ingestions of data for TBL.
    """
    df = pd.read_csv(zf, header=0, sep='\t', dtype='string', keep_default_na=False, nrows=None, engine='c')
    df['srcdir'] = fy_qtr
    df['srcdir'] = df['srcdir'].astype('string')

    if tbl=='sub':
        # Strip the trailing "/..." suffix from names before matching against GLEIF
        df['name'] = df['name'].map(lambda x: dera_regex.sub('', x))
        df['name'] = df['name'].astype('string')
        df['LEI'] = df['name'].map(gleif_dict)
        df['LEI'] = df['LEI'].astype('string')
        df['cik'] = df['cik'].astype('int32')
        df.loc[df['sic']=='', 'sic'] = pd.NA
        df['sic'] = df['sic'].astype('Int16')
        df.loc[df['ein']=='', 'ein'] = pd.NA
        df['ein'] = df['ein'].astype('Int64')
        df['wksi'] = _dera_flag_to_bool(df['wksi'])
        df['period'] = pd.to_datetime(df['period'], format='%Y%m%d', utc=True, errors='coerce')
        df['fy'] = pd.to_datetime(df['fy'], format='%Y', utc=True, errors='coerce')
        df['filed'] = pd.to_datetime(df['filed'], format='%Y%m%d', utc=True)
        df['accepted'] = pd.to_datetime(df['accepted'], format='%Y-%m-%d %H:%M:%S', utc=True)
        df['prevrpt'] = _dera_flag_to_bool(df['prevrpt'])
        df['detail'] = _dera_flag_to_bool(df['detail'])
        df['nciks'] = df['nciks'].astype('int16')

        cols = df.columns.tolist()
        # Move LEI to a more friendly location in the column order
        cols = cols[0:3] + [cols[-1]] + cols[3:-1]
        df = df[cols]
    elif tbl=='num':
        # documentation wrongly lists coreg as NUMERIC length 256.  It is ALPHANUMERIC.
        # Patch dates with an impossible year in specific quarters
        if fy_qtr=='2021q3':
            df.loc[df['ddate']=='30210630', 'ddate'] = '20210630'
        if fy_qtr=='2019q2':
            df.loc[df['ddate']=='29171231', 'ddate'] = '20171231'
        # Fix some bad AES data
        if fy_qtr=='2021q1':
            df.loc[(df['adsh']=='0000874761-21-000015')&(df['tag']=='CommonStockValue')&(df['ddate']=='20190630'), 'ddate'] = '20200630'
        elif fy_qtr=='2020q1':
            df.loc[(df['adsh']=='0000874761-20-000012')&(df['tag']=='EntityPublicFloat')&(df['ddate']=='20180630'), 'ddate'] = '20190630'
            df.loc[(df['adsh']=='0000874761-20-000012')&(df['tag']=='CommonStockValue')&(df['ddate']=='20180630'), 'ddate'] = '20190630'
        df['ddate'] = pd.to_datetime(df['ddate'], format='%Y%m%d', utc=True)
        df['qtrs'] = df['qtrs'].astype('int16')
        df.loc[df['coreg']=='', 'coreg'] = pd.NA
        df.loc[df['value']=='', 'value'] = pd.NA
        df['value'] = df['value'].astype('Float64')
        df.loc[df['footnote']=='', 'footnote'] = pd.NA

        print(f"Inferring floats: start len(df) = {len(df)}")
        # Tag each fact with the fiscal period of its annual filing, then keep
        # only the facts of filings marked FY
        sub_df = dera_df['sub']
        annual_df = sub_df[sub_df['form'].isin(['10-K','20-F','40-F'])]
        df['fp'] = df['adsh'].map(dict(zip(annual_df['adsh'], annual_df['fp'])))
        print(f"len(df[df.fp=='FY']) = {len(df[df['fp']=='FY'])}")
        df = df[df['fp']=='FY']
        df_has_float = df[df['tag'].isin(float_tags)]
        print(f"len(df_has_float) = {len(df_has_float)}")
        df_needs_float = df[~df['adsh'].isin(df_has_float['adsh'])]
        print(f"len(df_needs_float) = {len(df_needs_float)}")
        # Only top-level (no co-registrant), positive-valued helper facts feed the inference
        helper_facts = df_needs_float[df_needs_float['coreg'].isna()
                                      &df_needs_float['tag'].isin(all_float_helper_tags)
                                      &(df_needs_float['value']>0)]
        float_df = infer_float(helper_facts.groupby(['adsh'], as_index=False))
        df = df.drop(columns=['fp'])
        if float_df.empty:
            # Nothing inferred: an empty frame has no columns to cast or filter on
            print("0 floats inferred; 0 treasury-based; 0 market-based")
            df = df.reset_index(drop=True)
        else:
            float_df = float_df.astype(df.dtypes.to_dict())
            n_treasury = len(float_df[float_df['tag']=='ComputedTreasuryFloat'])
            n_market = len(float_df[float_df['tag']=='ComputedMarketFloat'])
            print(f"{len(float_df)} floats inferred; {n_treasury} treasury-based; {n_market} market-based")
            # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
            df = pd.concat([df, float_df], ignore_index=True)
    elif tbl=='tag':
        df['custom'] = _dera_flag_to_bool(df['custom'])
        df['abstract'] = _dera_flag_to_bool(df['abstract'])
        df.loc[df['crdr']=='', 'crdr'] = pd.NA
        df.loc[df['tlabel']=='', 'tlabel'] = pd.NA
        df.loc[df['doc']=='', 'doc'] = pd.NA

    return df

In [None]:
import io
import zipfile
import datetime

# Every object under the SEC-DERA/20* prefix of the landing bucket
objects = source_bucket.objects.filter(Prefix='SEC-DERA/20')

dera_tables = ['sub', 'num', 'tag']

for obj in objects:
    if obj.key.endswith('.zip'):
        # Download the quarterly archive to a local scratch file
        zipfile_src = s3_source.Object(os.environ['S3_LANDING_BUCKET'], obj.key)
        tmpname = obj.key.split('/')[-1]
        zipfile_src.download_file(f'/tmp/{tmpname}')
        # The archive name without its extension identifies the period
        fy_qtr = tmpname.split('.')[0]
        # The context manager closes the archive even if reading a table fails
        with zipfile.ZipFile(f'/tmp/{tmpname}', mode='r') as zipfile_obj:
            # 'sub' is read first: reading 'num' looks up dera_df['sub']
            for tbl in dera_tables:
                print(f'{fy_qtr} - {tbl}')
                with zipfile_obj.open(f"{tbl}.txt") as zf:
                    # Read data from ZF into a dataframe.
                    dera_df[tbl] = read_dera_table(zf, fy_qtr, tbl)

        # Now output as parquet files, one file per table and period
        for tbl in dera_tables:
            buf = io.BytesIO()
            dera_df[tbl].to_parquet(path=buf)
            buf.seek(0)
            # Name the file after the period directly rather than reading it
            # back from the first row, which fails on an empty frame
            trino_bucket.upload_fileobj(Fileobj=buf,
                                        Key=f'trino/{ingest_schema}/{tbl}/{fy_qtr}.parquet')

# Once we have all our parquet files in place, load up the tables with their directory contents
for tbl in dera_tables:
    if tbl not in dera_df:
        # No dataframe means no schema to build the table from, so stop here
        # with an explicit error (no `error` helper is defined in this notebook)
        raise RuntimeError(f'{tbl} data not found')
    table_check = engine.execute(f'drop table if exists {ingest_catalog}.{ingest_schema}.{tbl}')
    for row in table_check.fetchall():
        print(row)

    # Column definitions are derived from the most recently read dataframe for this table
    columnschema = create_table_schema_pairs(dera_df[tbl], typemap={'int16':'smallint', 'Int16':'smallint'})
    tabledef = f"""
create table if not exists {ingest_catalog}.{ingest_schema}.{tbl} (
{columnschema}
) with (
format = 'parquet',
external_location = 's3a://{trino_bucket.name}/trino/{ingest_schema}/{tbl}/'
)
"""
    print(tabledef)

    table_create = engine.execute(tabledef)
    for row in table_create.fetchall():
        print(row)

    # Read a few rows back to confirm the table is queryable
    dataset_query = (f'SELECT * FROM {ingest_catalog}.{ingest_schema}.{tbl} limit 10')
    print(dataset_query)
    dataset = engine.execute(dataset_query)
    for row in dataset.fetchall():
        print(row)

In [None]:
# Borrow metadata code from DERA-iceberg if/when we need it

In [None]:
tablenames = ['sub', 'num', 'tag', 'ticker']
# First result row of each count(*) query, in the same order as tablenames
row_counts = [
    engine.execute(f'select count (*) from {ingest_catalog}.{ingest_schema}.{name}').fetchall()[0]
    for name in tablenames
]
print(list(zip(tablenames, row_counts)))