In [7]:
import requests
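# Uncomment this if you need to install the packages (run `import sys` first;
# `%pip install pyarrow` is an alternative that targets the running kernel).
# !{sys.executable} -m pip install pyarrow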
import polars as pl
from datetime import datetime, timedelta
import time

pl.Config.set_tbl_rows(100)
# Contact address sent as the User-Agent header on every request in this notebook.
email = "pmay24@wooster.edu"
headers = {"User-Agent": email}

tickers_cik = requests.get("https://www.sec.gov/files/company_tickers.json", headers=headers)
# Short pause after the call -- presumably to stay under the SEC request-rate limit.
time.sleep(.1)
# Stop here on a rejected or throttled request rather than failing later with a
# confusing JSON decoding error.
tickers_cik.raise_for_status()
df = pl.DataFrame(tickers_cik.json())

# Unpack the per-company records held in the cells of `df` into three parallel lists.
ciks, tiks, title = [], [], []
for row in df.iter_rows():
    for company in row:
        # Zero-pad the CIK to 10 characters so every identifier has the same width;
        # the padded form is what gets interpolated into the request URLs later on.
        ciks.append(str(company["cik_str"]).zfill(10))
        tiks.append(company["ticker"])
        title.append(company["title"])

# Tidy lookup table: one row per company.
reformatted = pl.DataFrame(
    {
        "cik_str": ciks,
        "ticker": tiks,
        "name": title,
    }
)

In [8]:
# get all the historical data we want here!
# change this to whatever ticker you want to investigate
lookup = "WD"

# NOTE(review): row(by_predicate=...) is expected to match exactly one row; confirm how the
# installed polars version reports a missing or duplicated ticker.
lookup_cik, _, stock_name = reformatted.row(by_predicate=(pl.col("ticker") == lookup))

# Shared prefix of this company's us-gaap "companyconcept" endpoints.
concept_base = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{lookup_cik}/us-gaap"

# edit the xBRL tags as needed
# instead of income, what if we look at Earnings?
net_income_req = f"{concept_base}/NetIncomeLoss.json"

# Alternative cash tags:
# NetCashProvidedByUsedInOperatingActivities
# CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents
cash_flow_req = f"{concept_base}/NetCashProvidedByUsedInOperatingActivities.json"

# Alternative revenue tags:
# RevenueFromContractWithCustomerExcludingAssessedTax
# RevenuesTotal
# NoninterestIncomeExcludingProvisionForOtherCreditLosses
# Revenues
revenue_req = f"{concept_base}/Revenues.json"


def _fetch_concept(url):
    """GET one concept URL with the notebook's headers and return the response.

    Sleeps 0.1 s after the call (same pacing as before) and raises
    ``requests.HTTPError`` on a 4xx/5xx status, so a tag the company does not
    report fails here instead of when the JSON body is indexed later.
    """
    response = requests.get(url, headers=headers)
    time.sleep(.1)
    response.raise_for_status()
    return response


net_inc = _fetch_concept(net_income_req)
cash_flow = _fetch_concept(cash_flow_req)
revenue = _fetch_concept(revenue_req)

In [9]:
# Recursive dict flattener (adapted from a Stack Overflow answer).
def flatten(kv, prefix=[]):
    """Yield ``(key, value)`` pairs for every non-dict leaf of a nested mapping.

    Keys are converted with ``str`` and, for nested leaves, joined to their
    ancestors' keys with ``_`` (``{"a": {"b": 1}}`` yields ``("a_b", 1)``).
    ``prefix`` carries the ancestor keys during recursion; it is never mutated,
    a new list is built for each level.
    """
    for key, value in kv.items():
        label = str(key)
        if not isinstance(value, dict):
            # Leaf: top-level keys are emitted as-is, nested ones get the joined path.
            yield ('_'.join(prefix + [label]) if prefix else label), value
            continue
        # Nested mapping: descend with this key appended to the path.
        yield from flatten(value, prefix + [label])


def _usd_facts_frame(response):
    """Build a frame from the response's ``units`` -> ``USD`` entries, one flattened dict each."""
    facts = response.json()["units"]["USD"]
    return pl.DataFrame(dict(flatten(fact)) for fact in facts)


income_pl = _usd_facts_frame(net_inc)
cash_pl = _usd_facts_frame(cash_flow)
revenue_pl = _usd_facts_frame(revenue)

def yoy_change_formatter(dataf, metric):
    """Keep the annual facts of a concept frame and compute their year-over-year % change.

    Parameters
    ----------
    dataf : pl.DataFrame
        Must contain ``start`` and ``end`` as ``YYYY-MM-DD`` strings, a numeric
        ``val`` column, and the columns ``accn``, ``fy``, ``fp``, ``form``,
        ``filed`` and ``frame`` (all of which are dropped).
    metric : str
        Label used to name the result column ``"<metric> yoy change"``.

    Returns
    -------
    pl.DataFrame
        Rows whose period spans at least 51 weeks and whose ``frame`` is not
        null, ordered by period, with ``val`` replaced by the percentage change
        against the previous row. The first row's change is null.
    """
    iso_date = "%Y-%m-%d"
    annual = (
        dataf
        .with_columns(
            # Format is passed positionally: the keyword was renamed between polars releases.
            [pl.col(name).str.strptime(pl.Date, iso_date) for name in ("start", "end")]
        )
        .filter(
            (pl.col("end") - pl.col("start") >= timedelta(weeks=51))
            # Explicit null test: comparing a column with None does not behave the
            # same across polars versions and can filter out every row.
            & pl.col("frame").is_not_null()
        )
        .drop(["accn", "fy", "fp", "form", "filed", "frame"])
        # diff/shift compare adjacent rows, so make the chronological order explicit.
        .sort(["start", "end"])
    )

    # Change relative to the magnitude of the previous value, so a move away from a
    # negative base still gets a meaningful sign.
    pct_change = pl.col("val").diff(1) / pl.col("val").shift(1).abs() * 100
    return annual.with_columns(pct_change.alias(f"{metric} yoy change")).drop("val")
    

# Turn each metric's raw facts into annual year-over-year percentage changes.
income_pl = yoy_change_formatter(income_pl, "Net Income")
cash_pl = yoy_change_formatter(cash_pl, "Cash Flow")
revenue_pl = yoy_change_formatter(revenue_pl, "Revenue")

# Left joins on the period start keep every net-income row; the duplicate "end"
# column contributed by each joined frame ("end_right") is dropped.
combined = (
    income_pl
    .join(cash_pl, on="start", how="left").drop("end_right")
    .join(revenue_pl, on="start", how="left").drop("end_right")
)
print(combined)

print("All time average pct changes")
print(combined.mean())

# Derive the window from the run date so the "Last N Years" label stays true;
# a fixed 2017 cutoff only matched the label for a 2022 run.
RECENT_YEARS = 5
recent_cutoff = datetime(datetime.now().year - RECENT_YEARS, 1, 1)
last_5yrs = combined.filter(pl.col("start") >= recent_cutoff)
print(f"\nLast {RECENT_YEARS} Years averages")
print(last_5yrs.mean())


shape: (14, 5)
┌────────────┬────────────┬───────────────────────┬──────────────────────┬────────────────────┐
│ start      ┆ end        ┆ Net Income yoy change ┆ Cash Flow yoy change ┆ Revenue yoy change │
│ ---        ┆ ---        ┆ ---                   ┆ ---                  ┆ ---                │
│ date       ┆ date       ┆ f64                   ┆ f64                  ┆ f64                │
╞════════════╪════════════╪═══════════════════════╪══════════════════════╪════════════════════╡
│ 2008-01-01 ┆ 2008-12-31 ┆ null                  ┆ null                 ┆ null               │
│ 2009-01-01 ┆ 2009-12-31 ┆ -104.235241           ┆ 650.948599           ┆ -8.038669          │
│ 2010-01-01 ┆ 2010-12-31 ┆ 4381.001391           ┆ 53.368136            ┆ 17.933267          │
│ 2011-01-01 ┆ 2011-12-31 ┆ 6.810828              ┆ -45.002732           ┆ 22.522835          │
│ 2012-01-01 ┆ 2012-12-31 ┆ 26.563412             ┆ 54.207478            ┆ 9.716327           │
│ 2013-01-01 ┆ 2013-12-31