In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import datetime as dt
import requests
import yfinance as yf
from time import sleep
import re
import bisect

import bs4
from bs4 import BeautifulSoup

# Use seaborn's 'darkgrid' style for plots.
sns.set_style('darkgrid')

In [None]:
# Default HTTP headers passed to every requests.get call in the functions below.
# Replace the placeholder with a real contact address before running.
headers = {"User-Agent": "????????@gmail.com"} # Your email goes here

def NPORT_Filings_from_CIK(cik, headers=headers):
    """Return the recent NPORT-P filings of one EDGAR filer as a DataFrame.

    Parameters
    ----------
    cik : str or int
        Central Index Key of the filer. It is left-padded with zeros to ten
        digits before being placed in the URL, so both ``1064641`` and
        ``'0001064641'`` are accepted.
    headers : dict
        HTTP headers sent with the request.

    Returns
    -------
    pandas.DataFrame
        The rows of ``filings.recent`` whose ``form`` is ``"NPORT-P"``, with
        ``filingDate`` and ``reportDate`` converted by ``pd.to_datetime``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    padded_cik = str(cik).zfill(10)
    url = f"https://data.sec.gov/submissions/CIK{padded_cik}.json"

    sleep(2)  # pause before each request to throttle traffic to the server
    response = requests.get(url, headers=headers)
    # Fail loudly here instead of with an opaque JSON decoding error below.
    response.raise_for_status()

    recent_df = pd.DataFrame(response.json()["filings"]["recent"])

    # .copy() makes the filtered frame independent of recent_df, so the column
    # assignments below neither warn nor write through to a parent frame.
    nport_filings_df = recent_df[recent_df["form"] == "NPORT-P"].copy()

    # Replace the columns outright (not via .loc[:, col]) so they take the
    # datetime dtype rather than keeping the original object dtype.
    nport_filings_df["filingDate"] = pd.to_datetime(nport_filings_df["filingDate"])
    nport_filings_df["reportDate"] = pd.to_datetime(nport_filings_df["reportDate"])
    return nport_filings_df

def gen_company_name_and_cik_list(headers=headers):
    """Download EDGAR's company-name/CIK lookup file as a sorted list of lines.

    Parameters
    ----------
    headers : dict
        HTTP headers sent with the request.

    Returns
    -------
    list of str
        The non-empty lines of ``cik-lookup-data.txt``, sorted in Python
        string order so the list can be searched with ``bisect``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    url = "https://www.sec.gov/Archives/edgar/cik-lookup-data.txt"

    sleep(2)  # pause before each request to throttle traffic to the server
    response = requests.get(url, headers=headers)
    # Without this check an error page would be split and returned as "data".
    response.raise_for_status()

    # Split on '\n' only: str.splitlines() also breaks on characters such as
    # '\x85', which could cut a company name in half.
    lines = (line.rstrip('\r') for line in response.text.split('\n'))

    # bisect requires its input to be sorted by Python's own string ordering;
    # sorting here guarantees that regardless of the file's collation.
    return sorted(line for line in lines if line)

# Downloaded once and kept at module level; holdings_from_NPORT searches this
# list with bisect to turn issuer names into CIKs.
# NOTE(review): bisect assumes the list is sorted in Python string order - confirm.
cik_list = gen_company_name_and_cik_list()

def _lookup_cik(issuer_name, sorted_entries):
    """Return the CIK for ``issuer_name`` from sorted lookup lines, or None.

    An exact name match (``NAME:`` prefix) is tried first, then a plain prefix
    match. The CIK is taken as the last ':'-separated field after dropping a
    trailing ':'. None is returned when no line matches.
    """
    key = issuer_name.strip().upper()
    if not key:
        return None

    for probe in (key + ":", key):
        position = bisect.bisect_left(sorted_entries, probe)
        # bisect_left only yields an insertion point; confirm that the entry
        # found there really begins with what we searched for.
        if position >= len(sorted_entries):
            continue
        entry = sorted_entries[position]
        if not entry.startswith(probe):
            continue
        # rsplit from the right so a ':' inside the company name is harmless.
        fields = entry.rstrip().rstrip(":").rsplit(":", 1)
        if len(fields) == 2:
            return fields[1]
    return None


def holdings_from_NPORT(accessionNumber,primaryDocument,reportDate,headers=headers,cik='1064641'):
    """Scrape issuer names and portfolio weights from one NPORT-P filing page.

    Parameters
    ----------
    accessionNumber : str
        Accession number as it appears in the archive path.
    primaryDocument : str
        Path of the filing's primary document below the accession folder.
    reportDate : object
        Only interpolated into the name of the weighting column.
    headers : dict
        HTTP headers sent with the request.
    cik : str or int, optional
        CIK of the filer whose archive folder is read. Leading zeros are
        removed; the default reproduces the previously hard-coded folder.

    Returns
    -------
    tuple
        ``(seriesID, holdings)`` where ``holdings`` is a DataFrame with the
        columns ``Stock``, ``CIK`` and
        ``Weighting in quarter starting {reportDate}``. ``CIK`` is None for
        issuers that have no matching line in the module-level ``cik_list``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the page contains no "Series ID" cell.
    """
    sleep(2)  # pause before each request to throttle traffic to the server

    archive_cik = str(cik).lstrip("0")
    url = f"https://www.sec.gov/Archives/edgar/data/{archive_cik}/{accessionNumber}/{primaryDocument}"

    response = requests.get(url, headers=headers)
    # An explicit status check survives `python -O`, unlike an assert.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # The label cells are matched on their exact text, whitespace included.
    stocks_source = soup.find_all("td",string='a. Name of issuer (if any). \n\t\t\t\t')
    weights_source = soup.find_all('td',string='Percentage value compared to net assets of the Fund.\n\t\t\t')

    # The value sits in the first <div> of the row that holds the label cell.
    stocks = [str(cell.parent.find('div').contents[0]) for cell in stocks_source]
    weights = [str(cell.parent.find('div').contents[0]) for cell in weights_source]

    CIKs = [_lookup_cik(stock, cik_list) for stock in stocks]

    holdings = pd.DataFrame({'Stock' : stocks,'CIK' : CIKs,f'Weighting in quarter starting {reportDate}' : weights})

    series_label = soup.find_all('td',string="Series ID")[0]
    seriesID = str(series_label.parent.parent.div.contents[0])

    return seriesID, holdings

In [None]:
# All recent NPORT-P filings for the filer with CIK 0001064641.
filings = NPORT_Filings_from_CIK('0001064641')

# Maps series ID -> DataFrame of holdings, gaining one weighting column per
# filing processed for that series.
SPDR_holdings = {}

for raw_accession, primaryDocument, reportDate in zip(
        filings["accessionNumber"], filings["primaryDocument"], filings["reportDate"]):
    # The accession number is used in the archive URL without its dashes.
    accessionNumber = raw_accession.replace("-","")

    seriesID, holdings = holdings_from_NPORT(accessionNumber,primaryDocument,reportDate,headers=headers)

    if seriesID in SPDR_holdings:
        # DataFrame.merge returns a new frame, so the result must be stored
        # back; otherwise every filing after the first is silently dropped.
        SPDR_holdings[seriesID] = SPDR_holdings[seriesID].merge(holdings,how='outer')
    else:
        SPDR_holdings[seriesID] = holdings