In [28]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import numpy as np

In [29]:
# Create the SQLAlchemy engine for the `sec13f.sqlite` database file.
# The path in the URL is relative, so the notebook must be run from the
# directory that contains the database file.
engine = create_engine("sqlite:///sec13f.sqlite")

In [30]:
# Declare a Base using `automap_base()`; it has no mapped classes until
# `Base.prepare(...)` is called against the database.
Base = automap_base()

In [31]:
# Reflect the database tables and generate a mapped class for each of them.
# `prepare(engine, reflect=True)` was deprecated in SQLAlchemy 1.4 and removed
# in 2.0; `autoload_with=` is the supported spelling. Releases older than 1.4
# reject the new keyword with a TypeError, so fall back to the legacy call there.
try:
    Base.prepare(autoload_with=engine)
except TypeError:
    Base.prepare(engine, reflect=True)

In [32]:
# Show the names of all classes that were mapped onto the Base
# (left as the cell's last expression so the notebook displays it)
Base.classes.keys()

['indsectorindgroup', 'latest_positions', 'positions']

In [33]:
# Assign the mapped `positions` class to a variable called `Position`
# so it can be used in ORM queries below
Position = Base.classes.positions

In [34]:
# Create a session bound to the engine; all ORM queries below go through it
session = Session(engine)

In [35]:
# Collect every distinct CUSIP found in the positions table.
# The result is a list of 1-tuples, e.g. [('00206R102',), ...].
# DISTINCT states the intent directly (the original used GROUP BY purely to
# de-duplicate), and the explicit ORDER BY makes the row order deterministic
# instead of relying on how the database happens to evaluate the grouping.
symbols = (
    session.query(Position.cusip)
    .distinct()
    .order_by(Position.cusip)
    .all()
)
print(symbols)

[('00206R102',), ('02376R102',), ('025816109',), ('037833100',), ('064058100',), ('16117M305',), ('16119P108',), ('167250109',), ('191216100',), ('20825C104',), ('22160K105',), ('23918K108',), ('244199105',), ('247361702',), ('25470M109',), ('25490A309',), ('30219G108',), ('30231G102',), ('369604103',), ('37045V100',), ('37733W105',), ('38141G104',), ('384637104',), ('459200101',), ('478160104',), ('49456B101',), ('500754106',), ('50076Q106',), ('523768109',), ('531229102',), ('531229300',), ('531229409',), ('531229607',), ('531229854',), ('531229870',), ('55261F104',), ('57636Q104',), ('584404107',), ('58441K100',), ('609207105',), ('61166W101',), ('615369105',), ('637071101',), ('67011P100',), ('718546104',), ('740189105',), ('742718109',), ('76131D103',), ('80105N105',), ('82968B103',), ('844741108',), ('85571Q102',), ('867224107',), ('891027104',), ('90130A101',), ('902973304',), ('903293405',), ('910047109',), ('911312106',), ('92343E102',), ('92343V104',), ('92345Y106',), ('92553

In [36]:
# Flatten the list of 1-tuples returned by the query into a 1-D array of CUSIPs
symbol_list = np.asarray(symbols).ravel()
print(symbol_list)

['00206R102' '02376R102' '025816109' '037833100' '064058100' '16117M305'
 '16119P108' '167250109' '191216100' '20825C104' '22160K105' '23918K108'
 '244199105' '247361702' '25470M109' '25490A309' '30219G108' '30231G102'
 '369604103' '37045V100' '37733W105' '38141G104' '384637104' '459200101'
 '478160104' '49456B101' '500754106' '50076Q106' '523768109' '531229102'
 '531229300' '531229409' '531229607' '531229854' '531229870' '55261F104'
 '57636Q104' '584404107' '58441K100' '609207105' '61166W101' '615369105'
 '637071101' '67011P100' '718546104' '740189105' '742718109' '76131D103'
 '80105N105' '82968B103' '844741108' '85571Q102' '867224107' '891027104'
 '90130A101' '902973304' '903293405' '910047109' '911312106' '92343E102'
 '92343V104' '92345Y106' '92553P201' '92826C839' '92927K102' '931142103'
 '939640108' '949746101' 'G0750C108' 'G5480U104' 'G5480U120' 'G5480U138'
 'G5480U153']


In [37]:
import os
import csv
import time
import requests
from bs4 import BeautifulSoup

In [38]:
# Scrape ticker / industry sector / industry group for every CUSIP.
base_url = "https://search13f.com/securities/neighbors/"
REQUEST_DELAY_SECONDS = 10    # pause before each request (presumably to avoid hammering the site)
REQUEST_TIMEOUT_SECONDS = 30  # never let a single stalled request hang the whole run


def _value_after(text, separator):
    """Return the part of `text` after the first `separator`.

    Raises ValueError when `separator` does not occur in `text`.
    """
    _, value = text.split(separator, 1)
    return value


def _parse_snippet_table(html):
    """Extract `(ticker, indsec, indgrp)` from the HTML of a neighbors page.

    The values are read from the table with id "snippet": the ticker from the
    first cell of the second row, the sector and group from the first two
    cells of the last row. A missing or unparsable ticker yields "" (same as
    the original best-effort behavior).

    Raises ValueError when the table, its rows or the sector/group cells are
    missing or not in the expected "label: value" form.
    """
    soup = BeautifulSoup(html, "lxml")
    table = soup.find("table", {"id": "snippet"})
    if table is None:
        raise ValueError('page has no <table id="snippet">')
    rows = table.find_all("tr")
    if len(rows) < 2:
        raise ValueError("snippet table has fewer than two rows")

    try:
        ticker = _value_after(rows[1].find_all("td")[0].text, ":")
    except (IndexError, ValueError):
        # No ticker cell, or the cell has no "label:value" form.
        ticker = ""

    last_row_cells = rows[-1].find_all("td")
    if len(last_row_cells) < 2:
        raise ValueError("last snippet row has fewer than two cells")
    indsec = _value_after(last_row_cells[0].text, ": ")
    indgrp = _value_after(last_row_cells[1].text, ": ")
    return ticker, indsec, indgrp


dec_list = []
failed_cusips = []  # CUSIPs that could not be fetched or parsed; inspect/retry afterwards
for symbol in symbol_list:
    time.sleep(REQUEST_DELAY_SECONDS)
    url = base_url + symbol
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        page.raise_for_status()
        ticker, indsec, indgrp = _parse_snippet_table(page.text)
    except (requests.RequestException, ValueError) as err:
        # One bad page must not throw away the results already collected.
        print(f"skipping {symbol}: {err}")
        failed_cusips.append(str(symbol))
        continue
    # str() turns the NumPy string scalar into a plain Python string.
    symbol_dict = {"ticker": ticker, "cusip": str(symbol), "indsec": indsec, "indgrp": indgrp}
    print(symbol_dict)
    dec_list.append(symbol_dict)

{'ticker': 'T', 'cusip': '00206R102', 'indsec': 'Public Utilities', 'indgrp': 'Telecommunications Equipment'}
{'ticker': 'AAL', 'cusip': '02376R102', 'indsec': 'Transportation', 'indgrp': 'Air Freight/Delivery Services'}
{'ticker': 'AXP', 'cusip': '025816109', 'indsec': 'Finance', 'indgrp': 'Finance: Consumer Services'}
{'ticker': 'AAPL', 'cusip': '037833100', 'indsec': 'Technology', 'indgrp': 'Computer Manufacturing'}
{'ticker': 'BK', 'cusip': '064058100', 'indsec': 'Finance', 'indgrp': 'Major Banks'}
{'ticker': 'LBRDA', 'cusip': '16117M305', 'indsec': 'Consumer Services', 'indgrp': 'Television Services'}
{'ticker': '', 'cusip': '16119P108', 'indsec': 'None', 'indgrp': 'None'}
{'ticker': 'CBI', 'cusip': '167250109', 'indsec': 'Basic Industries', 'indgrp': 'Engineering & Construction'}
{'ticker': 'KO', 'cusip': '191216100', 'indsec': 'Consumer Non-Durables', 'indgrp': 'Beverages (Production/Distribution)'}
{'ticker': 'COP', 'cusip': '20825C104', 'indsec': 'Energy', 'indgrp': 'Integrate

In [40]:
# Write the scraped records to decorated_data.csv in the current working directory.
root_path = os.getcwd()
output = os.path.join(root_path, "decorated_data.csv")
field_names = ["ticker", "cusip", "indsec", "indgrp"]
# newline="" is what the csv module requires for files it writes: without it
# the platform's newline translation is applied on top of `lineterminator`
# (on Windows every "\n" would become "\r\n"). The explicit encoding keeps the
# file independent of the machine's locale.
with open(output, "w", newline="", encoding="utf-8") as csvfile:
    csvwriter = csv.DictWriter(csvfile, lineterminator='\n', fieldnames=field_names)
    csvwriter.writeheader()
    csvwriter.writerows(dec_list)