In [5]:
from urllib.parse import quote_plus

import numpy as np
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

from config import *

In [6]:
# Build the PostgreSQL connection URL from the settings in `config`.
# The credentials are percent-encoded so that characters such as '@', ':' or
# '/' inside the user name or password cannot be mistaken for URL delimiters.
user = config['psql_user']
password = config['psql_pwd']
host = 'localhost'
port = '5432'
db = config['psql_db']
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(quote_plus(str(user)), quote_plus(str(password)), host, port, db)

# create_engine() returns the Engine that the following cells reflect and
# query through; `client_encoding` is forwarded to the database driver.
engine = sqlalchemy.create_engine(url, client_encoding='utf8')

In [7]:
# Create the automap base class. It holds no mapped classes yet; they are
# generated when `Base.prepare()` reflects a database into it.
Base = automap_base()

In [8]:
# Reflect the tables of the database behind `engine` and generate mapped
# classes from them, exposed afterwards through `Base.classes`.
# NOTE(review): `reflect=True` is deprecated as of SQLAlchemy 1.4 in favour of
# `Base.prepare(autoload_with=engine)` — confirm the installed version before
# switching.
Base.prepare(engine, reflect=True)

In [9]:
# List the names of the classes automap generated. As the last expression of
# the cell the result is displayed without an explicit print().
Base.classes.keys()

['positions']

In [10]:
# Bind the class mapped from the `positions` table to the name `Position`
# so the queries below can refer to its columns.
Position = Base.classes.positions

In [11]:
# Open an ORM session bound to `engine`; the query below runs through it.
# NOTE(review): nothing visible in this notebook closes the session —
# consider calling `session.close()` once the data has been loaded.
session = Session(engine)

In [12]:
# Fetch every distinct CUSIP held in the positions table. Grouping by the
# column collapses duplicates, so each CUSIP comes back once as a 1-tuple.
cusip_query = session.query(Position.cusip).group_by(Position.cusip)
symbols = cusip_query.all()
print(symbols)

[('20825C104',), ('478160104',), ('58441K100',), ('30219G108',), ('25470M109',), ('92343V104',), ('531229607',), ('23918K108',), ('244199105',), ('903293405',), ('862121100',), ('718546104',), ('92345Y106',), ('167250109',), ('61166W101',), ('92553P201',), ('531229870',), ('38141G104',), ('025816109',), ('740189105',), ('891027104',), ('16117M305',), ('949746101',), ('G5480U138',), ('531229854',), ('16119P108',), ('49456B101',), ('37733W105',), ('911312106',), ('615369105',), ('459200101',), ('50076Q106',), ('67011P100',), ('90130A101',), ('523768109',), ('637071101',), ('30231G102',), ('25490A309',), ('92343E102',), ('500754106',), ('742718109',), ('037833100',), ('609207105',), ('76131D103',), ('939640108',), ('584404107',), ('80105N105',), ('22160K105',), ('00206R102',), ('02376R102',), ('531229409',), ('531229102',), ('247361702',), ('910047109',), ('G5480U104',), ('384637104',), ('92826C839',), ('37045V100',), ('57636Q104',), ('931142103',), ('G0750C108',), ('G5480U153',), ('84474

In [13]:
# Flatten the list of 1-tuples returned by the query into a flat 1-D array
# of CUSIP strings that the scraping loop can iterate over directly.
symbol_list = np.asarray(symbols).ravel()
print(symbol_list)

['20825C104' '478160104' '58441K100' '30219G108' '25470M109' '92343V104'
 '531229607' '23918K108' '244199105' '903293405' '862121100' '718546104'
 '92345Y106' '167250109' '61166W101' '92553P201' '531229870' '38141G104'
 '025816109' '740189105' '891027104' '16117M305' '949746101' 'G5480U138'
 '531229854' '16119P108' '49456B101' '37733W105' '911312106' '615369105'
 '459200101' '50076Q106' '67011P100' '90130A101' '523768109' '637071101'
 '30231G102' '25490A309' '92343E102' '500754106' '742718109' '037833100'
 '609207105' '76131D103' '939640108' '584404107' '80105N105' '22160K105'
 '00206R102' '02376R102' '531229409' '531229102' '247361702' '910047109'
 'G5480U104' '384637104' '92826C839' '37045V100' '57636Q104' '931142103'
 'G0750C108' 'G5480U153' '844741108' '82968B103' '55261F104' '92927K102'
 '85571Q102' '87165B103' '867224107' '902973304' 'G5480U120' '064058100'
 '191216100' '531229300' '369604103']


In [14]:
import os
import csv
import time
import requests
from bs4 import BeautifulSoup

In [15]:
base_url = "https://search13f.com/securities/neighbors/"
REQUEST_DELAY_SECONDS = 10    # pause before every request so the site is not hammered
REQUEST_TIMEOUT_SECONDS = 30  # abandon a stalled request instead of hanging the loop


def _value_after(text, separator):
    """Return what follows the first `separator` in `text`, or "" when it is absent."""
    return text.partition(separator)[2]


def _cell_text(cells, index):
    """Return the text of `cells[index]`, or "" when the row has no such cell."""
    return cells[index].text if index < len(cells) else ""


def _blank_record(symbol):
    """Return the output record for `symbol` with every scraped field left empty."""
    return {"ticker": "", "cusip": symbol, "indsec": "", "indgrp": ""}


def parse_security_page(html, symbol):
    """Extract ticker, industry sector and industry group from a security page.

    Parameters:
        html: markup of the page fetched for `symbol`.
        symbol: the CUSIP the page was requested for; copied into the record.

    Returns:
        A dict with the keys "ticker", "cusip", "indsec" and "indgrp", or
        None when the page contains no table with id "snippet". Any field
        whose row, cell or "label: value" separator is missing is set to "".
    """
    soup = BeautifulSoup(html, "lxml")
    table = soup.find("table", {"id": "snippet"})
    if table is None:
        return None

    record = _blank_record(symbol)
    rows = table.find_all("tr")
    if len(rows) > 1:
        # The second row's first cell carries the ticker after a ":".
        record["ticker"] = _value_after(_cell_text(rows[1].find_all("td"), 0), ":")
    if rows:
        # The last row carries the sector and the group, each after a ": ".
        last_cells = rows[-1].find_all("td")
        record["indsec"] = _value_after(_cell_text(last_cells, 0), ": ")
        record["indgrp"] = _value_after(_cell_text(last_cells, 1), ": ")
    return record


# Scrape one record per CUSIP. A failed request or an unexpected page is
# reported and stored as a blank record, so a single bad symbol does not throw
# away the rows already collected by this slow, rate-limited loop.
dec_list = []
for symbol in symbol_list:
    time.sleep(REQUEST_DELAY_SECONDS)
    try:
        page = requests.get(base_url + symbol, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"request for {symbol} failed: {exc}")
        symbol_dict = _blank_record(symbol)
    else:
        symbol_dict = parse_security_page(page.text, symbol)
        if symbol_dict is None:
            print(f"no 'snippet' table for {symbol} (HTTP {page.status_code})")
            symbol_dict = _blank_record(symbol)
    print(symbol_dict)
    dec_list.append(symbol_dict)

{'ticker': 'COP', 'cusip': '20825C104', 'indsec': 'Energy', 'indgrp': 'Integrated oil Companies'}
{'ticker': 'JNJ', 'cusip': '478160104', 'indsec': 'Health Care', 'indgrp': 'Major Pharmaceuticals'}
{'ticker': '', 'cusip': '58441K100', 'indsec': '', 'indgrp': ''}
{'ticker': 'ESRX', 'cusip': '30219G108', 'indsec': 'Health Care', 'indgrp': 'Medical/Nursing Services'}
{'ticker': 'SATS', 'cusip': '25470M109', 'indsec': 'Technology', 'indgrp': 'Radio And Television Broadcasting And Communications Equipment'}
{'ticker': 'VZ', 'cusip': '92343V104', 'indsec': 'Public Utilities', 'indgrp': 'Telecommunications Equipment'}
{'ticker': '', 'cusip': '531229607', 'indsec': 'None', 'indgrp': 'None'}
{'ticker': 'DVA', 'cusip': '23918K108', 'indsec': 'Health Care', 'indgrp': 'Hospital/Nursing Management'}
{'ticker': 'DE', 'cusip': '244199105', 'indsec': 'Capital Goods', 'indgrp': 'Industrial Machinery/Components'}
{'ticker': 'USG', 'cusip': '903293405', 'indsec': 'Capital Goods', 'indgrp': 'Building Mate

In [17]:
# Write the scraped records to decorated_data.csv in the current working
# directory, one row per CUSIP, with a header row.
root_path = os.getcwd()
output = os.path.join(root_path, "decorated_data.csv")
# newline="" hands line-ending control to the csv module, as its documentation
# requires; without it Windows would rewrite the '\n' terminator as '\r\n'.
# The explicit encoding keeps the file identical across platforms.
with open(output, "w", newline="", encoding="utf-8") as csvfile:
    field_names = ["ticker", "cusip", "indsec", "indgrp"]
    csvwriter = csv.DictWriter(csvfile, lineterminator='\n', fieldnames=field_names)
    csvwriter.writeheader()
    csvwriter.writerows(dec_list)