In [1]:
import sqlite3
import yfinance as yf
import pandas as pd

In [2]:
# Set up database
# Open the SQLite database file, creating it if it does not exist yet.
conn = sqlite3.connect('sp500_stocks.db')
# SQLite parses FOREIGN KEY clauses but does not enforce them unless
# enforcement is switched on for each connection, so enable it here,
# before any table is created or populated.
conn.execute("PRAGMA foreign_keys = ON")
# Cursor used by the following cells to execute SQL commands.
cursor = conn.cursor()

In [3]:
# Create the necessary tables.
# Schema: companies -- one row per ticker. The sector and industry columns
# hold ids that reference the sectors and industries lookup tables.
# IF NOT EXISTS leaves an existing table untouched; to rebuild the table
# from scratch, run DROP TABLE first and then re-run this cell.
create_companies_sql = """
    CREATE TABLE IF NOT EXISTS companies (
        ticker TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        sector INTEGER,
        industry INTEGER,
        description TEXT,
        FOREIGN KEY (sector) REFERENCES sectors(sector_id),
        FOREIGN KEY (industry) REFERENCES industries(industry_id)
    )
"""
cursor.execute(create_companies_sql)


<sqlite3.Cursor at 0x23501ba3e40>

In [4]:
# Schema: price -- one row per (ticker, date) pair holding the open, high,
# low, close, adjusted close and volume values; ticker references companies.
create_price_sql = """
    CREATE TABLE IF NOT EXISTS price (
    ticker TEXT,
    date DATE,
    open REAL,
    high REAL,
    low REAL,
    close REAL,
    adj_close REAL,
    volume INTEGER,
    PRIMARY KEY (ticker, date),
    FOREIGN KEY (ticker) REFERENCES companies(ticker)

)
"""
cursor.execute(create_price_sql)

<sqlite3.Cursor at 0x23501ba3e40>

In [5]:
# Schema: sectors -- lookup table mapping a sector id to its name and a
# free-text description.
create_sectors_sql = """
    CREATE TABLE IF NOT EXISTS sectors (
        sector_id INTEGER PRIMARY KEY,
        sector TEXT,
        description TEXT
    )
"""
cursor.execute(create_sectors_sql)

<sqlite3.Cursor at 0x23501ba3e40>

In [6]:
# Schema: industries -- lookup table mapping an industry id to its name and
# to the id of the sector it belongs to.
create_industries_sql = """
    CREATE TABLE IF NOT EXISTS industries (
        industry_id INTEGER PRIMARY KEY,
        sector INTEGER,
        industry TEXT,
        FOREIGN KEY (sector) REFERENCES sectors(sector_id)
    )"""
cursor.execute(create_industries_sql)

<sqlite3.Cursor at 0x23501ba3e40>

In [7]:
# Populate sectors
# Each row is (sector_id, sector name, description).
sector_data = [
    (1, 'Energy', 'The energy sector covers companies that do business in the oil and natural gas industry.'),
    (2, 'Materials', 'The materials sector includes companies that provide various goods for use in manufacturing and other applications.'),
    (3, 'Industrials', 'The industrials sector encompasses a wide range of different businesses that generally involve the use of heavy equipment.'),
    (4, 'Utilities', 'The utilities sector encompasses every different type of utility company.'),
    (5, 'Health Care', 'The healthcare sector has two primary components: Pharmaceuticals, treatments based on biotechnology and healthcare equipment and services.'),
    (6, 'Financials', 'The financials sector includes businesses that are primarily related to handling money. Banks are a key industry group within the sector.'),
    (7, 'Consumer Discretionary', 'The consumer discretionary sector covers goods and services for which consumer demand depends upon consumer financial status.'),
    (8, 'Consumer Staples', 'The consumer staples sector includes goods and services that consumers need, regardless of their current financial condition or the current economic climate.'),
    (9, 'Information Technology', 'The information technology sector covers companies involved in the different categories of technological innovation.'),
    (10, 'Communication Services', 'The communication services sector is among the newest of the GICS sectors and includes a couple of major areas that used to be part of other sectors, such as telocommunication, media and entertainment.'),
    (11, 'Real Estate', 'The real estate sector is the newest GICS sector, having formerly been part of the financial sector. It generally includes two different types of investments related to real estate: Developement and REITs')
]

# The database file persists between kernel sessions, so a plain INSERT
# fails with "UNIQUE constraint failed: sectors.sector_id" the second time
# the notebook is run. The upsert below makes the cell re-runnable and keeps
# the stored rows in sync with sector_data.
# Requires SQLite 3.24 or newer (ON CONFLICT ... DO UPDATE).
insert_sectors = """
INSERT INTO sectors (sector_id, sector, description)
VALUES (?, ?, ?)
ON CONFLICT(sector_id) DO UPDATE SET
    sector = excluded.sector,
    description = excluded.description"""

cursor.executemany(insert_sectors, sector_data)

<sqlite3.Cursor at 0x23501ba3e40>

In [8]:
# Read the sectors table back and display it as a DataFrame so the inserted
# rows can be checked by eye.
sector_columns = ['sector_id', 'sector', 'description']
sectors = cursor.execute("SELECT * FROM sectors").fetchall()
sectors_df = pd.DataFrame(data=sectors, columns=sector_columns)
sectors_df

Unnamed: 0,sector_id,sector,description
0,1,Energy,The energy sector covers companies that do bus...
1,2,Materials,The materials sector includes companies that p...
2,3,Industrials,The industrials sector encompasses a wide rang...
3,4,Utilities,The utilities sector encompasses every differe...
4,5,Health Care,The healthcare sector has two primary componen...
5,6,Financials,The financials sector includes businesses that...
6,7,Consumer Discretionary,The consumer discretionary sector covers goods...
7,8,Consumer Staples,The consumer staples sector includes goods and...
8,9,Information Technology,The information technology sector covers compa...
9,10,Communication Services,The communication services sector is among the...


In [10]:
# Populate industries
# Each row is (sector id, industry_id, industry name). The industry_id is
# the sector id followed by a two-digit sequence number.
# NOTE(review): GICS names the first Consumer Discretionary entry
# "Automobile Components"; the stored name is left unchanged here in case
# other code matches on it -- confirm and correct.
# NOTE(review): the list looks incomplete against the current GICS
# structure (e.g. no Distributors, Broadline Retail or Specialty Retail
# under Consumer Discretionary) -- confirm whether that is intentional.
industry_data = [
    (1, 101, 'Energy Equipment & Services'),
    (1, 102, 'Oil, Gas & Consumable Fuels'),
    (2, 201, 'Chemicals'),
    (2, 202, 'Construction Materials'),
    (2, 203, 'Containers & Packaging'),
    (2, 204, 'Metals & Mining'),
    (2, 205, 'Paper & Forest Products'),
    (3, 301, 'Aerospace & Defense'),
    (3, 302, 'Building Products'),
    (3, 303, 'Construction & Engineering'),
    (3, 304, 'Electrical Equipment'),
    (3, 305, 'Industrial Conglomerates'),
    (3, 306, 'Machinery'),
    (3, 307, 'Trading Companies & Distributors'),
    (3, 308, 'Commercial Services & Supplies'),
    (3, 309, 'Professional Services'),
    (3, 310, 'Air Freight & Logistics'),
    (3, 311, 'Passenger Airlines'),
    (3, 312, 'Marine Transportation'),
    (3, 313, 'Ground Transportation'),
    (3, 314, 'Transportation Infrastructure'),
    (4, 401, 'Electric Utilities'),
    (4, 402, 'Gas Utilities'),
    (4, 403, 'Multi-Utilities'),
    (4, 404, 'Water Utilities'),
    (4, 405, 'Independent Power and Renewable Electricity Producers'),
    (5, 501, 'Health Care Equipment & Supplies'),
    (5, 502, 'Health Care Providers & Services'),
    (5, 503, 'Health Care Technology'),
    (5, 504, 'Biotechnology'),
    (5, 505, 'Pharmaceuticals'),
    (5, 506, 'Life Sciences Tools & Services'),
    (6, 601, 'Banks'),
    (6, 602, 'Financial Services'),
    (6, 603, 'Consumer Finance'),
    (6, 604, 'Capital Markets'),
    (6, 605, 'Mortgage Real Estate Investment Trusts (REITs)'),
    (6, 606, 'Insurance'),
    (7, 701, 'Automobiles Components'),
    (7, 702, 'Automobiles'),
    (7, 703, 'Household Durables'),
    (7, 704, 'Leisure Products'),
    (7, 705, 'Textiles, Apparel & Luxury Goods'),
    (7, 706, 'Hotels, Restaurants & Leisure'),
    (7, 707, 'Diversified Consumer Services'),
    (8, 801, 'Consumer Staples Distribution & Retail'),
    (8, 802, 'Beverages'),
    (8, 803, 'Food Products'),
    (8, 804, 'Tobacco'),
    (8, 805, 'Household Products'),
    (8, 806, 'Personal Products'),
    (9, 901, 'IT Services'),
    (9, 902, 'Software'),
    (9, 903, 'Communications Equipment'),
    (9, 904, 'Technology Hardware, Storage & Peripherals'),
    (9, 905, 'Electronic Equipment, Instruments & Components'),
    (9, 906, 'Semiconductors & Semiconductor Equipment'),
    (10, 1001, 'Diversified Telecommunication Services'),
    (10, 1002, 'Wireless Telecommunication Services'),
    (10, 1003, 'Media'),
    # Entertainment belongs to Communication Services (sector 10), matching
    # its 10xx id; it was previously filed under sector 11 (Real Estate).
    (10, 1004, 'Entertainment'),
    (10, 1005, 'Interactive Media & Services'),
    (11, 1101, 'Diversified REITs'),
    (11, 1102, 'Industrial REITs'),
    (11, 1103, 'Hotel & Resort REITs'),
    (11, 1104, 'Office REITs'),
    (11, 1105, 'Health Care REITs'),
    (11, 1106, 'Residential REITs'),
    (11, 1107, 'Retail REITs'),
    (11, 1108, 'Specialized REITs'),
    (11, 1109, 'Real Estate Management & Development')
]

# The database file persists between kernel sessions, so a plain INSERT
# fails with "UNIQUE constraint failed: industries.industry_id" the second
# time the notebook is run. The upsert makes the cell re-runnable and also
# overwrites rows stored by an earlier run (including the old, wrong sector
# for Entertainment) with the values listed in industry_data.
# Requires SQLite 3.24 or newer (ON CONFLICT ... DO UPDATE).
insert_industry = """
INSERT INTO industries (sector, industry_id, industry)
VALUES (?, ?, ?)
ON CONFLICT(industry_id) DO UPDATE SET
    sector = excluded.sector,
    industry = excluded.industry"""
cursor.executemany(insert_industry, industry_data)

<sqlite3.Cursor at 0x23501ba3e40>

In [11]:
# Read the industries table back and display it as a DataFrame so the
# inserted rows can be checked by eye. The table's "sector" column is shown
# under the label "sector_id".
industry_columns = ['industry_id', 'sector_id', 'industry']
industries = cursor.execute("SELECT * FROM industries").fetchall()
industries_df = pd.DataFrame(data=industries, columns=industry_columns)
industries_df

Unnamed: 0,industry_id,sector_id,industry
0,101,1,Energy Equipment & Services
1,102,1,"Oil, Gas & Consumable Fuels"
2,201,2,Chemicals
3,202,2,Construction Materials
4,203,2,Containers & Packaging
...,...,...,...
66,1105,11,Health Care REITs
67,1106,11,Residential REITs
68,1107,11,Retail REITs
69,1108,11,Specialized REITs


In [None]:
# Write all pending changes to the database file, then release the
# connection. The order matters: commit must run before close.
for finalize in (conn.commit, conn.close):
    finalize()