In [8]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

import dotenv
# Load environment variables (presumably from a local .env file — confirm) so the
# API keys read later via os.getenv(...) are available. The call's return value
# is what this cell displays.
dotenv.load_dotenv()

True

In [4]:
# Scrape the S&P 500 constituents (ticker + company name) from Wikipedia.
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Fetch the page.
# - A descriptive User-Agent is sent because Wikimedia's User-Agent policy asks
#   clients to identify themselves; generic library agents may be rejected.
# - The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(
    url,
    headers={'User-Agent': 'sp500-phrases-notebook/1.0 (python-requests)'},
    timeout=30,
)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the list.
response.raise_for_status()

# Parse the page
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the constituents table by its element id.
table = soup.find('table', {'id': 'constituents'})
if table is None:
    raise ValueError(
        "Could not find the table with id 'constituents'; "
        "the Wikipedia page layout may have changed."
    )

# Get the rows
rows = table.find_all('tr')[1:]  # Skip the header row

# Get the company name and ticker from the first two cells of each row.
data = []
for row in rows:
    cols = row.find_all('td')
    if len(cols) < 2:
        # Not a regular data row (e.g. an extra header or spacer row): skip it
        # rather than crash with an IndexError.
        continue
    symbol = cols[0].text.strip()
    name = cols[1].text.strip()
    data.append({'ticker': symbol, 'name': name})

data[:10], len(data)

([{'ticker': 'MMM', 'name': '3M'},
  {'ticker': 'AOS', 'name': 'A. O. Smith'},
  {'ticker': 'ABT', 'name': 'Abbott'},
  {'ticker': 'ABBV', 'name': 'AbbVie'},
  {'ticker': 'ACN', 'name': 'Accenture'},
  {'ticker': 'ADBE', 'name': 'Adobe Inc.'},
  {'ticker': 'AMD', 'name': 'Advanced Micro Devices'},
  {'ticker': 'AES', 'name': 'AES Corporation'},
  {'ticker': 'AFL', 'name': 'Aflac'},
  {'ticker': 'A', 'name': 'Agilent Technologies'},
  {'ticker': 'APD', 'name': 'Air Products and Chemicals'},
  {'ticker': 'ABNB', 'name': 'Airbnb'},
  {'ticker': 'AKAM', 'name': 'Akamai'},
  {'ticker': 'ALB', 'name': 'Albemarle Corporation'},
  {'ticker': 'ARE', 'name': 'Alexandria Real Estate Equities'},
  {'ticker': 'ALGN', 'name': 'Align Technology'},
  {'ticker': 'ALLE', 'name': 'Allegion'},
  {'ticker': 'LNT', 'name': 'Alliant Energy'},
  {'ticker': 'ALL', 'name': 'Allstate'},
  {'ticker': 'GOOGL', 'name': 'Alphabet Inc. (Class A)'},
  {'ticker': 'GOOG', 'name': 'Alphabet Inc. (Class C)'},
  {'ticker':

In [5]:
# Build the company table from the scraped records.
# The column names are pinned so the frame always has `ticker` and `name`
# columns, even when `data` is empty; otherwise an empty scrape would produce a
# column-less frame and later lookups such as df["ticker"] would raise KeyError.
df = pd.DataFrame(data, columns=['ticker', 'name'])

df.head()

Unnamed: 0,ticker,name
0,MMM,3M
1,AOS,A. O. Smith
2,ABT,Abbott
3,ABBV,AbbVie
4,ACN,Accenture


In [24]:
import os
import warnings

# Fetch each ticker's company description from the Alpha Vantage OVERVIEW
# endpoint and store it in a `description` column on `df`.
api_key = os.getenv("ALPHAVANTAGE_API_KEY")
if not api_key:
    # Without a key every request would come back without data; fail once here
    # instead of issuing one useless call per ticker.
    raise RuntimeError(
        "ALPHAVANTAGE_API_KEY is not set; add it to the environment or .env file."
    )

# ticker -> description. Collected separately (and not named `data`) so the
# scraped company list from the earlier cell is not overwritten.
descriptions = {}

for ticker in df["ticker"]:
    try:
        r = requests.get(
            "https://www.alphavantage.co/query",
            # `params` lets requests URL-encode the values (some tickers contain '.').
            params={"function": "OVERVIEW", "symbol": ticker, "apikey": api_key},
            timeout=30,
        )
        r.raise_for_status()
        overview = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is reported: the full message can include the
        # request URL, which carries the API key.
        warnings.warn(f"Skipping {ticker}: request failed ({type(exc).__name__})")
        continue

    # NOTE(review): Alpha Vantage appears to answer throttled or rejected calls
    # with a JSON object holding a "Note" or "Information" message instead of
    # the overview fields — confirm against the API docs. The message text is
    # deliberately not echoed because it may contain account details.
    if "Description" not in overview and ("Note" in overview or "Information" in overview):
        warnings.warn(
            f"Alpha Vantage returned a notice instead of data for {ticker} "
            "(possible rate limit or key problem); stopping early."
        )
        break

    descriptions[ticker] = overview.get("Description")

# One vectorised assignment instead of a boolean-mask write per ticker;
# tickers without a fetched description end up as missing values.
df["description"] = df["ticker"].map(descriptions)

df.head()

Unnamed: 0,ticker,name,description
0,MMM,3M,The 3M Company is an American multinational co...
1,AOS,A. O. Smith,A. O. Smith Corporation is an American manufac...
2,ABT,Abbott,Abbott Laboratories is an American multination...
3,ABBV,AbbVie,AbbVie is an American publicly traded biopharm...
4,ACN,Accenture,Accenture plc is an Irish-domiciled multinatio...


In [43]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

import dotenv
dotenv.load_dotenv()

import os
import json

# Prompt template asking the model for casual search-style phrases about one
# company; it is filled in from the {name}, {description} and {ticker} fields.
prompt = ChatPromptTemplate.from_template(
    """You are now Phrase Generator, your job is to take in a company name, 
    description, and ticker and generate natural language phrases 
    that would be useful for people looking for that company

    the company name is: {name}
    the company description is: {description}
    the company ticker is: {ticker}

    
    For example if the company name is Apple, the description is iPhone maker, and the ticker is AAPL,
    you might generate phrases like "Big tech company" or "steve jobs company" etc
    
    To generate phrases for the company use your training data and the supplied description.

    Generate 50 casual natural language phrases and output them as json. Only respond in json format.

    """

)

model = ChatOpenAI(
    model="gpt-4-turbo-preview",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Converts the model's JSON reply into Python objects.
output_parser = JsonOutputParser()

# prompt -> model -> parser pipeline.
chain = prompt | model | output_parser

# Try the chain on the first company. Positional access (.iloc[0]) reads the
# first row regardless of the index labels and replaces three chained
# df["col"][0] lookups with a single row fetch.
first_company = df.iloc[0]
phrases = chain.invoke(
    {
        "name": first_company["name"],
        "description": first_company["description"],
        "ticker": first_company["ticker"],
    }
)

# The prompt only says "json", so the reply may be an object wrapping the list
# (e.g. {"phrases": [...]}) rather than a bare array. Unwrap the first list
# value so the slicing below keeps working; fall back to the object's values.
if isinstance(phrases, dict):
    phrases = next(
        (value for value in phrases.values() if isinstance(value, list)),
        list(phrases.values()),
    )

phrases[:10], len(phrases), type(phrases)


(["Minnesota's industrial giant",
  'The Post-it Notes creator',
  'Safety and healthcare innovator',
  'Global conglomerate from Saint Paul',
  'Diverse product powerhouse',
  'Industry leader in adhesives',
  'The company behind Scotch Tape',
  'Personal protective gear supplier',
  '60,000 products under one roof',
  'From laminates to dental products'],
 50,
 list)