In [3]:
import os
import requests
import json
from typing import List
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama # type: ignore
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


# Load variables from the .env file one directory above the notebook.
# NOTE(review): presumably this supplies the API key the OpenAI client below
# reads from the environment, so it has to run first — confirm.
load_dotenv("../.env")

# Chat model name passed to every completion request in this notebook.
MODEL = 'gpt-4o-mini'
# Shared client instance; note it rebinds the name `openai` at module level.
openai = OpenAI()

In [4]:
class Website:
    """A fetched web page reduced to its title, visible text and link targets.

    Attributes:
        url: Address the page was requested from.
        title: Text of the page's <title> tag, or "No title found".
        body: Raw response payload exactly as returned by ``response.content``.
        text: Text of <body> after script/style/img/input elements are removed;
            empty string when the document has no <body>.
        links: Every non-empty ``href`` found on an <a> tag, unmodified
            (so entries may be relative paths or fragments).
    """

    url: str
    title: str
    body: bytes
    text: str
    links: List[str]

    def __init__(self, url, timeout=30):
        """Download ``url`` and parse it.

        Args:
            url: Absolute URL to fetch.
            timeout: Seconds passed to ``requests.get`` so an unresponsive
                server cannot block the notebook forever. Pass ``None`` to
                wait indefinitely.

        Raises:
            requests.RequestException: propagated from ``requests.get``
                (connection failure, timeout, malformed URL, ...).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` can be None even when a <title> tag exists (for example an
        # empty tag), so fall back to the placeholder in that case as well
        # rather than letting "None" leak into the prompt.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Anchors without an href (or with an empty one) are discarded.
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the title and visible text formatted as one prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [5]:
# Fetch the Capitals landing page and print the title and text extracted from it.
caps = Website("https://www.nhl.com/capitals/")
print(caps.get_contents())

Webpage Title:
Washington Capitals | Washington Capitals
Webpage Contents:
Skip to Main Content
Tickets
All Capitals Tickets
Single Game Tickets
2024-25 Season Tickets
Partial Plans
Promotions Schedule
Holiday Packs
GOVX Military & Govt Employee Discount Program
Special Ticket Offers
Group Tickets
VIP Seating
Account Manager
Tickets for Business
Using the New NHL Mobile App
NHL Ticket Exchange
Digital Ticketing
Capital One Arena
Transformation - The Vaults
Schedule
2024-25 Season Schedule
Practice Schedule
Where To Watch
Home Jersey Schedule
Schedule Sync & Download
Team
Capitals Roster
Capitals Prospects
Caps Alumni
Management
Coaching Staff
Equipment and Trainers
Staff Directory
Monumental Sports and Entertainment
AHL Hershey Bears
ECHL South Carolina Stingrays
News
Capitals News
Capitals Today
Dump N' Chase
Community News
Ted's Take
Video
All Video
Game Highlights
Mic'd Up
Capitals Locker Room
Caps365
Rinkside Update
Off the Ice
Coach's Corner
Caps Game Entertainment
Capitals Alumni

In [6]:
# Show the raw hrefs collected from the landing page (relative and absolute mixed).
print(caps.links)

['#main-content', '/capitals', '/capitals', '/capitals/tickets/', 'https://www.ticketmaster.com/washington-capitals-tickets/artist/806039?brand=capitals&wt.mc_id=NHL_TEAM_WSH_SINGLE_GAME_TIX_LINK&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425_sgb&utm_content=tickets_nav', 'https://www.nhl.com/capitals/club-red-365/', 'https://www.nhl.com/capitals/tickets/partial-plans', 'https://www.nhl.com/capitals/tickets/promotions', 'https://www.nhl.com/capitals/tickets/holiday-packs', 'https://www.govx.com/tickets/entertainers/30/washington-capitals/', 'https://www.nhl.com/capitals/tickets/offers', 'https://www.nhl.com/capitals/tickets/group-tickets', 'https://www.nhl.com/capitals/tickets/vip', 'https://am.ticketmaster.com/monumental/?wt.mc_id=NHL_TEAM_WSH_ACCOUNT_MANAGER_TIX&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425sgb&utm_content=account_manager_tix', 'https://www.nhl.com/capitals/tickets/business', 'https://www.nhl.com/capitals/tickets/mobile-app-setup', '

In [7]:
# System prompt for the link-selection request. The example block must be
# well-formed JSON: it is the only schema the model is shown, and the request
# asks for a JSON object in reply.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the team, "
    "such as links to an About page, Team, News, Schedule, History, Stats pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "team page", "url": "https://full.url/goes/here/team"},
        {"type": "news page", "url": "https://another.full.url/news"},
        {"type": "schedule page", "url": "https://another.full.url/schedule"},
        {"type": "history page", "url": "https://another.full.url/history"},
        {"type": "stats page", "url": "https://another.full.url/stats"},
        {"type": "standings page", "url": "https://another.full.url/standings"}
    ]
}
"""

In [8]:
def get_links_user_prompt(website):
    """Compose the user message that asks the model to choose brochure links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    header = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the team, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, Tickets, Video, Listen, Community, Fans, "
        "Youth Hockey, Shop, League, email links.\n"
    )
    listing_intro = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([header, instructions, listing_intro, link_lines])

In [9]:
# Preview the user prompt built from the landing page's links.
print(get_links_user_prompt(caps))

Here is the list of links on the website of https://www.nhl.com/capitals/ - please decide which of these are relevant web links for a brochure about the team, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, Tickets, Video, Listen, Community, Fans, Youth Hockey, Shop, League, email links.
Links (some might be relative links):
#main-content
/capitals
/capitals
/capitals/tickets/
https://www.ticketmaster.com/washington-capitals-tickets/artist/806039?brand=capitals&wt.mc_id=NHL_TEAM_WSH_SINGLE_GAME_TIX_LINK&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425_sgb&utm_content=tickets_nav
https://www.nhl.com/capitals/club-red-365/
https://www.nhl.com/capitals/tickets/partial-plans
https://www.nhl.com/capitals/tickets/promotions
https://www.nhl.com/capitals/tickets/holiday-packs
https://www.govx.com/tickets/entertainers/30/washington-capitals/
https://www.nhl.com/capitals/tickets/offers
https://www.nhl.com/capitals/tickets/group-tickets
htt

In [10]:
def get_links(url):
    """Fetch ``url`` and ask the chat model which of its links suit a brochure.

    Args:
        url: Page whose links should be classified.

    Returns:
        The model's reply parsed with ``json.loads``.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    reply = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Ask the API for a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    return json.loads(reply.choices[0].message.content)

In [11]:
# Try link selection on the landing page; the cell output is the parsed JSON reply.
get_links("https://www.nhl.com/capitals/")

{'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals/'},
  {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/management'},
  {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'},
  {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history/'},
  {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'},
  {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news/'}]}

In [12]:
def get_all_details(url):
    """Gather the landing page text plus the text of every model-selected link.

    The links come from a language model, so they are treated as untrusted:
    relative paths are resolved against ``url``, entries without a URL are
    ignored, and a link that cannot be fetched is reported and skipped instead
    of aborting the whole run. A failure on the landing page itself still
    propagates.

    Args:
        url: Landing page of the site.

    Returns:
        One string: a "Landing page:" section followed by one section per
        fetched link, each headed by the link's ``type``.
    """
    # Function-scope import keeps this cell runnable on its own (stdlib only).
    from urllib.parse import urljoin

    # Collect pieces in a list and join once rather than growing a string.
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        href = link.get("url")
        if not href:
            continue
        # Absolute URLs pass through unchanged; relative ones gain the site origin.
        target = urljoin(url, href)
        try:
            page = Website(target)
        except requests.RequestException as exc:
            print(f"Skipping {target}: {exc}")
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        sections.append(page.get_contents())
    return "".join(sections)

In [13]:
# Run the full gathering step and print the combined text of all fetched pages.
print(get_all_details("https://www.nhl.com/capitals/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals/'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/management'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/coaching-staff'}, {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news/'}, {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'}, {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history/'}, {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'}, {'type': 'standings page', 'url': 'https://www.nhl.com/capitals/standings'}, {'type': 'roster page', 'url': 'https://www.nhl.com/capitals/roster'}, {'type': 'prospects page', 'url': 'https://www.nhl.com/capitals/prospects'}]}
Landing page:
Webpage Title:
Washington Capitals | Washington Capitals
Webpage Contents:
Skip to Main Content
Tickets
All Capitals Tickets
Single Game Tickets
2024-25 Season Tickets
Partial Plans
Promotions Schedule
Holiday Packs
GOVX Milit

In [14]:
# System prompt for the brochure-writing request. Each fragment ends with an
# explicit space so adjacent sentences do not run together when concatenated.
system_prompt = (
    "You are a sports marketing analyst that analyzes the contents of several relevant pages from a sports team website "
    "and creates a short brochure about the team for prospective fans and players to recruit. Respond in markdown. "
    "Include details of team history, team culture, team news, and team stats if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

In [15]:
def get_brochure_user_prompt(company_name, url):
    """Build the user message for the brochure request.

    Args:
        company_name: Name inserted into the opening line of the prompt.
        url: Landing page handed to ``get_all_details`` to collect page text.

    Returns:
        The prompt text, cut to at most 40,000 characters.
    """
    intro = f"You are looking at a company called: {company_name}\n"
    instruction = "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the team in markdown.\n"
    full_prompt = intro + instruction + get_all_details(url)
    # Keep only the first 40,000 characters of the assembled prompt.
    return full_prompt[:40_000]

In [16]:
def create_brochure(company_name, url):
    """Generate a brochure with the chat model and render it as Markdown.

    Args:
        company_name: Name forwarded to the user prompt.
        url: Landing page of the site the brochure is based on.

    Returns:
        None; the reply is shown in the notebook via ``display``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
    )
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [17]:
# End-to-end run: gather the pages and render the generated brochure below.
create_brochure("Washington Capitals", "https://www.nhl.com/capitals")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/capitals'}, {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news'}, {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'}, {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history'}, {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'}, {'type': 'standings page', 'url': 'https://www.nhl.com/capitals/standings'}, {'type': 'roster page', 'url': 'https://www.nhl.com/capitals/roster'}, {'type': 'management page', 'url': 'https://www.nhl.com/capitals/team/management'}, {'type': 'coaching staff page', 'url': 'https://www.nhl.com/capitals/team/coaching-staff'}, {'type': 'practice page', 'url': 'https://www.nhl.com/capitals/team/practice'}]}


# Washington Capitals Team Brochure

## Welcome to the Capitals Community!
Experience the thrill of Washington Capitals hockey—a team with an inspiring legacy, vibrant culture, and a commitment to excellence on and off the ice.

---

### Team History
Founded in 1974, the Washington Capitals have a rich heritage that began with their inaugural season in the NHL. Over the decades, the team has overcome challenges and evolved into one of the league's premier franchises. 

- **2018 Stanley Cup Champions:** The pinnacle of the franchise's success culminated in a historic Stanley Cup victory, marking the first in franchise history. The journey to this achievement is celebrated in "The Gr8 Chase," referencing legendary player Alexander Ovechkin.

- **50th Anniversary:** As we approach the 2024-25 season, we proudly celebrate our 50 years of hockey, highlighting unforgettable memories, players, and milestones along the way.

### Team Culture
The Capitals embody a culture of camaraderie, resilience, and community engagement.

- **Diversity & Inclusion:** The team is committed to fostering an inclusive environment and engages in various community initiatives through the **MSE Foundation** to support youth programs and military commitments.

- **Caps Community Programs:** Initiatives such as Caps in the Community, Hockey Fights Cancer, and youth hockey programs emphasize our dedication to social responsibility.

### Latest Team News
Keep up with the latest happenings within the Caps community:

- **Current Season:** The Capitals are actively competing in the 2024-25 NHL season, with newly signed players and prospects looking to make an impact.
- **Recent Acquisitions:** The team has made key trades and player signings this offseason, strengthening our roster ahead of the season.
- **In the Community:** The Capitals continue to host community events, including the recent **Sentimental Street Hockey Extravaganza** and initiatives recognizing **Hockey Fights Cancer Month**.

### Team Stats and Standings
Follow our growth and performance in the league:

- **Season Schedule:** Stay informed with our upcoming games through the [2024-25 Season Schedule](#).
- **Standings:** The Capitals are competitive in the league, striving for top positions in the Metropolitan Division.
- **Roster Updates:** Our up-to-date lineup features skilled veterans and promising rookies, including renowned stars like Ovechkin leading the charge.

---

## Join Us!
Whether you're a prospective fan or player, we invite you to become part of the Capitals family. 

- **Attend a Game:** Join us at **Capital One Arena**, where the atmosphere is electric and the community spirit is palpable.
- **Get Your Tickets:** Explore various ticket options including season passes, group tickets, and special promotions tailored for all fans.

**Follow Us:**
Stay connected with the Capitals through our social media channels for real-time updates and behind-the-scenes content.
- [Facebook](#)
- [Twitter](#)
- [Instagram](#)
- [YouTube](#)

For more information, visit [WashingtonCaps.com](https://www.washingtoncaps.com) and become a part of our journey toward championship glory!