In [1]:
import os
import requests
import json
from typing import List
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama # type: ignore
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


# Load environment variables from ../.env before any client is constructed.
load_dotenv("../.env")

# Model name passed to the chat-completion calls in this notebook.
MODEL = 'gpt-4o-mini'
# NOTE(review): OpenAI() is built with no arguments; presumably it reads its
# API key from the environment populated above — confirm.
openai = OpenAI()

In [2]:
class Website:
    """A fetched web page reduced to its title, visible text and link targets.

    Constructing an instance performs the HTTP request and parses the HTML.
    """

    url: str          # address passed to the constructor
    title: str        # <title> text, or "No title found" when absent/empty
    body: bytes       # raw response payload (response.content)
    text: str         # visible <body> text, fragments separated by newlines
    links: List[str]  # non-empty href values exactly as found (not resolved)

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract title, text and links.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block indefinitely on a stalled
                connection.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``soup.title.string`` is None for an empty <title> or one containing
        # nested tags, so fall back in that case as well as when <title> is
        # missing altogether.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and visible text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [3]:
# Fetch the Capitals landing page and print its extracted title and text.
caps = Website("https://www.nhl.com/capitals/")
print(caps.get_contents())

Webpage Title:
Washington Capitals | Washington Capitals
Webpage Contents:
Skip to Main Content
Tickets
All Capitals Tickets
Single Game Tickets
2024-25 Season Tickets
Partial Plans
Promotions Schedule
Holiday Packs
GOVX Military & Govt Employee Discount Program
Special Ticket Offers
Group Tickets
VIP Seating
Account Manager
Tickets for Business
Using the New NHL Mobile App
NHL Ticket Exchange
Digital Ticketing
Capital One Arena
Transformation - The Vaults
Schedule
2024-25 Season Schedule
Practice Schedule
Where To Watch
Home Jersey Schedule
Schedule Sync & Download
Team
Capitals Roster
Capitals Prospects
Caps Alumni
Management
Coaching Staff
Equipment and Trainers
Staff Directory
Monumental Sports and Entertainment
AHL Hershey Bears
ECHL South Carolina Stingrays
News
Capitals News
Capitals Today
Dump N' Chase
Community News
Ted's Take
Video
All Video
Game Highlights
Mic'd Up
Capitals Locker Room
Caps365
Rinkside Update
Off the Ice
Coach's Corner
Caps Game Entertainment
Capitals Alumni

In [4]:
# Show the href values collected from the page; relative links are kept as-is.
print(caps.links)

['#main-content', '/capitals', '/capitals', '/capitals/tickets/', 'https://www.ticketmaster.com/washington-capitals-tickets/artist/806039?brand=capitals&wt.mc_id=NHL_TEAM_WSH_SINGLE_GAME_TIX_LINK&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425_sgb&utm_content=tickets_nav', 'https://www.nhl.com/capitals/club-red-365/', 'https://www.nhl.com/capitals/tickets/partial-plans', 'https://www.nhl.com/capitals/tickets/promotions', 'https://www.nhl.com/capitals/tickets/holiday-packs', 'https://www.govx.com/tickets/entertainers/30/washington-capitals/', 'https://www.nhl.com/capitals/tickets/offers', 'https://www.nhl.com/capitals/tickets/group-tickets', 'https://www.nhl.com/capitals/tickets/vip', 'https://am.ticketmaster.com/monumental/?wt.mc_id=NHL_TEAM_WSH_ACCOUNT_MANAGER_TIX&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425sgb&utm_content=account_manager_tix', 'https://www.nhl.com/capitals/tickets/business', 'https://www.nhl.com/capitals/tickets/mobile-app-setup', '

In [5]:
# System prompt for the link-selection call. The embedded example must be
# valid JSON, because the model is asked to answer in exactly this shape.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the team, \
such as links to an About page, Team, News, Schedule, History, Stats pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "team page", "url": "https://full.url/goes/here/team"},
        {"type": "news page", "url": "https://another.full.url/news"},
        {"type": "schedule page", "url": "https://another.full.url/schedule"},
        {"type": "history page", "url": "https://another.full.url/history"},
        {"type": "stats page", "url": "https://another.full.url/stats"},
        {"type": "standings page", "url": "https://another.full.url/standings"}
    ]
}
"""

In [6]:
def get_links_user_prompt(website):
    """Build the user message asking the model to choose brochure-worthy links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the team, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, Tickets, Video, Listen, Community, Fans, "
        "Youth Hockey, Shop, League, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join((intro, instructions, heading, link_lines))

In [7]:
# Preview the exact user message that will be sent for link selection.
print(get_links_user_prompt(caps))

Here is the list of links on the website of https://www.nhl.com/capitals/ - please decide which of these are relevant web links for a brochure about the team, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, Tickets, Video, Listen, Community, Fans, Youth Hockey, Shop, League, email links.
Links (some might be relative links):
#main-content
/capitals
/capitals
/capitals/tickets/
https://www.ticketmaster.com/washington-capitals-tickets/artist/806039?brand=capitals&wt.mc_id=NHL_TEAM_WSH_SINGLE_GAME_TIX_LINK&utm_source=washcaps_tm&utm_medium=web_organic&utm_campaign=2425_sgb&utm_content=tickets_nav
https://www.nhl.com/capitals/club-red-365/
https://www.nhl.com/capitals/tickets/partial-plans
https://www.nhl.com/capitals/tickets/promotions
https://www.nhl.com/capitals/tickets/holiday-packs
https://www.govx.com/tickets/entertainers/30/washington-capitals/
https://www.nhl.com/capitals/tickets/offers
https://www.nhl.com/capitals/tickets/group-tickets
htt

In [8]:
def get_links(url):
    """Fetch ``url`` and have the model select its brochure-relevant links.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply parsed from JSON.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    # JSON mode: the reply is requested as a single JSON object.
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [9]:
# Try link selection on the Capitals site (fetches the page and calls the model).
get_links("https://www.nhl.com/capitals/")

{'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals/'},
  {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/management'},
  {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news/'},
  {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'},
  {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history/'},
  {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'},
  {'type': 'standings page', 'url': 'https://www.nhl.com/capitals/standings'}]}

In [10]:
def get_all_details(url):
    """Collect the text of the landing page and of every model-selected link.

    Args:
        url: Landing page address.

    Returns:
        One string: the landing page contents followed by a section per
        selected link, each headed by the link's ``type``.
    """
    from urllib.parse import urljoin

    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    # The model's JSON is not guaranteed to contain "links", nor each entry a
    # "url"; tolerate both instead of raising KeyError.
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        # Resolve against the landing page in case a relative link came back;
        # absolute URLs pass through urljoin unchanged.
        sections.append(Website(urljoin(url, target)).get_contents())
    return "".join(sections)

In [11]:
# Print the combined text of the landing page and every selected link.
print(get_all_details("https://www.nhl.com/capitals/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals/'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/management'}, {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news/'}, {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'}, {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history/'}, {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'}, {'type': 'standings page', 'url': 'https://www.nhl.com/capitals/standings'}]}
Landing page:
Webpage Title:
Washington Capitals | Washington Capitals
Webpage Contents:
Skip to Main Content
Tickets
All Capitals Tickets
Single Game Tickets
2024-25 Season Tickets
Partial Plans
Promotions Schedule
Holiday Packs
GOVX Military & Govt Employee Discount Program
Special Ticket Offers
Group Tickets
VIP Seating
Account Manager
Tickets for Business
Using the New NHL Mobile App
NHL Ticket Exchange
Digital Ticketing
Capital One Arena
Transformation - The Va

In [12]:
# System prompt for brochure generation. Each fragment ends with a space so
# that sentences do not run together when the pieces are concatenated.
system_prompt = (
    "You are a sports marketing analyst that analyzes the contents of several relevant pages from a sports team website "
    "and creates a short brochure about the team for prospective fans and players to recruit. Respond in markdown. "
    "Include details of team history, team culture, team news, and team stats if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
# Include details of company culture, customers and careers/jobs if you have the information."

In [13]:
def get_brochure_user_prompt(company_name, url, max_chars=40_000):
    """Build the user message containing the scraped site text for the brochure.

    Args:
        company_name: Name inserted into the opening line of the prompt.
        url: Landing page whose contents (and selected sub-pages) are included.
        max_chars: Upper bound on the prompt length in characters; the prompt
            is cut off at this point. ``None`` disables truncation.

    Returns:
        The prompt text, at most ``max_chars`` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the team in markdown.\n"
    user_prompt += get_all_details(url)
    # Truncate so very large sites do not produce an oversized prompt.
    return user_prompt[:max_chars]

In [14]:
def _unwrap_markdown_fence(text):
    """Remove a ```/```markdown fence that wraps the whole reply, if present."""
    import re

    # Only a fence with no tag or a markdown tag at the very start counts as a
    # wrapper; a reply that merely begins with e.g. ```python is left intact.
    opening = re.match(r"\s*```(?:markdown|md)?[ \t]*(?:\n|\Z)", text)
    if opening is None:
        return text
    inner = text[opening.end():]
    # Drop the matching closing fence once it has arrived at the end.
    return re.sub(r"\n?```\s*\Z", "", inner)


def stream_brochure(company_name, url):
    """Generate a brochure and render it as Markdown while it streams in.

    Args:
        company_name: Name passed through to the brochure user prompt.
        url: Landing page of the site to summarise.

    Only a code fence wrapping the entire reply is stripped for display; the
    word "markdown" and any fences inside the brochure body are preserved.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw_response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some stream chunks carry no choices; skip them instead of raising
        # IndexError on choices[0].
        if not chunk.choices:
            continue
        raw_response += chunk.choices[0].delta.content or ''
        # Re-derive the displayed text from the raw accumulation each time so
        # a fence split across chunks is still recognised.
        update_display(
            Markdown(_unwrap_markdown_fence(raw_response)),
            display_id=display_handle.display_id,
        )

In [15]:
# Generate and live-render the brochure for the Washington Capitals.
stream_brochure("Washington Capitals", "https://www.nhl.com/capitals")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.nhl.com/capitals'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/management'}, {'type': 'team page', 'url': 'https://www.nhl.com/capitals/team/coaching-staff'}, {'type': 'news page', 'url': 'https://www.nhl.com/capitals/news'}, {'type': 'schedule page', 'url': 'https://www.nhl.com/capitals/schedule'}, {'type': 'history page', 'url': 'https://www.nhl.com/capitals/history'}, {'type': 'stats page', 'url': 'https://www.nhl.com/capitals/stats'}, {'type': 'standings page', 'url': 'https://www.nhl.com/capitals/standings'}, {'type': 'roster page', 'url': 'https://www.nhl.com/capitals/roster'}, {'type': 'prospects page', 'url': 'https://www.nhl.com/capitals/prospects'}]}


# Washington Capitals Brochure

Welcome to the Washington Capitals, a proud member of the National Hockey League (NHL) and one of the most passionate sports teams representing the U.S. capital! Here's a snapshot of our team history, culture, news, and stats.

---

## Team History

The Capitals were founded in 1974 and have a rich history that culminated in their first Stanley Cup victory in 2018. Under the leadership of **Dick Patrick**, the team has made significant strides, qualifying for postseason play 32 times and winning three Presidents' Trophies (2009-10, 2015-16, 2016-17). The Capitals' 2018 Stanley Cup run featured notable performances, including tying the NHL record for 10 road wins during the playoffs.

### Key Milestones:
- **1974**: Established as an NHL franchise.
- **1998**: Reached the Stanley Cup Final for the first time.
- **2018**: Won the Stanley Cup, marking a historic achievement in franchise history.

## Team Culture

The Capitals embody a spirit of **community engagement**, **pride** in athletic excellence, and **inclusive teamwork**. Players are encouraged to participate in community initiatives, such as "Caps in the Community," which promotes diversity and inclusion. Our culture also emphasizes youth development through programs like "Caps In School," ensuring the next generation of hockey fans is nurtured.

### Highlights of Team Culture:
- **Diversity and Inclusion**: Committed to fostering a welcoming environment.
- **Community Initiatives**: Various programs to give back to the DC area.
- **Community Heroes**: Recognizing local figures who have made a significant impact.

## Team News

Stay connected with the Capitals through multiple news sources:
- Weekly updates via **"Capitals Today"**.
- In-depth coverage of games and team updates with **"Dump N' Chase"**.
- Insightful posts from ownership and management through **"Ted's Take"**.

### Recent Highlights:
- Ongoing roster changes, including significant acquisitions and recalls.
- The launch of special game night activities such as **Caps Fight Cancer Night**.

## Team Stats

As of the latest season, the Capitals have shown competitive strength:
- **Overall Record**: 1,616 wins, 1,155 losses, 201 ties since 1982.
- **Decade Dominance**: 2010s marked the most wins of any NHL team (465).
- **Notable Players**: Home to legendary figures like **Alex Ovechkin**, widely regarded as one of the greatest players in hockey history.

### 2024-25 Season Prospects:
- Regular updates and game schedules are available to track the Capitals' performance.
- Upcoming season focuses on embracing young talent while maintaining a competitive edge in the league.

---

Join the **Washington Capitals** in our pursuit of excellence! Whether you’re a die-hard fan or a prospective player, we welcome you to be part of our journey as we continue building our legacy in ALL CAPS! Experience the excitement live at the **Capital One Arena** or follow our journey online.

---

For tickets and more information, visit [WashingtonCaps.com](https://www.washingtoncaps.com) or connect with us on social media! 

**Go Caps!** 🏒