#### Extracting data from web pages with BeautifulSoup

In [4]:
import requests
from bs4 import BeautifulSoup

# Target webpage URL (replace with your desired URL)
url = 'https://www.fangraphs.com/leaders/major-league'

# Fetch the webpage content
try:
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server; a timeout surfaces that as a RequestException.
    response = requests.get(url, timeout=10)
    response.raise_for_status() # Check for HTTP errors
except requests.exceptions.RequestException as e:
    # Covers connection failures, timeouts, and the HTTPError raised above.
    # NOTE: 'soup' is not (re)assigned on this path.
    print(f"Error fetching the webpage: {e}")
else:
    # Parse the HTML content (only reached when the request succeeded)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Now, 'soup' holds a structured representation of the webpage, ready for our exploration

In [5]:
# Gather every <h1> element from the parsed page
headings = soup.find_all(name='h1')

# Pull out each heading's text with surrounding whitespace trimmed, then show it
heading_texts = [h1_tag.get_text().strip() for h1_tag in headings]
for title in heading_texts:
    print(title)

Major League Leaders


In [6]:
# Gather every anchor (<a>) element from the parsed page
links = soup.find_all(name='a')

# Look up each anchor's 'href'; filter(None, ...) drops anchors whose
# 'href' is missing or empty so only usable targets are printed
href_values = (anchor.get('href') for anchor in links)
for target in filter(None, href_values):
    print(target)

/
https://www.fangraphs.com/blogs/wp-login.php?redirect_to=null
https://plus.fangraphs.com/product/fangraphs-membership/
https://plus.fangraphs.com/shop/
https://plus.fangraphs.com/product/fangraphs-gift-membership/
https://plus.fangraphs.com/product/fangraphs-donation/
/fantasy-tools/player-rater
/fantasy-tools/auction-calculator
//ottoneu.fangraphs.com/
//ottoneu.fangraphs.com/
//ottoneu.fangraphs.com/support
//www.fangraphs.com/fantasy/category/ottoneu/
/blog-roll
https://www.fangraphs.com/blogs/
/blog-roll?category=Effectively+Wild
https://www.fangraphs.com/prospects/
/rotographs
https://www.fangraphs.com/fantasy/category/podcast/
https://www.fangraphs.com/fantasy/category/field-of-streams/
https://fantasy.fangraphs.com/category/beat-the-shift/
https://www.fangraphs.com/community/
https://www.fangraphs.com/tht/
https://www.fangraphs.com/not/
https://www.fangraphs.com/techgraphs/
https://www.fangraphs.com/plus/
https://www.fangraphs.com/tht/tht-live/
https://www.fangraphs.com/tht/ca

### Real-Life Example: Sports Data Analyst

In [8]:
import requests
from bs4 import BeautifulSoup

# Target a sports-data webpage URL (replace with your desired URL)
url = 'https://www.fangraphs.com/leaders/major-league'

# Fetch the webpage content
try:
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server; a timeout surfaces that as a RequestException.
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # Check for HTTP errors
except requests.exceptions.RequestException as e:
    print(f"Error fetching the webpage: {e}")
else:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Now, 'soup' holds the structured representation of the webpage, ready for our exploration!

    # Every <div> carrying the leaderboard-table class
    # NOTE(review): this class name looks auto-generated — confirm it still matches the live page.
    sports = soup.find_all('div', class_='leaders-major_leaders-major_table_hcmbm')

    # Use a distinct loop variable: the original rebound 'sports' itself and
    # then referenced an undefined 'sport', which raised NameError.
    for sport in sports:
        # find() returns only the FIRST matching cell inside this container,
        # or None when nothing matches.
        player_cell = sport.find('td', class_='align-left fixed')
        games_cell = sport.find('td', class_='align-right')
        if player_cell is None or games_cell is None:
            # Skip containers missing either cell instead of failing on None.text
            continue

        # Extract the player name:
        player = player_cell.text.strip()
        # Extract the number of games played by the player:
        games_played = games_cell.text.strip()
        print(f"Player: {player}, Games Played: {games_played}\n")