In [None]:
#Beautiful Soup and Requests- Web Scraping

In [54]:
#Importing BeautifulSoup lets the HTML be searched as a structured tree. bs4 is Beautiful Soup version 4
#Store the website URL as a string
#requests.get downloads the page's HTML content and stores it in a response object
#"html.parser" tells BeautifulSoup to use Python's built-in HTML parser


import requests
from bs4 import BeautifulSoup

# Target page: a public sandbox site intended for scraping practice.
url = "https://www.scrapethissite.com/pages/forms/"

# requests applies no timeout by default, so without one a stalled server
# would leave this cell hanging indefinitely.
response = requests.get(url, timeout=10)

# Raise on a 4xx/5xx reply instead of silently parsing an error page as data.
response.raise_for_status()

# Parse the downloaded HTML with Python's built-in parser (no extra dependency).
soup = BeautifulSoup(response.text, "html.parser")


In [58]:
#Retrieve the page title

# Pull the text of the <title> tag into a named value, then show it.
page_title = soup.title.text
print(page_title)


Hockey Teams: Forms, Searching and Pagination | Scrape This Site | A public sandbox for learning web scraping


In [60]:
#Get all team names on the page

#soup.select(".team") finds all HTML elements that match the ".team" CSS selector
#select_one(".name") retrieves the first element inside a team block that matches ".name"
#.text.strip() removes leading and trailing whitespace (spaces, tabs, newlines)
#"for team in soup.select(".team")" loops through each team block that was found

# Collect the cleaned-up name from every ".team" block on the page.
team_names = []
for team in soup.select(".team"):
    name_element = team.select_one(".name")
    team_names.append(name_element.text.strip())

# Print the collected names one per line, in page order.
for name in team_names:
    print(name)


Boston Bruins
Buffalo Sabres
Calgary Flames
Chicago Blackhawks
Detroit Red Wings
Edmonton Oilers
Hartford Whalers
Los Angeles Kings
Minnesota North Stars
Montreal Canadiens
New Jersey Devils
New York Islanders
New York Rangers
Philadelphia Flyers
Pittsburgh Penguins
Quebec Nordiques
St. Louis Blues
Toronto Maple Leafs
Vancouver Canucks
Washington Capitals
Winnipeg Jets
Boston Bruins
Buffalo Sabres
Calgary Flames
Chicago Blackhawks


In [62]:
#Find wins and losses for each team

#Assign all HTML elements matching ".team" to the teams variable
#"for team in teams" loops through the teams variable we created
#For each team, find the first element matching ".name", ".wins", and ".losses"
#print(f"...") - an f-string inserts variables directly into the string, making the code cleaner and quicker


# Every element matching ".team"; kept in a variable so later cells can reuse it.
teams = soup.select(".team")

for team in teams:
    # Look up the three fields in one pass and unpack them in selector order.
    name, wins, losses = [
        team.select_one(css_class).text.strip()
        for css_class in (".name", ".wins", ".losses")
    ]
    print(f"{name} - Wins: {wins}, Losses: {losses}")


Boston Bruins - Wins: 44, Losses: 24
Buffalo Sabres - Wins: 31, Losses: 30
Calgary Flames - Wins: 46, Losses: 26
Chicago Blackhawks - Wins: 49, Losses: 23
Detroit Red Wings - Wins: 34, Losses: 38
Edmonton Oilers - Wins: 37, Losses: 37
Hartford Whalers - Wins: 31, Losses: 38
Los Angeles Kings - Wins: 46, Losses: 24
Minnesota North Stars - Wins: 27, Losses: 39
Montreal Canadiens - Wins: 39, Losses: 30
New Jersey Devils - Wins: 32, Losses: 33
New York Islanders - Wins: 25, Losses: 45
New York Rangers - Wins: 36, Losses: 31
Philadelphia Flyers - Wins: 33, Losses: 37
Pittsburgh Penguins - Wins: 41, Losses: 33
Quebec Nordiques - Wins: 16, Losses: 50
St. Louis Blues - Wins: 47, Losses: 22
Toronto Maple Leafs - Wins: 23, Losses: 46
Vancouver Canucks - Wins: 28, Losses: 43
Washington Capitals - Wins: 37, Losses: 36
Winnipeg Jets - Wins: 26, Losses: 43
Boston Bruins - Wins: 36, Losses: 32
Buffalo Sabres - Wins: 31, Losses: 37
Calgary Flames - Wins: 31, Losses: 37
Chicago Blackhawks - Wins: 36, L

In [66]:
#Create the teams_data list and fill it using the same loop over teams
#Each row also includes the goals for (".gf") and goals against (".ga") fields

# CSS selectors for the fields of each row, in output column order.
stat_selectors = (".name", ".wins", ".losses", ".gf", ".ga")

# One [name, wins, losses, gf, ga] row per team; every value is the stripped cell text.
teams_data = []

for team in teams:
    name, wins, losses, gf, ga = (
        team.select_one(selector).text.strip() for selector in stat_selectors
    )
    row = [name, wins, losses, gf, ga]
    teams_data.append(row)

print(teams_data)

[['Boston Bruins', '44', '24', '299', '264'], ['Buffalo Sabres', '31', '30', '292', '278'], ['Calgary Flames', '46', '26', '344', '263'], ['Chicago Blackhawks', '49', '23', '284', '211'], ['Detroit Red Wings', '34', '38', '273', '298'], ['Edmonton Oilers', '37', '37', '272', '272'], ['Hartford Whalers', '31', '38', '238', '276'], ['Los Angeles Kings', '46', '24', '340', '254'], ['Minnesota North Stars', '27', '39', '256', '266'], ['Montreal Canadiens', '39', '30', '273', '249'], ['New Jersey Devils', '32', '33', '272', '264'], ['New York Islanders', '25', '45', '223', '290'], ['New York Rangers', '36', '31', '297', '265'], ['Philadelphia Flyers', '33', '37', '252', '267'], ['Pittsburgh Penguins', '41', '33', '342', '305'], ['Quebec Nordiques', '16', '50', '236', '354'], ['St. Louis Blues', '47', '22', '310', '250'], ['Toronto Maple Leafs', '23', '46', '241', '318'], ['Vancouver Canucks', '28', '43', '243', '315'], ['Washington Capitals', '37', '36', '258', '258'], ['Winnipeg Jets', '26

In [68]:
#Save data as csv file

#Import csv module to read/write csv files
#Import os module to let Python interact with computer operating system - downloads folder

import csv
import os


#Save to Downloads folder
#with open - opens the file for writing and closes it automatically when done
#"w" puts it in write mode and overwrites the file if it already exists
#newline="" avoids adding extra blank lines between rows
#csv.writer creates a CSV writer object to write individual rows
#writer.writerow - writes the header row
#writer.writerows(teams_data) - writes our list teams_data into the csv file



# Build the destination from the current user's home directory so the notebook
# runs on any machine, not only under one hard-coded account name.
save_path = os.path.join(os.path.expanduser("~"), "Downloads", "hockey_teams.csv")

# Make sure the target folder exists; this is a no-op when it is already there.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# newline="" lets the csv module control line endings (avoids blank rows on Windows);
# an explicit encoding keeps the file identical regardless of the platform default.
with open(save_path, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    # Header row first, then one row per team from teams_data.
    writer.writerow(["Team Name", "Wins", "Losses", "Goals For", "Goals Against"])
    writer.writerows(teams_data)

