## Scrape Images

I used [this]("https://loodibee.com/nba/") site


In [None]:
import os
import time

import requests
from bs4 import BeautifulSoup

In [None]:
# load the nba page
r = requests.get("https://loodibee.com/nba/")
print(r.status_code)

In [None]:
# There is probably a better way to isolate the links to each team, but this worked for me as of 9/24/23
soup = BeautifulSoup(r.text, "html.parser")
links = soup.find_all(class_="sub-menu")
# print(links)

# get links to all nba teams
team_links = [link["href"] for link in links[5].find_all("a")]
team_links

In [None]:
image_dir = "../assets/"

images_scraped = 0
for tl in team_links:
    num_scraped = 0
    
    full_team_name = tl.split("/")[-2]

    r = requests.get(tl)
    if r.status_code != 200:
        print(f"{full_team_name} - status code: {r.status_code}")
        continue

    soup = BeautifulSoup(r.text, "html.parser")
    img_tags = soup.find_all("img")

    # this probably isn't perfect, but it seems to do ok
    # city, team = full_team_name.split("-")
    # src_list = [x for x in img_tags if city in x or team in x['src']]
    src_list = [x['src'] for x in img_tags if "png" in x['src']]
    
    # Check if the directory exists
    team_image_dir = os.path.join(image_dir, full_team_name)
    if not os.path.exists(team_image_dir):
        # If it doesn't exist, create a new directory
        os.makedirs(team_image_dir)

    for image in src_list:
        try:
            # time.sleep(1)
            img = requests.get(image)
            # print(r.status_code)
            with open(os.path.join(team_image_dir, image.split("/")[-1]), "wb") as f:
                f.write(img.content)
            num_scraped += 1
        except Exception as e:
            print(f"error scraping {image}")
            print(image)
            print(e)
            print()
            continue
    images_scraped += num_scraped
    print(f"{full_team_name}: {num_scraped} images scraped")
print("Done")

In [None]:
# pause notebook
assert False

### Trying NBA.com

In [1]:
import os
import time

import requests
from bs4 import BeautifulSoup
import pandas as pd
import cairosvg


In [2]:
team_df = pd.read_csv("../data/team_info.csv")
team_df.head()

id_to_abbr_dict = (
    team_df[["id", "abbreviation"]]
    .assign(id=team_df['id'].astype(str))
    .set_index("id")["abbreviation"]
    .str.lower()
    .to_dict()
)
# id_to_abbr_dict

In [3]:
nba_url = "https://www.nba.com/teams"

# load the nba page
r = requests.get(nba_url)
print(r.status_code)

200


In [8]:
# There is probably a better way to isolate the links to each team, but this worked for me as of 9/24/23
soup = BeautifulSoup(r.text, "html.parser")

image_src_urls = [img['src'] for img in soup.find_all("img") if "primary" in img['src']]
image_src_urls = list(set(image_src_urls))
print(len(image_src_urls))
# print(len(set(image_src_urls)))
image_src_urls[:5]

30


['https://cdn.nba.com/logos/nba/1610612766/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612750/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612759/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612754/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612746/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612764/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612745/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612744/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612748/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612765/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612740/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612756/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612749/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612757/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/1610612755/primary/L/logo.svg',
 'https://cdn.nba.com/logos/nba/16106127

In [14]:
output_dir = "../assets/_nba_svg"
# Check if the directory exists
if not os.path.exists(output_dir):
    # If it doesn't exist, create a new directory
    os.makedirs(output_dir)

for image_url in image_src_urls:
    try:
        # Send an HTTP GET request to the SVG URL
        response = requests.get(image_url)
        time.sleep(3)

        # Check if the request was successful (HTTP status code 200)
        if response.status_code == 200:
            # Specify the path where you want to save the SVG file
            # Change 'output_file.svg' to the desired file name and path
            id_from_path = image_url.split("/")[5]
            save_path = os.path.join(output_dir,f"{id_to_abbr_dict[id_from_path]}.svg")
            # Open a file in binary write mode and save the SVG content
            with open(save_path, "wb") as file:
                file.write(response.content)

            print(f"SVG file saved as {save_path}")
        else:
            print(f"Failed to fetch the SVG file. Status code: {response.status_code}")
            
    except Exception as e:
        print(f"An error occurred: {str(e)}")

SVG file saved as ../assets/_nba/cha.svg
SVG file saved as ../assets/_nba/min.svg
SVG file saved as ../assets/_nba/sas.svg
SVG file saved as ../assets/_nba/ind.svg
SVG file saved as ../assets/_nba/lac.svg
SVG file saved as ../assets/_nba/was.svg
SVG file saved as ../assets/_nba/hou.svg
SVG file saved as ../assets/_nba/gsw.svg
SVG file saved as ../assets/_nba/mia.svg
SVG file saved as ../assets/_nba/det.svg
SVG file saved as ../assets/_nba/nop.svg
SVG file saved as ../assets/_nba/phx.svg
SVG file saved as ../assets/_nba/mil.svg
SVG file saved as ../assets/_nba/por.svg
SVG file saved as ../assets/_nba/phi.svg
SVG file saved as ../assets/_nba/den.svg
SVG file saved as ../assets/_nba/chi.svg
SVG file saved as ../assets/_nba/nyk.svg
SVG file saved as ../assets/_nba/bkn.svg
SVG file saved as ../assets/_nba/atl.svg
SVG file saved as ../assets/_nba/lal.svg
SVG file saved as ../assets/_nba/orl.svg
SVG file saved as ../assets/_nba/dal.svg
SVG file saved as ../assets/_nba/mem.svg
SVG file saved a

In [10]:
print(save_path)

../assets/_nba/cha.svg


In [20]:
# convert the svg files to png
png_dir = "../assets/_nba_png/"
for item in os.listdir(output_dir):
    try:
        # Convert the SVG to PNG
        svg_file = os.path.join(output_dir,item)
        png_file = os.path.join(png_dir, f"{item.split('.')[0]}.png")
        
        cairosvg.svg2png(url=svg_file, write_to=png_file)

        print(f"SVG file '{svg_file}' converted to PNG file '{png_file}'")

    except Exception as e:
        print(f"An error occurred: {str(e)}")
    


SVG file '../assets/_nba_svg/det.svg' converted to PNG file '../assets/_nba_png/det.png'
SVG file '../assets/_nba_svg/bkn.svg' converted to PNG file '../assets/_nba_png/bkn.png'
SVG file '../assets/_nba_svg/atl.svg' converted to PNG file '../assets/_nba_png/atl.png'
SVG file '../assets/_nba_svg/cha.svg' converted to PNG file '../assets/_nba_png/cha.png'
SVG file '../assets/_nba_svg/phi.svg' converted to PNG file '../assets/_nba_png/phi.png'
SVG file '../assets/_nba_svg/bos.svg' converted to PNG file '../assets/_nba_png/bos.png'
SVG file '../assets/_nba_svg/sas.svg' converted to PNG file '../assets/_nba_png/sas.png'
SVG file '../assets/_nba_svg/okc.svg' converted to PNG file '../assets/_nba_png/okc.png'
SVG file '../assets/_nba_svg/uta.svg' converted to PNG file '../assets/_nba_png/uta.png'
SVG file '../assets/_nba_svg/lac.svg' converted to PNG file '../assets/_nba_png/lac.png'
SVG file '../assets/_nba_svg/sac.svg' converted to PNG file '../assets/_nba_png/sac.png'
SVG file '../assets/_

In [None]:
# Replace with the path to your SVG file
svg_file = save_path#"input.svg"

# Replace with the desired output PNG file path
png_file = "output.png"

try:
    # Convert the SVG to PNG
    cairosvg.svg2png(url=svg_file, write_to=png_file)

    print(f"SVG file '{svg_file}' converted to PNG file '{png_file}'")

except Exception as e:
    print(f"An error occurred: {str(e)}")

### Renaming Logos to abbreviations

In [None]:
# I just did it manually 🤷‍♂️

# import pandas as pd

# team_df = pd.read_csv("../data/team_info.csv")
# print(team_df.shape)
# team_df.head(30)

In [None]:
# import os

# images = os.listdir("../assets/_logos/")
# print(len(images))
# for image in images:
#     if image == ".DS_Store":
#         continue
#     print(image)
    