In [1]:
import os
import time
from io import StringIO
from time import sleep

import altair as alt
import html2text
import pandas as pd
import requests as req
from bs4 import BeautifulSoup

# pandas display limits, applied in one pass
for _option, _value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(_option, _value)

# Request headers carrying a desktop-Chrome User-Agent string.
headers = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/99.0.4844.51",
            "Safari/537.36",
        ]
    )
}

In [2]:
def get_web_page_data(url, timeout=30):
    """Download ``url`` and return it parsed as a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Absolute URL of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on an unresponsive host.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that error pages are
        not silently parsed as if they were real content.
    requests.RequestException
        On connection problems or when ``timeout`` is exceeded.
    """
    response = req.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")

# Data
------

In [3]:
# general player info
url = "https://www.chicagobears.com/team/players-roster/"
soup = get_web_page_data(url)
table = soup.find("table")
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError(f"No roster <table> found at {url}")

url_base = "https://www.chicagobears.com"
# Column names in the order the cells appear in each roster row.
roster_columns = ["name", "#", "pos", "height", "weight", "age", "exp", "college"]

data_list = []
# Skip the first <tr> (presumably the header row).
for row in table.find_all("tr")[1:]:
    tds = row.find_all("td")
    # The first cell holds the link to the player's page. It is stored under
    # "url" in the record without overwriting the roster `url` variable above.
    data = {"url": tds[0].find("a")["href"]}
    for position, column in enumerate(roster_columns):
        data[column] = tds[position].getText()
    data_list.append(data)
df_player = pd.DataFrame(data_list)

In [4]:
# Show the roster frame built in the previous cell.
df_player

Unnamed: 0,url,name,#,pos,height,weight,age,exp,college
0,/team/players-roster/angelo-blackson/,Angelo Blackson,90,DL,6-4,315,29,8,Auburn
1,/team/players-roster/josh-blackwell/,Josh Blackwell,39,DB,5-11,180,23,R,Duke
2,/team/players-roster/khari-blasingame/,Khari Blasingame,35,FB,6-0,233,26,4,Vanderbilt
3,/team/players-roster/larry-borom/,Larry Borom,75,OL,6-5,333,23,2,Missouri
4,/team/players-roster/jaquan-brisker/,Jaquan Brisker,9,DB,6-1,200,23,R,Penn State
5,/team/players-roster/ja-tyre-carter/,Ja'Tyre Carter,69,OL,6-3,311,23,R,Southern
6,/team/players-roster/isaiah-coulter/,Isaiah Coulter,82,WR,6-2,198,24,2,Rhode Island
7,/team/players-roster/dane-cruikshank/,Dane Cruikshank,29,DB,6-1,209,27,5,Arizona
8,/team/players-roster/trestan-ebner/,Trestan Ebner,25,RB,5-11,206,23,R,Baylor
9,/team/players-roster/justin-fields/,Justin Fields,1,QB,6-3,228,23,2,Ohio State


In [5]:
# Per-player detail: fetch every player's page and collect the table whose
# summary attribute is "Career Stats", tagged with bio text, url and name slug.
data_list = []
for row in df_player.itertuples():
    print(row.url)
    soup = get_web_page_data(url_base + row.url)
    table = soup.find("table", attrs={"summary":"Career Stats"})
    if table is None:
        # Without this guard str(None) is handed to read_html, which raises
        # "No tables found" and aborts the whole loop.
        print(f"  no Career Stats table, skipping {row.url}")
        continue
    bio = soup.find("div", attrs={"class":"d3-l-grid--inner nfl-c-biography"})
    # Wrap the markup in StringIO: passing literal HTML strings to read_html
    # is deprecated in recent pandas versions.
    df = pd.read_html(StringIO(str(table)))[0]
    # A missing bio block becomes an empty string rather than the text "None".
    df["bio"] = html2text.html2text(str(bio)) if bio is not None else ""
    df["url"] = row.url
    # Second-to-last path segment of the url, e.g. "angelo-blackson".
    df["name"] = row.url.split("/")[-2]
    data_list.append(df)
df_player_detail = pd.concat(data_list)

/team/players-roster/angelo-blackson/
/team/players-roster/josh-blackwell/
/team/players-roster/khari-blasingame/
/team/players-roster/larry-borom/
/team/players-roster/jaquan-brisker/
/team/players-roster/ja-tyre-carter/
/team/players-roster/isaiah-coulter/
/team/players-roster/dane-cruikshank/
/team/players-roster/trestan-ebner/
/team/players-roster/justin-fields/
/team/players-roster/trenton-gill/
/team/players-roster/trevis-gipson/
/team/players-roster/kyler-gordon/
/team/players-roster/ryan-griffin/
/team/players-roster/n-keal-harry/
/team/players-roster/khalil-herbert/
/team/players-roster/elijah-hicks/
/team/players-roster/deandre-houston-carson/
/team/players-roster/eddie-jackson/
/team/players-roster/lamar-jackson/
/team/players-roster/teven-jenkins/
/team/players-roster/jaylon-johnson/
/team/players-roster/kingsley-jonathan/
/team/players-roster/braxton-jones/
/team/players-roster/jaylon-jones/
/team/players-roster/justin-jones/
/team/players-roster/velus-jones/
/team/players

# Visual
---

In [6]:
# Title and axis encodings for the roster chart
chart_title = "Chicago Bears 2022 Roster Age"
x_cfgs = dict(shorthand="age:Q", title="Age")
y_cfgs = dict(shorthand="name:O", title="")

# Palette: primary is used for backgrounds, secondary for fonts
primary_color = "#0B162A"
secondary_color = "#c83803"

# Both axes share the same label/title styling; each gets its own dict.
axisLeftKwargs = {
    "labelColor": secondary_color,
    "titleColor": secondary_color,
    "labelFontSize": 15,
    "titleFontSize": 17,
}
axisBottomKwargs = dict(axisLeftKwargs)

legendKwargs = {
    "labelColor": secondary_color,
    "titleColor": secondary_color,
    "strokeColor": "gray",
    "fillColor": primary_color,
    "padding": 10,
    "cornerRadius": 10,
    "orient": "top-right",
    "labelFontSize": 15,
    "titleFontSize": 15,
}


In [18]:
def _player_detail_bars(title, x, y, tooltip, hide_x_axis=False):
    """Shared builder behind ``generate_bars`` and ``generate_bars_none``.

    Reads the global ``df_player_detail`` at call time, drops the rows whose
    SEASON equals "TOTAL", and returns a 500x500 white bar chart.
    """
    x_kwargs = dict(x)
    if hide_x_axis:
        # axis=None is only added on request: omitting the argument and
        # passing None are different things to Altair.
        x_kwargs["axis"] = None
    per_season = df_player_detail[~df_player_detail.SEASON.eq("TOTAL")]
    return (
        alt.Chart(per_season, width=500, height=500, title=title)
        .mark_bar(color="white")
        .encode(
            x=alt.X(**x_kwargs),
            y=alt.Y(**y, stack="zero"),
            tooltip=[alt.Tooltip(**x), alt.Tooltip(**y)] + list(tooltip),
        )
    )


def generate_bars(title, x, y, tooltip):
    """Bar chart of per-season player detail with the x axis drawn.

    Parameters
    ----------
    title : str
        Chart title.
    x, y : dict
        Keyword arguments for ``alt.X`` / ``alt.Y`` (e.g. ``shorthand``,
        ``title``); the same dicts also build the first two tooltips.
    tooltip : list
        Extra tooltip entries appended after the x and y tooltips.

    Returns
    -------
    alt.Chart
    """
    return _player_detail_bars(title, x, y, tooltip, hide_x_axis=False)


def generate_bars_none(title, x, y, tooltip):
    """Same chart as ``generate_bars`` but with the x axis hidden (``axis=None``).

    Parameters and return value are identical to ``generate_bars``.
    """
    return _player_detail_bars(title, x, y, tooltip, hide_x_axis=True)

In [19]:
# Multi-selection keyed on `url`; it is added to the roster chart and used to
# filter both detail charts below.
selector = alt.selection_multi(fields=['url'])
# Single selection keyed on `url`; defined here but not attached to any chart
# in this cell.
selector_bio = alt.selection_single(fields=['url'])

# Roster chart: one horizontal bar per player, bar length = age.
# Each channel uses its matching class (alt.Y for y, alt.X for x).
chart = (
    alt.Chart(df_player, width=400, height=1200, title=chart_title)
    .mark_bar(color="white")
    .encode(
        y=alt.Y(**y_cfgs),
        x=alt.X(
            **x_cfgs,
            # scale=alt.Scale(domain=[20, 35], domainMid=0)
        ),
        tooltip=[
            alt.Tooltip(**x_cfgs),
            alt.Tooltip(**y_cfgs),
            "pos",
            "height",
            "weight",
            "exp",
            "college",
        ],
    )
)
# Vertical rule at the mean age of the roster.
line = alt.Chart(df_player).mark_rule(color='firebrick').encode(
    x='mean(age):Q',
    size=alt.SizeValue(3)
)
chart = (chart + line).add_selection(selector).resolve_scale()

# Detail chart 1: games played per team (x axis hidden).
x_cfgs2 = {"shorthand":"TEAM:O", "title":"TEAM"}
y_cfgs2 = {"shorthand":"G:Q", "title":"Games Played"}
bar_detail = generate_bars_none(
    "Player Detail - Games Played and Teams",
    x_cfgs2,
    y_cfgs2,
    tooltip=[alt.Tooltip(shorthand="GS",title="Games Started"), "SEASON", "name"],
).transform_filter(selector)

# Detail chart 2: games started per team (x axis drawn).
x_cfgs3 = {"shorthand":"TEAM:O", "title":"TEAM"}
y_cfgs3 = {"shorthand":"GS:Q", "title":"Games Started"}
bar_detail2 = generate_bars(
    "Player Detail - Games Started and Teams",
    x_cfgs3,
    y_cfgs3,
    tooltip=[alt.Tooltip(shorthand="G",title="Games Played"), "SEASON", "name"],
).transform_filter(selector)

# Stack the two detail charts and share their x scale.
bar_details = alt.vconcat(bar_detail, bar_detail2).resolve_scale(x="shared")
     


In [21]:
# add bio
# Put the roster chart next to the stacked detail charts, apply the colour
# and font settings, then write the result to foo_too_chart.json.
themed_layout = (
    (chart | bar_details)
    .configure(background=primary_color)
    .configure_axis(gridColor="grey", gridDash=[6,4])
    .configure_axisLeft(**axisLeftKwargs)
    .configure_axisBottom(**axisBottomKwargs)
    .configure_title(color=secondary_color, fontSize=25)
    .configure_legend(**legendKwargs)
)
themed_layout.save("foo_too_chart.json")
