In [2]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Import data manipulation modules
import pandas as pd
import numpy as np

# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt

import plotly.graph_objects as go

# Passing Comparisons

In [3]:
# URL of page
season = 2020

url = f'https://www.pro-football-reference.com/years/{season}/passing.htm'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)

# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

# Collect table rows
rows = stats_page.findAll('tr')[1:]

# Get stats from each row
qb_stats = []
for i in range(len(rows)):
    qb_stats.append([col.getText() for col in rows[i].findAll('td')])
    
# Create DataFrame from our scraped data
data = pd.DataFrame(qb_stats, columns=column_headers[1:])
data.head()
data['Season'] = season

# Rename sack yards column to `Yds_Sack`
new_columns = data.columns.values
new_columns[-7] = 'Yds_Sack'
data.columns = new_columns

data

Unnamed: 0,Player,Tm,Age,Pos,G,GS,QBrec,Cmp,Att,Cmp%,...,Rate,QBR,Sk,Yds_Sack,NY/A,ANY/A,Sk%,4QC,GWD,Season
0,Matt Ryan,ATL,35,QB,16,16,4-12-0,407,626,65.0,...,93.3,66.9,41,257,6.48,6.52,6.1,,,2020
1,Tom Brady,TAM,43,QB,16,16,11-5-0,401,610,65.7,...,102.2,72.6,21,143,7.12,7.53,3.3,3,3,2020
2,Ben Roethlisberger,PIT,38,QB,15,15,12-3-0,399,608,65.6,...,94.1,60.1,13,118,5.93,6.27,2.1,4,4,2020
3,Justin Herbert,LAC,22,QB,15,15,6-9-0,396,595,66.6,...,98.3,69.6,32,218,6.57,6.84,5.1,1,3,2020
4,Patrick Mahomes *,KAN,25,QB,15,15,14-1-0,390,588,66.3,...,108.2,82.9,22,147,7.53,8.33,3.6,3,3,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,Logan Thomas,WAS,29,TE,16,15,,1,1,100.0,...,118.7,47.7,0,0,28.00,28.00,0.0,,,2020
111,Tommy Townsend,KAN,24,p,16,0,,1,1,100.0,...,118.7,13.9,0,0,13.00,13.00,0.0,,,2020
112,Greg Ward,PHI,25,wr,16,10,,1,1,100.0,...,118.7,0.9,0,0,15.00,15.00,0.0,,,2020
113,Sammy Watkins,KAN,27,wr,10,9,,0,1,0.0,...,0.0,0.0,0,0,0.00,-45.00,0.0,,,2020


In [4]:
player1 = 'Matt Ryan'
player2 = 'Tom Brady'

In [5]:
#filter by target season
data_radar = data.loc[data['Season'] == target_season]

# Select stat categories
categories = ['Cmp%', 'Yds', 'TD', 'Int', 'Y/A', 'Rate']

# Create data subset for radar chart
data_radar = data[['Player', 'Tm'] + categories]

# Convert data to numerical values
for i in categories:
    data_radar[i] = pd.to_numeric(data[i])
    
# Remove ornamental characters for achievements
data_radar['Player'] = data_radar['Player'].str.replace('*', '')
data_radar['Player'] = data_radar['Player'].str.replace('+', '')
data_radar['Player'] = data_radar['Player'].str.strip()

# Filter by passing yards
data_radar = data_radar[data_radar['Yds'] > 1500]

# Create columns with percentile rank
for i in categories:
    data_radar[i + '_Rank'] = data_radar[i].rank(pct=True)

# We need to flip the rank for interceptions
#data_radar['Int_Rank'] = 1 - data_radar['Int_Rank']

# Examine data
data_radar.head()



NameError: name 'target_season' is not defined

In [None]:
list1 = data_radar.loc[data_radar['Player'] == player1, ['Cmp%_Rank','Yds_Rank','TD_Rank','Int_Rank','Y/A_Rank','Rate_Rank']].values
print(list1)

list2 = data_radar.loc[data_radar['Player'] == player2, ['Cmp%_Rank','Yds_Rank','TD_Rank','Int_Rank','Y/A_Rank','Rate_Rank']].values
print(list2)

In [None]:
#plot
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
      r=list1[0],
      theta=categories,
      fill='toself',
      name=player1
))
fig.add_trace(go.Scatterpolar(
      r=list2[0],
      theta=categories,
      fill='toself',
      name=player2
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=False,
      range=[0, 1]
    )),
  showlegend=True
)

fig.show()