In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import re
import time
from datetime import date
from bs4 import BeautifulSoup, Comment

dpoy_table_list = []

today = date.today()

# Fetch the Defensive Player of the Year awards page and parse it with BeautifulSoup.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() turns an HTTP error response into an exception instead of
# letting an error page be parsed as if it were the awards table.
url = 'https://www.basketball-reference.com/awards/dpoy.html'
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Grab the container element that holds the NBA award table
dpoy_table = soup.select_one('#div_dpoy_NBA')
if dpoy_table is None:
    # select_one returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line
    raise ValueError(f"Could not find '#div_dpoy_NBA' in {url}; the page layout may have changed")

# Collect the stripped text of every <td> cell, one list per table row that
# contains at least one <td>
for tr in dpoy_table.select('tr:has(td)'):
    tds = [td.get_text(strip=True) for td in tr.select('td')]
    dpoy_table_list.append(tds)


dpoy_table_list


[['NBA',
  'Marcus Smart',
  '(V)',
  '27',
  'BOS',
  '71',
  '32.3',
  '12.1',
  '3.8',
  '5.9',
  '1.7',
  '0.3',
  '.418',
  '.331',
  '.793',
  '5.6',
  '.116'],
 ['NBA',
  'Rudy Gobert',
  '(V)',
  '28',
  'UTA',
  '71',
  '30.8',
  '14.3',
  '13.5',
  '1.3',
  '0.6',
  '2.7',
  '.675',
  '.000',
  '.623',
  '11.3',
  '.248'],
 ['NBA',
  'Giannis Antetokounmpo',
  '(V)',
  '25',
  'MIL',
  '63',
  '30.4',
  '29.5',
  '13.6',
  '5.6',
  '1.0',
  '1.0',
  '.553',
  '.304',
  '.633',
  '11.1',
  '.279'],
 ['NBA',
  'Rudy Gobert',
  '(V)',
  '26',
  'UTA',
  '81',
  '31.8',
  '15.9',
  '12.9',
  '2.0',
  '0.8',
  '2.3',
  '.669',
  '',
  '.636',
  '14.4',
  '.268'],
 ['NBA',
  'Rudy Gobert',
  '(V)',
  '25',
  'UTA',
  '56',
  '32.4',
  '13.5',
  '10.7',
  '1.4',
  '0.8',
  '2.3',
  '.622',
  '',
  '.682',
  '8.1',
  '.214'],
 ['NBA',
  'Draymond Green',
  '(V)',
  '26',
  'GSW',
  '76',
  '32.5',
  '10.2',
  '7.9',
  '7.0',
  '2.0',
  '1.4',
  '.418',
  '.308',
  '.709',
  '8.2',
  

In [14]:
# Load the scraped rows into a frame; column 1 is read as the winner's name
dpoy_df = pd.DataFrame(dpoy_table_list)
dpoy_list = dpoy_df[1].to_list()

# Year assigned to the first table row: the current calendar year from July
# onward, otherwise the previous calendar year
latest_dpoy_year = today.year if today.month >= 7 else today.year - 1

# Pair each winner with a year, counting down one year per table row
dpoy_seen_df_list = [
    [winner, latest_dpoy_year - offset] for offset, winner in enumerate(dpoy_list)
]
# dpoy_year ends one below the year assigned to the last row
dpoy_year = latest_dpoy_year - len(dpoy_list)

dpoy_seen_df = pd.DataFrame(dpoy_seen_df_list, columns=['Player', 'Year'])
agg_dpoy_seen_years_list = []
agg_dpoy_seen_list = []
player_dpoy_count_list = []

# For each distinct winner (in order of first appearance), collect the years
# they won and how many times
for player in dpoy_seen_df['Player'].unique():
    years_won = dpoy_seen_df.loc[dpoy_seen_df['Player'] == player]['Year']
    agg_dpoy_seen_list.append(player)
    agg_dpoy_seen_years_list.append(years_won.to_list())
    player_dpoy_count_list.append(years_won.count())

# One row per player: name, number of awards, list of award years
agg_dpoy_seen_df = pd.DataFrame({
    'Player': agg_dpoy_seen_list,
    'X_DPOY': player_dpoy_count_list,
    'Years': agg_dpoy_seen_years_list,
})

agg_dpoy_seen_df

Unnamed: 0,Player,X_DPOY,Years
0,Marcus Smart,1,[2022]
1,Rudy Gobert,3,"[2021, 2019, 2018]"
2,Giannis Antetokounmpo,1,[2020]
3,Draymond Green,1,[2017]
4,Kawhi Leonard,2,"[2016, 2015]"
5,Joakim Noah,1,[2014]
6,Marc Gasol,1,[2013]
7,Tyson Chandler,1,[2012]
8,Dwight Howard,3,"[2011, 2010, 2009]"
9,Kevin Garnett,1,[2008]


Unnamed: 0,Player,Year
1,Rudy Gobert,2021
3,Rudy Gobert,2019
4,Rudy Gobert,2018


[(['Paul Arizin*Vince BorylaBob Cousy*Joe Fulks*Harry Gallatin*Ed Macauley*Dick McGuire*Andy Phillip*Red RochaDolph Schayes*',
   'Ralph BeardFrankie BrianBob Davies*Dike EddlemanLarry FoustAlex GrozaGeorge Mikan*Vern Mikkelsen*Jim Pollard*Fred Schaus'],
  1951),
 (['Paul Arizin*Bob Cousy*Joe Fulks*Harry Gallatin*Ed Macauley*Dick McGuire*Andy Phillip*Red RochaDolph Schayes*(1)Fred ScolariMax Zaslofsky',
   'Leo BarnhorstFrankie BrianBob Davies*Dike EddlemanLarry Foust(2)George Mikan*Vern Mikkelsen*Jim Pollard*Arnie Risen*Paul WaltherBobby Wanzer*'],
  1952),
 (['Don Barksdale*Carl Braun*Bob Cousy*Bill GaborHarry Gallatin*Neil Johnston*Ed Macauley*Dolph Schayes*Fred Scolari(1)Paul SeymourBill Sharman*',
   'Leo BarnhorstBob Davies*Larry FoustMel HutchinsSlater Martin*George Mikan*Vern Mikkelsen*Andy Phillip*Arnie Risen*Bobby Wanzer*'],
  1953),
 (['Carl Braun*Bob Cousy*Ray FelixHarry Gallatin*Neil Johnston*Ed Macauley*Dick McGuire*Dolph Schayes*Paul SeymourBill Sharman*',
   'Bob Davies

Unnamed: 0,All_Stars,Year
0,[Paul Arizin*Vince BorylaBob Cousy*Joe Fulks*H...,1951
1,[Paul Arizin*Bob Cousy*Joe Fulks*Harry Gallati...,1952
2,[Don Barksdale*Carl Braun*Bob Cousy*Bill Gabor...,1953
3,[Carl Braun*Bob Cousy*Ray FelixHarry Gallatin*...,1954
4,[Paul Arizin*Carl Braun*Bob Cousy*Harry Gallat...,1955
...,...,...
67,[Giannis AntetokounmpoStephen CurryJoel Embiid...,2019
68,[Bam AdebayoGiannis AntetokounmpoJimmy ButlerJ...,2020
69,[Bradley BealDevin BookerMike ConleyAnthony Da...,2021
70,[LaMelo BallDevin BookerKevin DurantJoel Embii...,2022


<table>
<caption>LeBron</caption> <tr class="">
<td class="left"><p> <a href="/players/b/brownja02.html">Jaylen Brown</a> </p><p> <a href="/players/c/curryst01.html">Stephen Curry</a> </p><p> <a href="/players/d/doncilu01.html">Luka Dončić</a> </p><p> <a href="/players/e/edwaran01.html">Anthony Edwards</a> </p><p> <a href="/players/e/embiijo01.html">Joel Embiid</a> </p><p> <a href="/players/f/foxde01.html">De'Aaron Fox</a> </p><p> <a href="/players/g/georgpa01.html">Paul George</a> </p><p> <a href="/players/h/halibty01.html">Tyrese Haliburton</a> </p><p> <a href="/players/i/irvinky01.html">Kyrie Irving</a> </p><p> <a href="/players/j/jacksja02.html">Jaren Jackson Jr.</a> </p><p> <a href="/players/j/jamesle01.html">LeBron James</a> </p><p> <a href="/players/j/jokicni01.html">Nikola Jokić</a> </p><p> <a href="/players/r/randlju01.html">Julius Randle</a> </p><p> <a href="/players/w/willizi01.html">Zion Williamson</a> </p></td>
</tr>
</table>

In [53]:
today = date.today()

# Season label for the newest rows: the current calendar year from July
# onward, otherwise the previous calendar year
if today.month > 6:
    current_season = today.year
else:
    current_season = today.year - 1

all_league_player_list = []
all_league_team_list = []
league_list = []

# Flatten each scraped row into one entry per player cell. Columns 3-7 are
# read as the player cells, column 1 as the team label ('1st', '2nd', ...)
# and column 0 as the league, matching how the values are used below.
# Slicing (instead of fixed indices) tolerates rows with fewer than 8 cells.
for item in all_league_table_list:
    for player_cell in item[3:8]:
        all_league_player_list.append(player_cell)
        all_league_team_list.append(item[1])
        league_list.append(item[0])


cleaned_all_league_player_list = []
cleaned_all_league_team_list = []
cleaned_league_list = []
all_league_year_list = []

# Drop blank entries from the three lists TOGETHER. Filtering each list on its
# own lets a single blank player cell shift the players against their team and
# league labels (or leave the lists with different lengths).
for player_cell, team_label, league in zip(all_league_player_list,
                                           all_league_team_list,
                                           league_list):
    if player_cell == '' or team_label == '' or league == '':
        continue
    cleaned_all_league_player_list.append(player_cell)
    cleaned_all_league_team_list.append(team_label)
    cleaned_league_list.append(league)


# Start one season ahead: the first '1st'-team entry steps back to the real
# starting season
current_season = current_season + 1

for i, team_label in enumerate(cleaned_all_league_team_list):
    # A new season begins at the first entry of a non-ABA '1st' team block.
    # i == 0 is handled explicitly: indexing [i - 1] there would wrap around
    # to the LAST entry and make the first year depend on the end of the table.
    previous_is_first_team = i > 0 and cleaned_all_league_team_list[i - 1] == '1st'
    if team_label == '1st' and not previous_is_first_team and cleaned_league_list[i] != 'ABA':
        current_season = current_season - 1
    all_league_year_list.append(current_season)


all_league_df = pd.DataFrame(
    {'Year': all_league_year_list,
     'Player': cleaned_all_league_player_list,
     'Team': cleaned_all_league_team_list
    })

all_league_df


Unnamed: 0,Year,Player,Team
0,2022,Nikola JokićC,1st
1,2022,Giannis AntetokounmpoF,1st
2,2022,Jayson TatumF,1st
3,2022,Luka DončićG,1st
4,2022,Devin BookerG,1st
...,...,...,...
1015,1947,Frankie Baumholtz,2nd
1016,1947,Ernie Calverley,2nd
1017,1947,Chick Halbert,2nd
1018,1947,John Logan,2nd


Unnamed: 0,Year,Player,Team
620,1977,Kareem Abdul-JabbarC,1st
621,1977,Elvin HayesF,1st
622,1977,David ThompsonF,1st
623,1977,Pete MaravichG,1st
624,1977,Paul WestphalG,1st
625,1977,Bill WaltonC,2nd
626,1977,Julius ErvingF,2nd
627,1977,George McGinnisF,2nd
628,1977,George GervinG,2nd
629,1977,Jo Jo WhiteG,2nd


In [None]:
# Use the second scraped header row as the column names, skipping its first
# entry. Slicing makes a copy, so hof_table_header_list is left untouched and
# re-running this cell gives the same columns (an in-place pop(0) would remove
# one more header on every run).
final_hof_table_header_list = hof_table_header_list[1][1:]

hof_df = pd.DataFrame(hof_table_list, columns=final_hof_table_header_list)
hof_df

In [None]:
hof_names = hof_df['Name'].to_list()

# Suffix patterns tried in this order; the first one that matches wins and the
# captured group (the text before the suffix) is kept as the name
cleaning_list = ['^(.*?)WNBA', '^(.*?)CBBplayer', '^(.*?)Coach', '^(.*?)CBBcoach', '^(.*?)Exec', '^(.*?)Ref', '^(.*?)Player', '^(.*?)Oly']

# Step 1: keep only the text before the first '/'. Entries with a '/' are
# stored as the findall result (a list); entries without one stay strings.
temp_hof_name_list = []
for name in hof_names:
    cleaned_name = re.findall(r'^(.*?)\/', name)
    temp_hof_name_list.append(cleaned_name if cleaned_name else name)

# Step 2: unwrap the findall lists so every entry is a plain string
new_temp_name_list = [
    str(temp_name[0]) if isinstance(temp_name, list) else temp_name
    for temp_name in temp_hof_name_list
]

# Step 3: cut each name at the first suffix pattern that matches
cleaned_hof_name_list = []

for temp_name in new_temp_name_list:
    for pattern in cleaning_list:
        new_cleaned_name = re.findall(pattern, temp_name)
        if new_cleaned_name:
            cleaned_hof_name_list.append(new_cleaned_name)
            break
    else:
        # No pattern matched: keep the name unchanged. A counter-based check
        # here can never fire (the counter tops out at len(cleaning_list) - 1),
        # which would silently drop such names from the result.
        cleaned_hof_name_list.append(temp_name)


# Step 4: unwrap the findall lists from step 3
final_temp_name_list = [
    str(final_name[0]) if isinstance(final_name, list) else final_name
    for final_name in cleaned_hof_name_list
]

# NOTE: this replaces hof_df with a single 'Player' column, so the cell cannot
# be re-run without first rebuilding the original hof_df (it reads 'Name' above)
hof_df = pd.DataFrame(final_temp_name_list, columns = ["Player"])
hof_df


In [None]:
# Rebuild new_temp_name_list from temp_hof_name_list: list entries are
# replaced by the string form of their first element, all other entries are
# passed through unchanged
new_temp_name_list = [
    str(entry[0]) if type(entry) == list else entry
    for entry in temp_hof_name_list
]

new_temp_name_list