Create the file that will show player award, stanley cup wins, and all-star game appearances.

In [91]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import time
import requests

NHL Art Ross Trophy Winners,
Award presented to League's scoring champion


In [92]:
# Scrape the data for the Art Ross Award, Scoring Champion
url = f'https://www.nhl.com/news/nhl-art-ross-trophy-winners-complete-list/c-287899824?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), 'Art Ross']
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
art_ross_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
art_ross_df['Year'] = art_ross_df['Year'].astype(int)
# filter dataframe 
# art_ross_df = art_ross_df.loc[art_ross_df['Year'] >= 1967,:]
art_ross_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Leon Draisaitl,Edmonton Oilers,Art Ross
1,2019,Nikita Kucherov,Tampa Bay Lightning,Art Ross
2,2018,Connor McDavid,Edmonton Oilers,Art Ross
3,2017,Connor McDavid,Edmonton Oilers,Art Ross
4,2016,Patrick Kane,Chicago Blackhawks,Art Ross


In [93]:
# Group the data by the player's name column and count
art_ross_grp_df = art_ross_df.groupby(by="Player", as_index=False).count()
art_ross_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Ace Bailey,1,1,1
1,Alex Ovechkin,1,1,1
2,Babe Dye,2,2,2
3,Bernie Geoffrion,2,2,2
4,Bill Cook,2,2,2


In [94]:
# Drop columns that are not needed
art_ross_grp_df = art_ross_grp_df.drop(['Team', 'Award'], axis=1)
art_ross_grp_df.head()

Unnamed: 0,Player,Year
0,Ace Bailey,1
1,Alex Ovechkin,1
2,Babe Dye,2
3,Bernie Geoffrion,2
4,Bill Cook,2


In [95]:
# Rename the year column to the award name
art_ross_grp_df = art_ross_grp_df.rename(columns={"Year": "Art Ross Awards"})
art_ross_grp_df.head()

Unnamed: 0,Player,Art Ross Awards
0,Ace Bailey,1
1,Alex Ovechkin,1
2,Babe Dye,2
3,Bernie Geoffrion,2
4,Bill Cook,2


In [96]:
# Change names to match the master stats
art_ross_df.replace("Bryan Hextall", "Bryan Hextall, Sr.")
art_ross_grp_df.head()


Unnamed: 0,Player,Art Ross Awards
0,Ace Bailey,1
1,Alex Ovechkin,1
2,Babe Dye,2
3,Bernie Geoffrion,2
4,Bill Cook,2


NHL Hart Memorial Trophy Winners, Award presente to the regular season MVP.

In [97]:
# Scrape the data for the Hart Memorial Award, League MVP
url = f'https://www.nhl.com/news/nhl-hart-memorial-trophy-winners-complete-list/c-287743272?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), 'Hart Memorial']
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
hart_memorial_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
hart_memorial_df['Year'] = hart_memorial_df['Year'].astype(int)
# filter dataframe 
# hart_memorial_df = hart_memorial_df.loc[hart_memorial_df['Year'] >= 1967,:]
hart_memorial_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Leon Draisaitl,Edmonton Oilers,Hart Memorial
1,2019,Nikita Kucherov,Tampa Bay Lightning,Hart Memorial
2,2018,Taylor Hall,New Jersey Devils,Hart Memorial
3,2017,Connor McDavid,Edmonton Oilers,Hart Memorial
4,2016,Patrick Kane,Chicago Blackhawks,Hart Memorial


In [98]:
# Group the data by the player's name column and count
hart_grp_df = hart_memorial_df.groupby(by="Player", as_index=False).count()
hart_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Al Rollins,1,1,1
1,Alex Ovechkin,3,3,3
2,Andy Bathgate,1,1,1
3,Aurele Joliat,1,1,1
4,Babe Pratt,1,1,1


In [99]:
# Drop columns that are not needed
hart_grp_df = hart_grp_df.drop(['Team', 'Award'], axis=1)
hart_grp_df.head()

Unnamed: 0,Player,Year
0,Al Rollins,1
1,Alex Ovechkin,3
2,Andy Bathgate,1
3,Aurele Joliat,1
4,Babe Pratt,1


In [100]:
# Rename the year column to the award name
hart_grp_df = hart_grp_df.rename(columns={"Year": "Hart Awards"})
hart_grp_df.head()

Unnamed: 0,Player,Hart Awards
0,Al Rollins,1
1,Alex Ovechkin,3
2,Andy Bathgate,1
3,Aurele Joliat,1
4,Babe Pratt,1


NHL Conn Smythe Trophy Winners, Award presented to the playoffs MVP

In [101]:
# Scrape the data for the Conn Smythe Trophy Award, Playoff MVP
url = 'https://www.nhl.com/news/nhl-conn-smythe-trophy-winners-complete-list/c-287709808?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), "Conn Smythe"]
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
conn_smythe_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
conn_smythe_df['Year'] = conn_smythe_df['Year'].astype(int)
# filter dataframe 
# conn_smythe_df = conn_smythe_df.loc[conn_smythe_df['Year'] >= 1967,:]
conn_smythe_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Victor Hedman,Tampa Bay Lightning,Conn Smythe
1,2019,Ryan O'Reilly,St. Louis Blues,Conn Smythe
2,2018,Alex Ovechkin,Washington Capitals,Conn Smythe
3,2017,Sidney Crosby,Pittsburgh Penguins,Conn Smythe
4,2016,Sidney Crosby,Pittsburgh Penguins,Conn Smythe


In [102]:
# Group the data by the player's name column and count
conn_smythe_grp_df = conn_smythe_df.groupby(by="Player", as_index=False).count()
conn_smythe_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Al MacInnis,1,1,1
1,Alex Ovechkin,1,1,1
2,Bernie Parent,2,2,2
3,Bill Ranford,1,1,1
4,Billy Smith,1,1,1


In [103]:
# Drop columns that are not needed
conn_smythe_grp_df = conn_smythe_grp_df.drop(['Team', 'Award'], axis=1)
conn_smythe_grp_df.head()

Unnamed: 0,Player,Year
0,Al MacInnis,1
1,Alex Ovechkin,1
2,Bernie Parent,2
3,Bill Ranford,1
4,Billy Smith,1


In [104]:
# Rename the year column to the award name
conn_smythe_grp_df = conn_smythe_grp_df.rename(columns={"Year": "Conn Smythe Awards"})
conn_smythe_grp_df.head()

Unnamed: 0,Player,Conn Smythe Awards
0,Al MacInnis,1
1,Alex Ovechkin,1
2,Bernie Parent,2
3,Bill Ranford,1
4,Billy Smith,1


NHL Calder Memorial Trophy Winners, Award presented to the rookie of the year.

In [105]:
# Scrape the data for the Calder Memorial Trophy Awards, Rookie of the Year
url = 'https://www.nhl.com/news/nhl-calder-memorial-trophy-winners-complete-list/c-287749198?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), 'Calder Memorial']
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
calder_memorial_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
calder_memorial_df['Year'] = calder_memorial_df['Year'].astype(int)
# filter dataframe 
# calder_memorial_df = calder_memorial_df.loc[calder_memorial_df['Year'] >= 1967,:]
calder_memorial_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Cale Makar,Colorado Avalanche,Calder Memorial
1,2019,Elias Pettersson,Vancouver Canucks,Calder Memorial
2,2018,Mathew Barzal,New York Islanders,Calder Memorial
3,2017,Auston Matthews,Toronto Maple Leafs,Calder Memorial
4,2016,Artemi Panarin,Chicago Blackhawks,Calder Memorial


In [106]:
# Group the data by the player's name column
calder_memorial_grp_df = calder_memorial_df.groupby(by="Player", as_index=False).count()
calder_memorial_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Aaron Ekblad,1,1,1
1,Alex Ovechkin,1,1,1
2,Andrew Raycroft,1,1,1
3,Artemi Panarin,1,1,1
4,Auston Matthews,1,1,1


In [107]:
# Drop columns that are not needed
calder_memorial_grp_df = calder_memorial_grp_df.drop(['Team', 'Award'], axis=1)
calder_memorial_grp_df.head()

Unnamed: 0,Player,Year
0,Aaron Ekblad,1
1,Alex Ovechkin,1
2,Andrew Raycroft,1
3,Artemi Panarin,1
4,Auston Matthews,1


In [108]:
# Rename the year column to the award name
calder_memorial_grp_df = calder_memorial_grp_df.rename(columns={"Year": "Calder Awards"})
calder_memorial_grp_df.head()

Unnamed: 0,Player,Calder Awards
0,Aaron Ekblad,1
1,Alex Ovechkin,1
2,Andrew Raycroft,1
3,Artemi Panarin,1
4,Auston Matthews,1


NHL James Norris Memorial Trophy Winners, Award presented to the leagues top defenseman.

In [109]:
# Scrape the data for the James Norris Memorial Trophy, Top Defenseman 
import re
url = 'https://www.nhl.com/news/nhl-james-norris-memorial-trophy-winners-complete-list/c-287778852?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = re.split(':|;', row)
        #print(words)
        more_words = words[1].split(',')
        #print(more_words)

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), 'James Norris Memorial']
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
james_norris_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
james_norris_df['Year'] = james_norris_df['Year'].astype(int)
# filter dataframe 
# james_norris_df = james_norris_df.loc[james_norris_df['Year'] >= 1967,:]
james_norris_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Roman Josi,Calgary Flames,James Norris Memorial
1,2019,Mark Giordano,Calgary Flames,James Norris Memorial
2,2018,Victor Hedman,Tampa Bay Lightning,James Norris Memorial
3,2017,Brent Burns,San Jose Sharks,James Norris Memorial
4,2016,Drew Doughty,Los Angeles Kings,James Norris Memorial


In [110]:
# Group the data by the player's name column and count
james_norris_grp_df = james_norris_df.groupby(by="Player", as_index=False).count()
james_norris_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Al MacInnis,1,1,1
1,Bobby Orr,8,8,8
2,Brent Burns,1,1,1
3,Brian Leetch,2,2,2
4,Chris Chelios,3,3,3


In [111]:
# Drop columns that are not needed
james_norris_grp_df = james_norris_grp_df.drop(['Team', 'Award'], axis=1)
james_norris_grp_df.head()

Unnamed: 0,Player,Year
0,Al MacInnis,1
1,Bobby Orr,8
2,Brent Burns,1
3,Brian Leetch,2
4,Chris Chelios,3


In [112]:
# Rename the year column to the award name
james_norris_grp_df = james_norris_grp_df.rename(columns={"Year": "James Norris Awards"})
james_norris_grp_df.head()

Unnamed: 0,Player,James Norris Awards
0,Al MacInnis,1
1,Bobby Orr,8
2,Brent Burns,1
3,Brian Leetch,2
4,Chris Chelios,3


NHL Ted Lindsay Award Winners. Award presented to the most outstanding player as voted but the NHLPA members.

In [113]:
# Scrape the data for the Ted Lindsay Award, Outstanding Player voted by NHLPA players
url = 'https://www.nhl.com/news/nhl-ted-lindsay-award-winners-complete-list/c-287978282?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), "Ted Lindsay"]
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
ted_lindsay_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
ted_lindsay_df['Year'] = ted_lindsay_df['Year'].astype(int)
# filter dataframe 
# ted_lindsay_df = ted_lindsay_df.loc[ted_lindsay_df['Year'] >= 1967,:]
ted_lindsay_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Leon Draisaitl,Edmonton Oilers,Ted Lindsay
1,2019,Nikita Kucherov,Tampa Bay Lightning,Ted Lindsay
2,2018,Connor McDavid,Edmonton Oilers,Ted Lindsay
3,2017,Connor McDavid,Edmonton Oilers,Ted Lindsay
4,2016,Patrick Kane,Chicago Blackhawks,Ted Lindsay


In [114]:
# Group the data by the player's name column and count
ted_lindsay_grp_df = ted_lindsay_df.groupby(by="Player", as_index=False).count()
ted_lindsay_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Alex Ovechkin,3,3,3
1,Bobby Clarke,1,1,1
2,Bobby Orr,1,1,1
3,Brett Hull,1,1,1
4,Carey Price,1,1,1


In [115]:
# Drop columns that are not needed
ted_lindsay_grp_df = ted_lindsay_grp_df.drop(['Team', 'Award'], axis=1)
ted_lindsay_grp_df.head()

Unnamed: 0,Player,Year
0,Alex Ovechkin,3
1,Bobby Clarke,1
2,Bobby Orr,1
3,Brett Hull,1
4,Carey Price,1


In [116]:
# Rename the year column to the award name
ted_lindsay_grp_df = ted_lindsay_grp_df.rename(columns={"Year": "Ted Lindsay Awards"})
ted_lindsay_grp_df.head()

Unnamed: 0,Player,Ted Lindsay Awards
0,Alex Ovechkin,3
1,Bobby Clarke,1
2,Bobby Orr,1
3,Brett Hull,1
4,Carey Price,1


NHL Maurice Richard Trophy Winners, Award presented to the years top goal-scorer.

In [117]:
# Scrape the data for the Maurice Richard Award, Top Goal Scorer
url = 'https://www.nhl.com/news/nhl-maurice-richard-trophy-winners-complete-list/c-287972892?tid=287709666'

# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# results are returned as a list
body = soup.find('div', class_='article-item__body')

results = body.find_all('li')

winners = []
for result in results:
    # Error handling
    try:
        #print(result)
        row = result.text
        #print(f'row: {row}')
        words = row.split(':')
        more_words = words[1].split(',')

        winner = [words[0], more_words[0].strip(), more_words[1].strip(), "Maurice Richard"]
        #print(winner)
        winners.append(winner)

    except AttributeError as error:
        print(error)

# convert list of lists into DataFrame
maurice_richard_df = pd.DataFrame(winners, columns = ['Year', 'Player', 'Team', 'Award'])
# convert year column to int for filtering purposes
maurice_richard_df['Year'] = maurice_richard_df['Year'].astype(int)
# filter dataframe 
# maurice_richard_df = maurice_richard_df.loc[ted_lindsay_df['Year'] >= 1967,:]
maurice_richard_df.head()

Unnamed: 0,Year,Player,Team,Award
0,2020,Alex Ovechkin,Washington Capitals,Maurice Richard
1,2020,David Pastrnak,Boston Bruins,Maurice Richard
2,2019,Alex Ovechkin,Washington Capitals,Maurice Richard
3,2018,Alex Ovechkin,Washington Capitals,Maurice Richard
4,2017,Sidney Crosby,Pittsburgh Penguins,Maurice Richard


In [118]:
# Group the data by the player's name column
maurice_richard_grp_df = maurice_richard_df.groupby(by="Player", as_index=False).count()
maurice_richard_grp_df.head()

Unnamed: 0,Player,Year,Team,Award
0,Alex Ovechkin,9,9,9
1,Corey Perry,1,1,1
2,David Pastrnak,1,1,1
3,Ilya Kovalchuk,1,1,1
4,Jarome Iginla,2,2,2


In [119]:
# Drop columns that are not needed
maurice_richard_grp_df = maurice_richard_grp_df.drop(['Team', 'Award'], axis=1)
maurice_richard_grp_df.head()

Unnamed: 0,Player,Year
0,Alex Ovechkin,9
1,Corey Perry,1
2,David Pastrnak,1
3,Ilya Kovalchuk,1
4,Jarome Iginla,2


In [120]:
# Rename the year column to theaward name for the count
maurice_richard_grp_df = maurice_richard_grp_df.rename(columns={"Year": "Maurice Richard Awards"})
maurice_richard_grp_df.head()

Unnamed: 0,Player,Maurice Richard Awards
0,Alex Ovechkin,9
1,Corey Perry,1
2,David Pastrnak,1
3,Ilya Kovalchuk,1
4,Jarome Iginla,2


Merge all the award dataframes together and clean the data

In [121]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(art_ross_grp_df,
                 conn_smythe_grp_df[['Player', 'Conn Smythe Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards
0,Ace Bailey,1.0,
1,Alex Ovechkin,1.0,1.0
2,Babe Dye,2.0,
3,Bernie Geoffrion,2.0,
4,Bill Cook,2.0,


In [122]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 hart_grp_df[['Player', 'Hart Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards
0,Ace Bailey,1.0,,
1,Alex Ovechkin,1.0,1.0,3.0
2,Babe Dye,2.0,,
3,Bernie Geoffrion,2.0,,1.0
4,Bill Cook,2.0,,


In [123]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 calder_memorial_grp_df[['Player', 'Calder Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards
0,Ace Bailey,1.0,,,
1,Alex Ovechkin,1.0,1.0,3.0,1.0
2,Babe Dye,2.0,,,
3,Bernie Geoffrion,2.0,,1.0,1.0
4,Bill Cook,2.0,,,


In [124]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 james_norris_grp_df[['Player', 'James Norris Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards,James Norris Awards
0,Ace Bailey,1.0,,,,
1,Alex Ovechkin,1.0,1.0,3.0,1.0,
2,Babe Dye,2.0,,,,
3,Bernie Geoffrion,2.0,,1.0,1.0,
4,Bill Cook,2.0,,,,


In [125]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 ted_lindsay_grp_df[['Player', 'Ted Lindsay Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards,James Norris Awards,Ted Lindsay Awards
0,Ace Bailey,1.0,,,,,
1,Alex Ovechkin,1.0,1.0,3.0,1.0,,3.0
2,Babe Dye,2.0,,,,,
3,Bernie Geoffrion,2.0,,1.0,1.0,,
4,Bill Cook,2.0,,,,,


In [126]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 maurice_richard_grp_df[['Player', 'Maurice Richard Awards']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards,James Norris Awards,Ted Lindsay Awards,Maurice Richard Awards
0,Ace Bailey,1.0,,,,,,
1,Alex Ovechkin,1.0,1.0,3.0,1.0,,3.0,9.0
2,Babe Dye,2.0,,,,,,
3,Bernie Geoffrion,2.0,,1.0,1.0,,,
4,Bill Cook,2.0,,,,,,


In [127]:
# Change mismatch names to match across files
awards_merge_df = awards_merge_df.replace("Alex Ovechkin", "Alexander Ovechkin")
awards_merge_df = awards_merge_df.replace("Bernie Geoffrion", "Bernard Geoffrion")
awards_merge_df = awards_merge_df.replace("Evgeni Malkin", "Yevgeni Malkin")
awards_merge_df = awards_merge_df.replace("Johnny Quilty", "John Quilty")
awards_merge_df = awards_merge_df.replace("Ray Bourque", "Raymond Bourque")
awards_merge_df = awards_merge_df.replace("Sweeney Schriner", "David Schriner")
awards_merge_df = awards_merge_df.replace("Tommy Anderson", "Tom Anderson")
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards,James Norris Awards,Ted Lindsay Awards,Maurice Richard Awards
0,Ace Bailey,1.0,,,,,,
1,Alexander Ovechkin,1.0,1.0,3.0,1.0,,3.0,9.0
2,Babe Dye,2.0,,,,,,
3,Bernard Geoffrion,2.0,,1.0,1.0,,,
4,Bill Cook,2.0,,,,,,


In [128]:
awards_merge_df = awards_merge_df.fillna(0)
awards_merge_df['Awards3'] = awards_merge_df['Art Ross Awards'] + awards_merge_df['Conn Smythe Awards'] + awards_merge_df['Hart Awards']
awards_merge_df['Awards2'] = awards_merge_df['Awards3'] + awards_merge_df['Calder Awards'] + awards_merge_df['James Norris Awards']  
awards_merge_df['Awards'] = awards_merge_df['Awards2'] + awards_merge_df['Ted Lindsay Awards'] + awards_merge_df['Maurice Richard Awards']                           
awards_merge_df.head()

Unnamed: 0,Player,Art Ross Awards,Conn Smythe Awards,Hart Awards,Calder Awards,James Norris Awards,Ted Lindsay Awards,Maurice Richard Awards,Awards3,Awards2,Awards
0,Ace Bailey,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
1,Alexander Ovechkin,1.0,1.0,3.0,1.0,0.0,3.0,9.0,5.0,6.0,18.0
2,Babe Dye,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0
3,Bernard Geoffrion,2.0,0.0,1.0,1.0,0.0,0.0,0.0,3.0,4.0,4.0
4,Bill Cook,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0


In [131]:
# awards_merge_df = awards_merge_df.drop(['Art Ross Awards', 'Conn Smythe Awards', 'Hart Awards', 'Calder Awards', 'James Norris Awards',
#                                          'Ted Lindsay Awards', 'Maurice Richard Awards', 'Awards2', 'Awards3'], axis=1)
awards_merge_df.head()

Unnamed: 0,Player,Awards
0,Ace Bailey,1.0
1,Alexander Ovechkin,18.0
2,Babe Dye,2.0
3,Bernard Geoffrion,4.0
4,Bill Cook,2.0


Scrape the data from url to get the players Stanley Cup wins

In [132]:
# Scrape the data for Stanley Cup wins by player
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from splinter.driver.webdriver import BaseWebDriver, WebDriverElement

In [133]:
options = Options()
options.add_argument('--no-sandbox')
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False, chrome_options=options)

In [134]:
# Set URL to the stanley Cup records page
url = 'https://records.nhl.com/records/playoff-skater-records/stanley-cups/skater-most-stanley-cups-won-career'
browser.visit(url)

In [135]:
# Scrape the data for Stanley Cup wins from the url
my_list = []

for i in range(1, 23):
#     print(i)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    rows = soup.find_all('div', class_='rt-tr-group')
    for row in rows:
        cells = row.find_all('div', class_='rt-td')

        data = {
            'Player':cells[1].text,
            'Pos':cells[2].text,
            'Cups':cells[3].text
        }

        my_list.append(data)
    if i < 22:
        browser.find_by_text('Next').click()

my_list

'Pos': 'L', 'Cups': '1'},
 {'Player': 'Jim Hay', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Ian Cushenan', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Earl Balfour', 'Pos': 'L', 'Cups': '1'},
 {'Player': 'Murray Balfour', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Jack Evans', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Reg Fleming', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Bill Hay', 'Pos': 'C', 'Cups': '1'},
 {'Player': 'Wayne Hicks', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Wayne Hillman', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Bobby Hull', 'Pos': 'L', 'Cups': '1'},
 {'Player': 'Chico Maki', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Stan Mikita', 'Pos': 'C', 'Cups': '1'},
 {'Player': 'Eric Nesterenko', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Pierre Pilote', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Moose Vasko', 'Pos': 'D', 'Cups': '1'},
 {'Player': 'Kenny Wharram', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Andy Bathgate', 'Pos': 'R', 'Cups': '1'},
 {'Player': 'Gerry Ehman', 'Pos': 'R', 'Cups': '1'},
 {'Player': '

In [136]:
# Convert the list to a dataframe
stanley_cup_df = pd.DataFrame(my_list)
stanley_cup_df.head()

Unnamed: 0,Player,Pos,Cups
0,Henri Richard,C,11
1,Jean Beliveau,C,10
2,Yvan Cournoyer,R,10
3,Claude Provost,R,9
4,Maurice Richard,R,8


In [137]:
# Filter out the goal tenders.
stanley_cup_df = stanley_cup_df[stanley_cup_df['Pos'] != 'G'] 
stanley_cup_df.head()

Unnamed: 0,Player,Pos,Cups
0,Henri Richard,C,11
1,Jean Beliveau,C,10
2,Yvan Cournoyer,R,10
3,Claude Provost,R,9
4,Maurice Richard,R,8


In [138]:
# Drop columns that are not needed
stanley_cup_df = stanley_cup_df.drop(['Pos'], axis=1)
stanley_cup_df.head()

Unnamed: 0,Player,Cups
0,Henri Richard,11
1,Jean Beliveau,10
2,Yvan Cournoyer,10
3,Claude Provost,9
4,Maurice Richard,8


In [139]:
# Change names to match the master stats
stanley_cup_df = stanley_cup_df.replace("Alex Ovechkin", "Alexander Ovechkin")
stanley_cup_df = stanley_cup_df.replace("Bernie Geoffrion", "Bernard Geoffrion")
stanley_cup_df = stanley_cup_df.replace("Evgeni Malkin", "Yevgeni Malkin")
stanley_cup_df = stanley_cup_df.replace("Johnny Quilty", "John Quilty")
stanley_cup_df = stanley_cup_df.replace("Ray Bourque", "Raymond Bourque")
stanley_cup_df = stanley_cup_df.replace("Sweeney Schriner", "David Schriner")
stanley_cup_df = stanley_cup_df.replace("Tommy Anderson", "Tom Anderson")
stanley_cup_df = stanley_cup_df.replace("Bob Sirois", "Robert Sirois")
stanley_cup_df = stanley_cup_df.replace("Bobby Nystrom", "Bob Nystrom")
stanley_cup_df = stanley_cup_df.replace("Bucky Hollingworth", "Gord Hollingworth")
stanley_cup_df = stanley_cup_df.replace("Daniel Girardi", "Dan Girardi")
stanley_cup_df = stanley_cup_df.replace("Dollard St. Laurent", "Dollard St-Laurent")
stanley_cup_df = stanley_cup_df.replace("Evgeny Kuznetsov", "Yevgeni Kuznetsov")
stanley_cup_df = stanley_cup_df.replace("Fern Flaman", "Fernie Flaman")
stanley_cup_df = stanley_cup_df.replace("George 'Hully' Gee", "George Gee")
stanley_cup_df = stanley_cup_df.replace("Hubert 'Pit' Martin", "Pit Martin")
stanley_cup_df = stanley_cup_df.replace("J.C. Tremblay", "Jean-Claude Tremblay")
stanley_cup_df = stanley_cup_df.replace("Jack Leclair", "Jackie LeClair")
stanley_cup_df = stanley_cup_df.replace("Joshua Bailey", "Josh Bailey")
stanley_cup_df = stanley_cup_df.replace("Kenny Mosdell", "Ken Mosdell")
stanley_cup_df = stanley_cup_df.replace("Kristopher Letang", "Kris Letang")
stanley_cup_df = stanley_cup_df.replace("Leonard 'Red' Kelly", "Red Kelly")
stanley_cup_df = stanley_cup_df.replace("Nicholas Boynton", "Nick Boynton")
stanley_cup_df = stanley_cup_df.replace("Nick Libett", "Lynn Libett")
stanley_cup_df = stanley_cup_df.replace("Oliver Ekman-Larsson", "Oliver Ekman Larsson")
stanley_cup_df = stanley_cup_df.replace("Quinton Hughes", "Quinn Hughes")
stanley_cup_df = stanley_cup_df.replace("Red Sullivan", "George Sullivan")
stanley_cup_df = stanley_cup_df.replace("Reg Fleming", "Reggie Fleming")
stanley_cup_df = stanley_cup_df.replace("Reg Sinclair", "Reggie Sinclair")
stanley_cup_df = stanley_cup_df.replace("Ron 'Chico' Maki", "Chico Maki")
stanley_cup_df = stanley_cup_df.replace("Sandis Ozolinsh", "Sandis Ozolins")
stanley_cup_df = stanley_cup_df.replace("Theoren Fleury", "Theo Fleury")
stanley_cup_df = stanley_cup_df.replace("Willie Huber", "William Huber")
stanley_cup_df = stanley_cup_df.replace("Zachary Werenski", "Zach Werenski")
stanley_cup_df = stanley_cup_df.replace("Al Langlois", "Albert Langlois")
stanley_cup_df = stanley_cup_df.replace("Al Shields", "Allan Shields")
stanley_cup_df = stanley_cup_df.replace("Alex Kovalev", "Alexei Kovalev")
stanley_cup_df = stanley_cup_df.replace("Aurel Joliat", "Aurele Joliat")
stanley_cup_df = stanley_cup_df.replace("Baldy Cotton", "Harold Cotton")
stanley_cup_df = stanley_cup_df.replace("Billy Boyd", "Bill Boyd")
stanley_cup_df = stanley_cup_df.replace("Billy Taylor Sr.", "Billy Taylor")
stanley_cup_df = stanley_cup_df.replace("Bingo Kampman", "Rudolph Kampman")
stanley_cup_df = stanley_cup_df.replace("Bob Fillion", "Robert Fillion")
stanley_cup_df = stanley_cup_df.replace("Bryan Hextall Sr.", "Bryan Hextall, Sr.")
stanley_cup_df = stanley_cup_df.replace("Butch Bouchard", "Emile Bouchard")
stanley_cup_df = stanley_cup_df.replace("Connie Broden", "Connell Broden")
stanley_cup_df = stanley_cup_df.replace("David Michayluk", "Dave Michayluk")
stanley_cup_df = stanley_cup_df.replace("Dimitri Afanasenkov", "Dmitri Afanasenkov")
stanley_cup_df = stanley_cup_df.replace("Dmitry Orlov", "Dmitri Orlov")
stanley_cup_df = stanley_cup_df.replace("Donnie Marshall", "Don Marshall")
stanley_cup_df = stanley_cup_df.replace("Dutch Gainor", "Norman Gainor")
stanley_cup_df = stanley_cup_df.replace("Dutch Hiller", "Wilbert Hiller")
stanley_cup_df = stanley_cup_df.replace("Dutch Reibel", "Earl Reibel")
stanley_cup_df = stanley_cup_df.replace("Ed Gorman", "Edwin Gorman")
stanley_cup_df = stanley_cup_df.replace("Eddie Olczyk", "Ed Olczyk")
stanley_cup_df = stanley_cup_df.replace("Fern Majeau", "Fernand Majeau")
stanley_cup_df = stanley_cup_df.replace("Frank Callighen", "Francis Callighen")
stanley_cup_df = stanley_cup_df.replace("Fred Cook", "Bun Cook")
stanley_cup_df = stanley_cup_df.replace("Freddy Modin", "Fredrik Modin")
stanley_cup_df = stanley_cup_df.replace("George Boucher", "Buck Boucher")
stanley_cup_df = stanley_cup_df.replace("Gord Pettinger", "Gordon Pettinger")
stanley_cup_df = stanley_cup_df.replace("Hobie Kitchen", "Chapman Kitchen")
stanley_cup_df = stanley_cup_df.replace("Ivan Barbashev", "Ivan Barbashyov")
stanley_cup_df = stanley_cup_df.replace("J.C. Tremblay", "Jean-Claude Tremblay")
stanley_cup_df = stanley_cup_df.replace("J.J. Daigneault", "Jean-Jacques Daigneault")
stanley_cup_df = stanley_cup_df.replace("Jimmy Peters", "Jim Peters")
stanley_cup_df = stanley_cup_df.replace("Johnny Sheppard", "John Sheppard")
stanley_cup_df = stanley_cup_df.replace("Lou Trudel", "Louis Trudel")
stanley_cup_df = stanley_cup_df.replace("Max Talbot", "Maxime Talbot")
stanley_cup_df = stanley_cup_df.replace("Michael Needham", "Mike Needham")
stanley_cup_df = stanley_cup_df.replace("Michael Zigomanis", "Mike Zigomanis")
stanley_cup_df = stanley_cup_df.replace("Mikhail Sergachev", "Mikhail Sergachyov")
stanley_cup_df = stanley_cup_df.replace("Moe Lemay", "Maurice Lemay")
stanley_cup_df = stanley_cup_df.replace("Moe Morris", "Elwyn Morris")
stanley_cup_df = stanley_cup_df.replace("Moose Vasko", "Elmer Vasko")
stanley_cup_df = stanley_cup_df.replace("Murph Chamberlain", "Erwin Chamberlain")
stanley_cup_df = stanley_cup_df.replace("Muzz Patrick", "Murray Patrick")
stanley_cup_df = stanley_cup_df.replace("Ott Heller", "Ehrhardt Heller")
stanley_cup_df = stanley_cup_df.replace("Paul Di Pietro", "Paul DiPietro")
stanley_cup_df = stanley_cup_df.replace("Peter Mahovlich", "Pete Mahovlich")
stanley_cup_df = stanley_cup_df.replace("Pit Lepine", "Alfred Lepine")
stanley_cup_df = stanley_cup_df.replace("Punch Broadbent", "Harry Broadbent")
stanley_cup_df = stanley_cup_df.replace("Sam Rothschild", "Samuel Rothschild")
stanley_cup_df = stanley_cup_df.replace("Sammy Blais", "Samuel Blais")
stanley_cup_df = stanley_cup_df.replace("Slava Voynov", "Vyacheslav Voynov")
stanley_cup_df = stanley_cup_df.replace("Stan Neckar", "Stanislav Neckar")
stanley_cup_df = stanley_cup_df.replace("Steve Reinprecht", "Steven Reinprecht")
stanley_cup_df = stanley_cup_df.replace("Viacheslav Fetisov", "Vyacheslav Fetisov")
stanley_cup_df = stanley_cup_df.replace("Lee Fogolin Jr.", "Lee Fogolin, Jr.")
stanley_cup_df = stanley_cup_df.replace("Bryan Hextall Sr.", "Bryan Hextall, Sr.")
stanley_cup_df.head()

Unnamed: 0,Player,Cups
0,Henri Richard,11
1,Jean Beliveau,10
2,Yvan Cournoyer,10
3,Claude Provost,9
4,Maurice Richard,8


Merge the Stanley Cup win dataframe to the awards dataframe.

In [140]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 stanley_cup_df[['Player', 'Cups']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Awards,Cups
0,Ace Bailey,1.0,1
1,Alexander Ovechkin,18.0,1
2,Babe Dye,2.0,1
3,Bernard Geoffrion,4.0,6
4,Bill Cook,2.0,2


Scrape the data from url to get the player all-star game appearances

In [141]:
# Scrape data for all-star game appearances.
all_star_tables = []

for page in range(1,19):
    url = f'https://www.quanthockey.com/scripts/AjaxPaginate.php?cat=Records&pos=Players&SS=&af=0&nat=alltime&st=5&sort=points&so=&page={page}&league=NHL%20All-Star%20Game&lang=en&rnd=651834569&dt=1&sd=undefined&ed=undefined'

    print('page:', page, '-------------')

    # find the table
    table = pd.read_html(url)
    df = table[0]
    # delete unneeded column
    del df['Unnamed: 1']
    # convert games played column to integer
    df['GP'] = df['GP'].astype(int)
    all_star_tables.append(df)
    
print("Scraping completed!")

page: 1 -------------
page: 2 -------------
page: 3 -------------
page: 4 -------------
page: 5 -------------
page: 6 -------------
page: 7 -------------
page: 8 -------------
page: 9 -------------
page: 10 -------------
page: 11 -------------
page: 12 -------------
page: 13 -------------
page: 14 -------------
page: 15 -------------
page: 16 -------------
page: 17 -------------
page: 18 -------------
Scraping completed!


In [142]:
# Combined the 18 tables of data to one dataframe 
all_star_df = pd.concat(all_star_tables, ignore_index=True)
all_star_df.head()

Unnamed: 0,Rk,Name,Born,Pos,GP,G,A,P,PIM,+/-,PPG,SHG,GWG,G/GP,A/GP,P/GP
0,1,Wayne Gretzky,1961,F,18,13,12,25,0,,0.0,0.0,1,0.722,0.667,1.389
1,2,Mario Lemieux,1965,F,10,13,10,23,0,,0.0,0.0,2,1.3,1.0,2.3
2,3,Joe Sakic,1969,F,12,6,16,22,0,3.0,0.0,0.0,0,0.5,1.333,1.833
3,4,Gordie Howe,1928,F,23,10,9,19,25,,6.0,1.0,3,0.435,0.391,0.826
4,5,Mark Messier,1961,F,15,6,13,19,4,2.0,0.0,0.0,2,0.4,0.867,1.267


In [143]:
# Remove goalies from dataframe
all_star_df = all_star_df[all_star_df['Pos'] != 'G'] 
all_star_df.head()

Unnamed: 0,Rk,Name,Born,Pos,GP,G,A,P,PIM,+/-,PPG,SHG,GWG,G/GP,A/GP,P/GP
0,1,Wayne Gretzky,1961,F,18,13,12,25,0,,0.0,0.0,1,0.722,0.667,1.389
1,2,Mario Lemieux,1965,F,10,13,10,23,0,,0.0,0.0,2,1.3,1.0,2.3
2,3,Joe Sakic,1969,F,12,6,16,22,0,3.0,0.0,0.0,0,0.5,1.333,1.833
3,4,Gordie Howe,1928,F,23,10,9,19,25,,6.0,1.0,3,0.435,0.391,0.826
4,5,Mark Messier,1961,F,15,6,13,19,4,2.0,0.0,0.0,2,0.4,0.867,1.267


In [144]:
# Change names to match the master stat file
all_star_df = all_star_df.replace("Alex Ovechkin", "Alexander Ovechkin")
all_star_df = all_star_df.replace("Bernie Geoffrion", "Bernard Geoffrion")
all_star_df = all_star_df.replace("Evgeni Malkin", "Yevgeni Malkin")
all_star_df = all_star_df.replace("Johnny Quilty", "John Quilty")
all_star_df = all_star_df.replace("Ray Bourque", "Raymond Bourque")
all_star_df = all_star_df.replace("Sweeney Schriner", "David Schriner")
all_star_df = all_star_df.replace("Tommy Anderson", "Tom Anderson")
all_star_df = all_star_df.replace("Bob Sirois", "Robert Sirois")
all_star_df = all_star_df.replace("Bobby Nystrom", "Bob Nystrom")
all_star_df = all_star_df.replace("Bucky Hollingworth", "Gord Hollingworth")
all_star_df = all_star_df.replace("Daniel Girardi", "Dan Girardi")
all_star_df = all_star_df.replace("Dollard St. Laurent", "Dollard St-Laurent")
all_star_df = all_star_df.replace("Evgeny Kuznetsov", "Yevgeni Kuznetsov")
all_star_df = all_star_df.replace("Fern Flaman", "Fernie Flaman")
all_star_df = all_star_df.replace("George 'Hully' Gee", "George Gee")
all_star_df = all_star_df.replace("Hubert 'Pit' Martin", "Pit Martin")
all_star_df = all_star_df.replace("J.C. Tremblay", "Jean-Claude Tremblay")
all_star_df = all_star_df.replace("Jack Leclair", "Jackie LeClair")
all_star_df = all_star_df.replace("Joshua Bailey", "Josh Bailey")
all_star_df = all_star_df.replace("Kenny Mosdell", "Ken Mosdell")
all_star_df = all_star_df.replace("Kristopher Letang", "Kris Letang")
all_star_df = all_star_df.replace("Leonard 'Red' Kelly", "Red Kelly")
all_star_df = all_star_df.replace("Nicholas Boynton", "Nick Boynton")
all_star_df = all_star_df.replace("Nick Libett", "Lynn Libett")
all_star_df = all_star_df.replace("Oliver Ekman-Larsson", "Oliver Ekman Larsson")
all_star_df = all_star_df.replace("Quinton Hughes", "Quinn Hughes")
all_star_df = all_star_df.replace("Red Sullivan", "George Sullivan")
all_star_df = all_star_df.replace("Reg Fleming", "Reggie Fleming")
all_star_df = all_star_df.replace("Reg Sinclair", "Reggie Sinclair")
all_star_df = all_star_df.replace("Ron 'Chico' Maki", "Chico Maki")
all_star_df = all_star_df.replace("Sandis Ozolinsh", "Sandis Ozolins")
all_star_df = all_star_df.replace("Theoren Fleury", "Theo Fleury")
all_star_df = all_star_df.replace("Willie Huber", "William Huber")
all_star_df = all_star_df.replace("Zachary Werenski", "Zach Werenski")
all_star_df = all_star_df.replace("John Leclair", "John LeClair")
all_star_df.loc[(all_star_df.Name == 'Lee Fogolin') & (all_star_df.Born == 1955), 'Name'] = 'Lee Fogolin, Jr.'
all_star_df.loc[(all_star_df.Name == 'Billy Harris') & (all_star_df.Born == 1952), 'Name'] = 'Billy Harris, Jr.'
all_star_df.loc[(all_star_df.Name == 'Syl Apps') & (all_star_df.Born == 1947), 'Name'] = 'Syl Apps, Jr.'

all_star_df.head()

Unnamed: 0,Rk,Name,Born,Pos,GP,G,A,P,PIM,+/-,PPG,SHG,GWG,G/GP,A/GP,P/GP
0,1,Wayne Gretzky,1961,F,18,13,12,25,0,,0.0,0.0,1,0.722,0.667,1.389
1,2,Mario Lemieux,1965,F,10,13,10,23,0,,0.0,0.0,2,1.3,1.0,2.3
2,3,Joe Sakic,1969,F,12,6,16,22,0,3.0,0.0,0.0,0,0.5,1.333,1.833
3,4,Gordie Howe,1928,F,23,10,9,19,25,,6.0,1.0,3,0.435,0.391,0.826
4,5,Mark Messier,1961,F,15,6,13,19,4,2.0,0.0,0.0,2,0.4,0.867,1.267


In [145]:
# Change all special characters to standard characters to match master stat file
cols_to_check = ['Name']
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'á':'a'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'í':'i'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ä':'a'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ö':'o'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ç':'c'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'å':'a'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'É':'E'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'Å':'A'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'è':'e'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ë':'e'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'î':'i'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ó':'o'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ô':'o'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ø':'o'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ß':'ss'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ü':'u'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'ý':'y'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'Ö':'O'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'é':'e'}, regex=True)
all_star_df[cols_to_check] = all_star_df[cols_to_check].replace({'š':'s'}, regex=True)

all_star_df.head()

Unnamed: 0,Rk,Name,Born,Pos,GP,G,A,P,PIM,+/-,PPG,SHG,GWG,G/GP,A/GP,P/GP
0,1,Wayne Gretzky,1961,F,18,13,12,25,0,,0.0,0.0,1,0.722,0.667,1.389
1,2,Mario Lemieux,1965,F,10,13,10,23,0,,0.0,0.0,2,1.3,1.0,2.3
2,3,Joe Sakic,1969,F,12,6,16,22,0,3.0,0.0,0.0,0,0.5,1.333,1.833
3,4,Gordie Howe,1928,F,23,10,9,19,25,,6.0,1.0,3,0.435,0.391,0.826
4,5,Mark Messier,1961,F,15,6,13,19,4,2.0,0.0,0.0,2,0.4,0.867,1.267


In [146]:
# Rename the Name column to Player to match other dataframes
all_star_df = all_star_df.rename(columns={"Name": "Player", 'GP': 'All-Star Games'})
all_star_df.head()

Unnamed: 0,Rk,Player,Born,Pos,All-Star Games,G,A,P,PIM,+/-,PPG,SHG,GWG,G/GP,A/GP,P/GP
0,1,Wayne Gretzky,1961,F,18,13,12,25,0,,0.0,0.0,1,0.722,0.667,1.389
1,2,Mario Lemieux,1965,F,10,13,10,23,0,,0.0,0.0,2,1.3,1.0,2.3
2,3,Joe Sakic,1969,F,12,6,16,22,0,3.0,0.0,0.0,0,0.5,1.333,1.833
3,4,Gordie Howe,1928,F,23,10,9,19,25,,6.0,1.0,3,0.435,0.391,0.826
4,5,Mark Messier,1961,F,15,6,13,19,4,2.0,0.0,0.0,2,0.4,0.867,1.267


In [147]:
# Drop columns that are not needed
all_star_df = all_star_df.drop(['Rk', 'Born', 'Pos', 'G', 'A', 'P', 'PIM', '+/-', 'PPG', 'SHG',
                                 'GWG', 'G/GP', 'A/GP', 'P/GP'], axis=1)
all_star_df.head()

Unnamed: 0,Player,All-Star Games
0,Wayne Gretzky,18
1,Mario Lemieux,10
2,Joe Sakic,12
3,Gordie Howe,23
4,Mark Messier,15


Merge the All-Star game dataframe to the Awards dataframe.

In [148]:
# Merge the individual award files into one complete file
awards_merge_df = pd.merge(awards_merge_df,
                 all_star_df[['Player', 'All-Star Games']],
                 on='Player', how='outer')
awards_merge_df.head()

Unnamed: 0,Player,Awards,Cups,All-Star Games
0,Ace Bailey,1.0,1,
1,Alexander Ovechkin,18.0,1,7.0
2,Babe Dye,2.0,1,
3,Bernard Geoffrion,4.0,6,11.0
4,Bill Cook,2.0,2,


In [149]:
# Change all the NaN values to 0 and change numeric columns to integers
awards_merge_df = awards_merge_df.fillna(0)
awards_merge_df = awards_merge_df.astype({'Awards':'int', 'Cups':'int', 'All-Star Games':'int'}) 
awards_merge_df.head() 

Unnamed: 0,Player,Awards,Cups,All-Star Games
0,Ace Bailey,1,1,0
1,Alexander Ovechkin,18,1,7
2,Babe Dye,2,1,0
3,Bernard Geoffrion,4,6,11
4,Bill Cook,2,2,0


In [150]:
awards_merge_df.dtypes

Player            object
Awards             int32
Cups               int32
All-Star Games     int32
dtype: object

In [151]:
# Save the file to csv
awards_merge_df.to_csv("Output/awards_adj.csv", index=False, header=True)