# URL links

Gamesheet:
http://www.nhl.com/scores/htmlreports/20192020/GS020227.HTM

Play by play details:
http://www.nhl.com/scores/htmlreports/20192020/PL020227.HTM

# Scraping data

In [1]:
from bs4 import BeautifulSoup
import requests
import re
import os

In [2]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import pandas as pd

In [3]:
# Download the play-by-play report for game 0227 of the 2019-20 season.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the run on an HTTP error instead of silently
# parsing an error page as if it were the report.
page_play_by_play = requests.get(
    "http://www.nhl.com/scores/htmlreports/20192020/PL020227.HTM",
    timeout=30,
)
page_play_by_play.raise_for_status()
soup_play_by_play = BeautifulSoup(page_play_by_play.content, 'html.parser')
soup_play_by_play

<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>Play By Play</title>
</head>
<style type="text/css">
				@media screen
				{
				     .print-class { display: block;}
				}
				

				div.page
				{
					page-break-after: always;
					pabe-break-inside: avoid;
				}
				body {border:solid; border-width: 0;}
				p, td {font-family: arial,verdana; font-size: 9px;}
				.tablewidth{width:650px;}
				.heading {font-weight:bold;}
				.goal{font-weight: bold;font-size:11px;}
				.penalty{font-style: italic;font-size:11px;}
				.border {border:1px solid black;border-collapse: collapse;}
				.noborder {border:0px solid black;border-collapse: collapse;}
				.tborder{border-top:1px solid black;}
				.bborder{border-bottom:1px solid black;}
				.lborder{border-left:1px solid black;}
				.rborder{border-right:1px solid black;}
				.oddColor{background-color: #E7E7E7;}
				.evenColor{background-color: #FFFFFF;}
				.bold{font-weight:bold;}
				.italicize{font-

### Find teams

In [4]:
# Collect the bold, centred header cells of the report: the two team names
# plus game metadata (date, attendance, start/end time, game number, status).
teams = soup_play_by_play.find_all(
    name='td',
    attrs={'align': 'center', 'style': 'font-size: 10px;font-weight:bold'},
)
teams

[<td align="center" style="font-size: 10px;font-weight:bold">VEGAS GOLDEN KNIGHTS<br/>Game 16 Away Game 7</td>,
 <td align="center" style="font-size: 10px;font-weight:bold"></td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Tuesday, November 5, 2019</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Attendance 15,435 at Nationwide Arena</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Start 7:08 EST; End 9:35 EST</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Game 0227</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Final</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">COLUMBUS BLUE JACKETS<br/>Game 15 Home Game 9</td>,
 <td align="center" style="font-size: 10px;font-weight:bold">VEGAS GOLDEN KNIGHTS<br/>Game 16 Away Game 7</td>,
 <td align="center" style="font-size: 10px;font-weight:bold"></td>,
 <td align="center" style="font-size: 10px;font-weight:bold">Tuesday, November 5, 2

In [5]:
# Header cell 7 belongs to the home team. Capture the text that sits between
# the opening <td ...> tag and the <br/>; findall returns it as a list.
home_cell_html = str(teams[7])
regex = re.compile(r'>(.*)<br/?>')
home_team = regex.findall(home_cell_html)
home_team

['COLUMBUS BLUE JACKETS']

In [6]:
# Header cell 0 belongs to the away team. Capture the text that sits between
# the opening <td ...> tag and the <br/>; findall returns it as a list.
away_cell_html = str(teams[0])
regex = re.compile(r'>(.*)<br/?>')
away_team = regex.findall(away_cell_html)
away_team

['VEGAS GOLDEN KNIGHTS']

### Getting event info (1 event code per action)

In [7]:
# Preview of the centred "bborder" cells before any clean-up.
# The column headers are repeated on every new page of the report (e.g.,
# between lines 32 and 33), so until they are removed the cells that belong
# to one play are not at a constant spacing in this result.
soup_play_by_play.find_all(name='td', attrs={'align': 'center', 'class': 'bborder'})

[<td align="center" class="heading + bborder" width="2%">#</td>,
 <td align="center" class="heading + bborder" width="2%">Per</td>,
 <td align="center" class="heading + bborder" width="2%">Str</td>,
 <td align="center" class="heading + bborder" width="8%">Time:<br/>Elapsed<br/>Game</td>,
 <td align="center" class="heading + bborder" width="8%">Event</td>,
 <td align="center" class="heading + bborder" width="10%">VGK On Ice</td>,
 <td align="center" class="heading + bborder" width="10%">CBJ On Ice</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGSTR</td>,
 <td align="center" class="+ bborder">2</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGEND</td>

In [8]:
# These are all the cells we wish to remove: the column headers ("#", "Per",
# "Str", ...) that are added again on every new page of the document.
# find_all is the current Beautiful Soup method name (findAll is the legacy
# alias) and matches the other queries in this notebook.
removal = soup_play_by_play.find_all('td', {'align':'center', 'class': ['heading + bborder']})
removal

[<td align="center" class="heading + bborder" width="2%">#</td>,
 <td align="center" class="heading + bborder" width="2%">Per</td>,
 <td align="center" class="heading + bborder" width="2%">Str</td>,
 <td align="center" class="heading + bborder" width="8%">Time:<br/>Elapsed<br/>Game</td>,
 <td align="center" class="heading + bborder" width="8%">Event</td>,
 <td align="center" class="heading + bborder" width="10%">VGK On Ice</td>,
 <td align="center" class="heading + bborder" width="10%">CBJ On Ice</td>,
 <td align="center" class="heading + bborder" width="2%">#</td>,
 <td align="center" class="heading + bborder" width="2%">Per</td>,
 <td align="center" class="heading + bborder" width="2%">Str</td>,
 <td align="center" class="heading + bborder" width="8%">Time:<br/>Elapsed<br/>Game</td>,
 <td align="center" class="heading + bborder" width="8%">Event</td>,
 <td align="center" class="heading + bborder" width="10%">VGK On Ice</td>,
 <td align="center" class="heading + bborder" width="10%">C

In [9]:
# Use decompose() to remove every repeated header cell from the soup.
# decompose() mutates the soup in place, so re-running this cell simply finds
# nothing left to remove.
removal = soup_play_by_play.find_all('td', {'align':'center', 'class': ['heading + bborder']})
for match in removal:
    match.decompose()

In [10]:
# Run the query again now that the repeated headers have been removed.
# Every play now contributes exactly five centred cells (number, period,
# strength, time, event code), evenly spaced even across new pages.
play_by_play_event = soup_play_by_play.find_all(
    name='td',
    attrs={'align': 'center', 'class': 'bborder'},
)
play_by_play_event

[<td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGSTR</td>,
 <td align="center" class="+ bborder">2</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGEND</td>,
 <td align="center" class="+ bborder">3</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">ANTHEM</td>,
 <td align="center" class="+ bborder">4</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PSTR</td>,
 <td align="

### Declaring and filling new lists

In [12]:
# One accumulator per extracted column; every name is bound to its own
# independent empty list.
event_number, period, play_type = [], [], []
time_start, time_end = [], []
event_description = []

In [13]:
def _slice_between(html, open_marker, close_marker):
    """Slice `html` from just after the first `open_marker` up to the first `close_marker`."""
    start = html.find(open_marker) + len(open_marker)
    stop = html.find(close_marker)
    return html[start:stop]


# The cells arrive in groups of five per play, in this order:
# event number, period, strength, time (elapsed<br/>remaining), event code.
nrows_play_by_play = len(play_by_play_event)
for cell_index, cell in enumerate(play_by_play_event):
    cell_html = str(cell)
    column = cell_index % 5

    if column == 0:
        event_number.append(_slice_between(cell_html, '>', '</td'))
    elif column == 1:
        period.append(_slice_between(cell_html, '>', '</td'))
    elif column == 2:
        play_type.append(_slice_between(cell_html, '>', '</td'))
    elif column == 3:
        # The time cell holds two values separated by a <br/> tag:
        # the text before it is the start time, the text after it the end time.
        time_start.append(_slice_between(cell_html, '>', '<br'))
        time_end.append(_slice_between(cell_html, '/>', '</td'))
    else:
        event_description.append(_slice_between(cell_html, '>', '</'))


In [14]:
# Sanity check of the slicing logic on a single cell: index 5 is the
# event-number cell of the second play (5 % 5 == 0).
string = str(play_by_play_event[5])
string

'<td align="center" class="+ bborder">2</td>'

In [15]:
string.find('>')

36

In [16]:
string.find('</td')

38

In [17]:
string[37:38]

'2'

Check that everything was captured correctly in the lists

In [18]:
event_number

['1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '13

In [19]:
period

['1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2'

In [20]:
play_type

['\xa0',
 '\xa0',
 '\xa0',
 '\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'SH',
 'PP',
 'SH',
 'SH',
 'SH',
 '\xa0',
 'PP',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'PP',
 'PP',
 'PP',
 'PP',
 'PP',
 'SH',
 'PP',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'SH',
 'PP',
 '\xa0',
 '\xa0',
 'SH',
 'PP',
 '\xa0',
 'SH',
 'PP',
 '\xa0',
 'PP',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'PP',
 'PP',
 'SH',
 'SH',
 'PP',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'PP',

In [21]:
time_start

['0:00',
 '0:00',
 '0:00',
 '0:00',
 '0:00',
 '0:19',
 '0:19',
 '0:27',
 '0:30',
 '0:52',
 '1:26',
 '1:27',
 '1:27',
 '1:30',
 '1:31',
 '1:52',
 '1:58',
 '1:58',
 '2:35',
 '2:41',
 '2:50',
 '2:53',
 '3:26',
 '3:26',
 '4:13',
 '4:41',
 '4:42',
 '4:42',
 '4:51',
 '5:34',
 '5:52',
 '6:10',
 '6:11',
 '6:11',
 '6:35',
 '6:35',
 '7:36',
 '7:44',
 '7:44',
 '7:50',
 '7:58',
 '8:31',
 '8:57',
 '9:09',
 '9:34',
 '10:33',
 '10:39',
 '10:39',
 '11:15',
 '11:23',
 '11:24',
 '11:24',
 '11:39',
 '11:46',
 '11:50',
 '11:58',
 '11:59',
 '12:07',
 '12:26',
 '13:37',
 '13:41',
 '14:00',
 '14:02',
 '14:02',
 '14:35',
 '15:21',
 '15:21',
 '16:02',
 '16:43',
 '16:56',
 '17:38',
 '17:38',
 '17:38',
 '17:45',
 '17:45',
 '17:50',
 '18:04',
 '18:17',
 '18:17',
 '18:32',
 '18:33',
 '18:33',
 '19:15',
 '19:15',
 '19:15',
 '19:15',
 '19:15',
 '19:48',
 '20:00',
 '0:00',
 '0:00',
 '0:29',
 '0:30',
 '0:30',
 '1:12',
 '1:14',
 '1:14',
 '1:22',
 '2:30',
 '2:30',
 '3:05',
 '3:16',
 '3:16',
 '3:40',
 '3:47',
 '3:48',
 '

In [22]:
time_end

['20:00',
 '20:00',
 '20:00',
 '20:00',
 '20:00',
 '19:41',
 '19:41',
 '19:33',
 '19:30',
 '19:08',
 '18:34',
 '18:33',
 '18:33',
 '18:30',
 '18:29',
 '18:08',
 '18:02',
 '18:02',
 '17:25',
 '17:19',
 '17:10',
 '17:07',
 '16:34',
 '16:34',
 '15:47',
 '15:19',
 '15:18',
 '15:18',
 '15:09',
 '14:26',
 '14:08',
 '13:50',
 '13:49',
 '13:49',
 '13:25',
 '13:25',
 '12:24',
 '12:16',
 '12:16',
 '12:10',
 '12:02',
 '11:29',
 '11:03',
 '10:51',
 '10:26',
 '9:27',
 '9:21',
 '9:21',
 '8:45',
 '8:37',
 '8:36',
 '8:36',
 '8:21',
 '8:14',
 '8:10',
 '8:02',
 '8:01',
 '7:53',
 '7:34',
 '6:23',
 '6:19',
 '6:00',
 '5:58',
 '5:58',
 '5:25',
 '4:39',
 '4:39',
 '3:58',
 '3:17',
 '3:04',
 '2:22',
 '2:22',
 '2:22',
 '2:15',
 '2:15',
 '2:10',
 '1:56',
 '1:43',
 '1:43',
 '1:28',
 '1:27',
 '1:27',
 '0:45',
 '0:45',
 '0:45',
 '0:45',
 '0:45',
 '0:12',
 '0:00',
 '20:00',
 '20:00',
 '19:31',
 '19:30',
 '19:30',
 '18:48',
 '18:46',
 '18:46',
 '18:38',
 '17:30',
 '17:30',
 '16:55',
 '16:44',
 '16:44',
 '16:20',
 '16

In [23]:
event_description

['PGSTR',
 'PGEND',
 'ANTHEM',
 'PSTR',
 'FAC',
 'STOP',
 'FAC',
 'SHOT',
 'BLOCK',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'MISS',
 'PENL',
 'FAC',
 'GIVE',
 'BLOCK',
 'BLOCK',
 'HIT',
 'STOP',
 'FAC',
 'SHOT',
 'BLOCK',
 'STOP',
 'FAC',
 'MISS',
 'HIT',
 'HIT',
 'SHOT',
 'STOP',
 'FAC',
 'STOP',
 'FAC',
 'HIT',
 'PENL',
 'FAC',
 'SHOT',
 'SHOT',
 'SHOT',
 'GIVE',
 'TAKE',
 'MISS',
 'BLOCK',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'HIT',
 'BLOCK',
 'SHOT',
 'HIT',
 'MISS',
 'GIVE',
 'HIT',
 'HIT',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'HIT',
 'STOP',
 'FAC',
 'HIT',
 'TAKE',
 'BLOCK',
 'SHOT',
 'STOP',
 'FAC',
 'STOP',
 'FAC',
 'MISS',
 'HIT',
 'HIT',
 'HIT',
 'BLOCK',
 'STOP',
 'FAC',
 'GOAL',
 'STOP',
 'CHL',
 'PENL',
 'FAC',
 'SHOT',
 'PEND',
 'PSTR',
 'FAC',
 'SHOT',
 'STOP',
 'FAC',
 'MISS',
 'STOP',
 'FAC',
 'MISS',
 'STOP',
 'FAC',
 'BLOCK',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'HIT',
 'BLOCK',
 'GOAL',
 'FAC',
 'GIVE',

### Get play by play details

In [24]:
# Remove the left-aligned header cell (width 58%) that is repeated at every
# new page, so page breaks no longer interrupt the play-by-play cells.
# NOTE(review): presumably this is the description-column header - confirm.
# find_all is the current Beautiful Soup method name (findAll is the legacy alias).
removal_2 = soup_play_by_play.find_all('td', {'align':'left', 'class': ['heading + bborder'], 'width':'58%' })
for match in removal_2:
    match.decompose()

In [25]:
# Run the query again after the removals, this time without the align filter
# so the description and on-ice cells are included as well.
# Every play now spans eight evenly spaced cells, even across new pages.
play_by_play_details = soup_play_by_play.find_all(name='td', attrs={'class': 'bborder'})
play_by_play_details

[<td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGSTR</td>,
 <td class="+ bborder"> </td>,
 <td class="+ bborder + rborder"> </td>,
 <td class="+ bborder"> </td>,
 <td align="center" class="+ bborder">2</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">PGEND</td>,
 <td class="+ bborder"> </td>,
 <td class="+ bborder + rborder"> </td>,
 <td class="+ bborder"> </td>,
 <td align="center" class="+ bborder">3</td>,
 <td align="center" class="+ bborder">1</td>,
 <td align="center" class="+ bborder"> </td>,
 <td align="center" class="+ bborder">0:00<br/>20:00</td>,
 <td align="center" class="+ bborder">ANTHEM</td>,
 <td class="+ bborder"> </td>,
 <td class="+ bborder

In [26]:
len(play_by_play_details)

2072

Start at line 25 (index 24) - this skips over the national anthem and other pre-game events

In [27]:
# Returns play number
play_by_play_details[24]

<td align="center" class="+ bborder">4</td>

In [28]:
# Returns period
play_by_play_details[25]

<td align="center" class="+ bborder">1</td>

In [29]:
# Returns type of play (even strength, power play, etc.)
play_by_play_details[26]

<td align="center" class="+ bborder"> </td>

In [30]:
# Returns time - in 2 formats (a) time since start, (b) time remaining
play_by_play_details[27]

<td align="center" class="+ bborder">0:00<br/>20:00</td>

In [31]:
# Returns event description
play_by_play_details[28]

<td align="center" class="+ bborder">PSTR</td>

In [32]:
# Returns event details
play_by_play_details[29]

<td class="+ bborder">Period Start- Local time: 7:08 EST</td>

In [33]:
# Returns info for all players on ice for one team - not clean
play_by_play_details[30]

<td class="+ bborder + rborder">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<font style="cursor:hand;" title="Center - WILLIAM KARLSSON">71</font>
</td>
</tr>
<tr>
<td align="center">C</td>
</tr>
</table>
</td>
<td align="center"> </td>
<td align="center">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<font style="cursor:hand;" title="Center - JONATHAN MARCHESSAULT">81</font>
</td>
</tr>
<tr>
<td align="center">C</td>
</tr>
</table>
</td>
<td align="center"> </td>
<td align="center">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<font style="cursor:hand;" title="Right Wing - REILLY SMITH">19</font>
</td>
</tr>
<tr>
<td align="center">R</td>
</tr>
</table>
</td>
<td align="center"> </td>
<td align="center">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<font style="cursor:hand;" title="Defen

In [34]:
# Returns the players on ice for the away team (first on-ice column) in clean
# form: one <font> tag per player, whose title is "Position - NAME" and whose
# text is the jersey number.
play_by_play_details[30].find_all('font')

[<font style="cursor:hand;" title="Center - WILLIAM KARLSSON">71</font>,
 <font style="cursor:hand;" title="Center - JONATHAN MARCHESSAULT">81</font>,
 <font style="cursor:hand;" title="Right Wing - REILLY SMITH">19</font>,
 <font style="cursor:hand;" title="Defense - JON MERRILL">15</font>,
 <font style="cursor:hand;" title="Defense - NICK HOLDEN">22</font>,
 <font style="cursor:hand;" title="Goalie - MARC-ANDRE FLEURY">29</font>]

In [35]:
# Returns the players on ice for the home team (second on-ice column) in
# clean form: one <font> tag per player, whose title is "Position - NAME" and
# whose text is the jersey number.
play_by_play_details[31].find_all('font')

[<font style="cursor:hand;" title="Center - GUSTAV NYQUIST">14</font>,
 <font style="cursor:hand;" title="Right Wing - OLIVER BJORKSTRAND">28</font>,
 <font style="cursor:hand;" title="Left Wing - PIERRE-LUC DUBOIS">18</font>,
 <font style="cursor:hand;" title="Defense - SETH JONES">3</font>,
 <font style="cursor:hand;" title="Defense - ZACH WERENSKI">8</font>,
 <font style="cursor:hand;" title="Goalie - JOONAS KORPISALO">70</font>]

In [36]:
len(play_by_play_details[31].find_all('font'))

6

In [37]:
play_by_play_details[32]

<td align="center" class="+ bborder">5</td>

In [38]:
str(play_by_play_details[31].find_all('font')[0])

'<font style="cursor:hand;" title="Center - GUSTAV NYQUIST">14</font>'

In the case of a penalty, there will be fewer players on the ice - the goaltender is always last

In [39]:
str(play_by_play_details[303].find_all('font')[0]).find('</')

57

In [40]:
str(play_by_play_details[303].find_all('font')[0]).find('>')

54

In [41]:
str(play_by_play_details[303].find_all('font')[0])[55:57]

'20'

In [42]:
str(play_by_play_details[303].find_all('font')[0]).find('- ')

41

In [43]:
str(play_by_play_details[303].find_all('font')[0]).find('">')

53

In [44]:
# Player name: runs from find('- ') + 2 = 43 up to find('">') = 53.
# Slice the first <font> tag itself (not the string form of the whole result
# list, whose leading '[' shifts every offset by one) so the indices match
# the find() results computed on that tag.
str(play_by_play_details[303].find_all('font')[0])[43:53]

'RILEY NASH'

In [45]:
str(play_by_play_details[303].find_all('font')[0]).find('title="')

27

In [46]:
str(play_by_play_details[303].find_all('font')[0]).find(' -')

40

In [47]:
# Player position: runs from find('title="') + 7 = 34 up to find(' -') = 40.
# Slice the first <font> tag itself (not the string form of the whole result
# list, whose leading '[' shifts every offset by one) so the indices match
# the find() results computed on that tag.
str(play_by_play_details[303].find_all('font')[0])[34:40]

'Center'

Run loop to extract values

In [48]:
# Accumulators for the per-play columns; every name is bound to its own
# independent empty list.
action_number, period_number, strength = [], [], []
action_start, action_end = [], []
action_description, action_details = [], []

# Six on-ice player slots for the away team: name, position and number each.
away_player_1_name, away_player_1_position, away_player_1_number = [], [], []
away_player_2_name, away_player_2_position, away_player_2_number = [], [], []
away_player_3_name, away_player_3_position, away_player_3_number = [], [], []
away_player_4_name, away_player_4_position, away_player_4_number = [], [], []
away_player_5_name, away_player_5_position, away_player_5_number = [], [], []
away_player_6_name, away_player_6_position, away_player_6_number = [], [], []

# Six on-ice player slots for the home team: name, position and number each.
home_player_1_name, home_player_1_position, home_player_1_number = [], [], []
home_player_2_name, home_player_2_position, home_player_2_number = [], [], []
home_player_3_name, home_player_3_position, home_player_3_number = [], [], []
home_player_4_name, home_player_4_position, home_player_4_number = [], [], []
home_player_5_name, home_player_5_position, home_player_5_number = [], [], []
home_player_6_name, home_player_6_position, home_player_6_number = [], [], []

In [49]:
# Start at 24 (eliminating 3 pregame ceremonies) and eliminate last row (which is empty)
for i in range (24, len(play_by_play_details)-8):
    j = i -24
    if (j % 8 ==0):
        string_event_number = str(play_by_play_details[i])
        first_index = string_event_number.find('>') + 1
        second_index = string_event_number.find('</td')
        event_number_final = string_event_number[first_index:second_index]
        action_number.append(event_number_final) 
        
        
    elif (j % 8 ==1):
        period_string = str(play_by_play_details[i])
        first_index = period_string.find('>') + 1
        second_index = period_string.find('</td')
        period_final = period_string[first_index:second_index]
        period_number.append(period_final) 
                
        
    elif (j % 8 ==2):
        string_play_type = str(play_by_play_details[i])
        first_index = string_play_type.find('>') + 1
        second_index = string_play_type.find('</td')
        play = string_play_type[first_index:second_index]
        strength.append(play) 
                
        
    elif (j % 8 ==3):
        string_time = str(play_by_play_details[i])
        
        # Start time
        first_index = string_time.find('>') + 1
        second_index = string_time.find('<br')
        time_one = string_time[first_index:second_index]
        action_start.append(time_one) 
        
        # End time
        first_index_2 = string_time.find('/>') + 2
        second_index_2 = string_time.find('</td')
        time_two = string_time[first_index_2:second_index_2]
        action_end.append(time_two) 

                
    elif (j % 8 ==4):
        string_event_description = str(play_by_play_details[i])
        first_index = string_event_description.find('>') + 1
        second_index = string_event_description.find('</')
        substring_event_description = string_event_description[first_index:second_index]
        action_description.append(substring_event_description) 
        
    elif (j % 8 ==5):
        string_event_description = str(play_by_play_details[i])
        first_index = string_event_description.find('>') + 1
        second_index = string_event_description.find('</')
        substring_event_description = string_event_description[first_index:second_index]
        action_details.append(substring_event_description) 
        
    elif (j % 8 ==6):
        number_of_players_away = len(play_by_play_details[i].find_all('font'))
        all_players_away = play_by_play_details[i].find_all('font')
        
        string_1 = str(all_players_away[0])
        string_2 = str(all_players_away[1])
        string_3 = str(all_players_away[2])
        string_4 = str(all_players_away[3])

        # Away player 1
        name_index_1 = string_1.find('- ') + 2
        name_index_2 = string_1.find('">') 
        position_index_1 = string_1.find('title="') + 7
        position_index_2 = string_1.find(' -')
        number_index_1 = string_1.find('>') + 1
        number_index_2 = string_1.find('</')

        away_player_1_name.append(string_1[name_index_1:name_index_2])
        away_player_1_position.append(string_1[position_index_1:position_index_2])
        away_player_1_number.append(string_1[number_index_1:number_index_2])

           
        # Away player 2
        name_index_1 = string_2.find('- ') + 2
        name_index_2 = string_2.find('">') 
        position_index_1 = string_2.find('title="') + 7
        position_index_2 = string_2.find(' -')
        number_index_1 = string_2.find('>') + 1
        number_index_2 = string_2.find('</')

        away_player_2_name.append(string_2[name_index_1:name_index_2])
        away_player_2_position.append(string_2[position_index_1:position_index_2])
        away_player_2_number.append(string_2[number_index_1:number_index_2])

        # Away player 3
        name_index_1 = string_3.find('- ') + 2
        name_index_2 = string_3.find('">') 
        position_index_1 = string_3.find('title="') + 7
        position_index_2 = string_3.find(' -')
        number_index_1 = string_3.find('>') + 1
        number_index_2 = string_3.find('</')

        away_player_3_name.append(string_3[name_index_1:name_index_2])
        away_player_3_position.append(string_3[position_index_1:position_index_2])
        away_player_3_number.append(string_3[number_index_1:number_index_2])
           
        # Away player 4
        name_index_1 = string_4.find('- ') + 2
        name_index_2 = string_4.find('">') 
        position_index_1 = string_4.find('title="') + 7
        position_index_2 = string_4.find(' -')
        number_index_1 = string_4.find('>') + 1
        number_index_2 = string_4.find('</')

        away_player_4_name.append(string_4[name_index_1:name_index_2])
        away_player_4_position.append(string_4[position_index_1:position_index_2])
        away_player_4_number.append(string_4[number_index_1:number_index_2])
            
        if (number_of_players_away == 4):
            away_player_5_name.append(" ")
            away_player_5_position.append(" ")
            away_player_5_number.append(" ")
            
            away_player_6_name.append(" ")
            away_player_6_position.append(" ")
            away_player_6_number.append(" ")
        
        if (number_of_players_away == 5):
            string_5 = str(all_players_away[4])

            # Away player 5
            name_index_1 = string_5.find('- ') + 2
            name_index_2 = string_5.find('">') 
            position_index_1 = string_5.find('title="') + 7
            position_index_2 = string_5.find(' -')
            number_index_1 = string_5.find('>') + 1
            number_index_2 = string_5.find('</')

            away_player_5_name.append(string_5[name_index_1:name_index_2])
            away_player_5_position.append(string_5[position_index_1:position_index_2])
            away_player_5_number.append(string_5[number_index_1:number_index_2])

            away_player_6_name.append(" ")
            away_player_6_position.append(" ")
            away_player_6_number.append(" ")
        
        if (number_of_players_away == 6):
            string_5 = str(all_players_away[4])
            string_6 = str(all_players_away[5])

            # Away player 5
            name_index_1 = string_5.find('- ') + 2
            name_index_2 = string_5.find('">') 
            position_index_1 = string_5.find('title="') + 7
            position_index_2 = string_5.find(' -')
            number_index_1 = string_5.find('>') + 1
            number_index_2 = string_5.find('</')

            away_player_5_name.append(string_5[name_index_1:name_index_2])
            away_player_5_position.append(string_5[position_index_1:position_index_2])
            away_player_5_number.append(string_5[number_index_1:number_index_2])

            # Away player 6
            name_index_1 = string_6.find('- ') + 2
            name_index_2 = string_6.find('">') 
            position_index_1 = string_6.find('title="') + 7
            position_index_2 = string_6.find(' -')
            number_index_1 = string_6.find('>') + 1
            number_index_2 = string_6.find('</')

            away_player_6_name.append(string_6[name_index_1:name_index_2])
            away_player_6_position.append(string_6[position_index_1:position_index_2])
            away_player_6_number.append(string_6[number_index_1:number_index_2])            
                        
    else:
        number_of_players_home = len(play_by_play_details[i].find_all('font'))
        all_players_home = play_by_play_details[i].find_all('font')
        
        string_1 = str(all_players_home[0])
        string_2 = str(all_players_home[1])
        string_3 = str(all_players_home[2])
        string_4 = str(all_players_home[3])

        # Away player 1
        name_index_1 = string_1.find('- ') + 2
        name_index_2 = string_1.find('">') 
        position_index_1 = string_1.find('title="') + 7
        position_index_2 = string_1.find(' -')
        number_index_1 = string_1.find('>') + 1
        number_index_2 = string_1.find('</')

        home_player_1_name.append(string_1[name_index_1:name_index_2])
        home_player_1_position.append(string_1[position_index_1:position_index_2])
        home_player_1_number.append(string_1[number_index_1:number_index_2])

           
        # Away player 2
        name_index_1 = string_2.find('- ') + 2
        name_index_2 = string_2.find('">') 
        position_index_1 = string_2.find('title="') + 7
        position_index_2 = string_2.find(' -')
        number_index_1 = string_2.find('>') + 1
        number_index_2 = string_2.find('</')

        home_player_2_name.append(string_2[name_index_1:name_index_2])
        home_player_2_position.append(string_2[position_index_1:position_index_2])
        home_player_2_number.append(string_2[number_index_1:number_index_2])

        # Away player 3
        name_index_1 = string_3.find('- ') + 2
        name_index_2 = string_3.find('">') 
        position_index_1 = string_3.find('title="') + 7
        position_index_2 = string_3.find(' -')
        number_index_1 = string_3.find('>') + 1
        number_index_2 = string_3.find('</')

        home_player_3_name.append(string_3[name_index_1:name_index_2])
        home_player_3_position.append(string_3[position_index_1:position_index_2])
        home_player_3_number.append(string_3[number_index_1:number_index_2])
           
        # Away player 4
        name_index_1 = string_4.find('- ') + 2
        name_index_2 = string_4.find('">') 
        position_index_1 = string_4.find('title="') + 7
        position_index_2 = string_4.find(' -')
        number_index_1 = string_4.find('>') + 1
        number_index_2 = string_4.find('</')

        home_player_4_name.append(string_4[name_index_1:name_index_2])
        home_player_4_position.append(string_4[position_index_1:position_index_2])
        home_player_4_number.append(string_4[number_index_1:number_index_2])
            
        if (number_of_players_home == 4):
            home_player_5_name.append(" ")
            home_player_5_position.append(" ")
            home_player_5_number.append(" ")
            
            home_player_6_name.append(" ")
            home_player_6_position.append(" ")
            home_player_6_number.append(" ")
        
        if (number_of_players_home == 5):
            string_5 = str(all_players_home[4])

            # Home player 5
            name_index_1 = string_5.find('- ') + 2
            name_index_2 = string_5.find('">') 
            position_index_1 = string_5.find('title="') + 7
            position_index_2 = string_5.find(' -')
            number_index_1 = string_5.find('>') + 1
            number_index_2 = string_5.find('</')

            home_player_5_name.append(string_5[name_index_1:name_index_2])
            home_player_5_position.append(string_5[position_index_1:position_index_2])
            home_player_5_number.append(string_5[number_index_1:number_index_2])

            home_player_6_name.append(" ")
            home_player_6_position.append(" ")
            home_player_6_number.append(" ")
        
        if (number_of_players_home == 6):
            string_5 = str(all_players_home[4])
            string_6 = str(all_players_home[5])

            # Home player 5
            name_index_1 = string_5.find('- ') + 2
            name_index_2 = string_5.find('">') 
            position_index_1 = string_5.find('title="') + 7
            position_index_2 = string_5.find(' -')
            number_index_1 = string_5.find('>') + 1
            number_index_2 = string_5.find('</')

            home_player_5_name.append(string_5[name_index_1:name_index_2])
            home_player_5_position.append(string_5[position_index_1:position_index_2])
            home_player_5_number.append(string_5[number_index_1:number_index_2])

            # Home player 6
            name_index_1 = string_6.find('- ') + 2
            name_index_2 = string_6.find('">') 
            position_index_1 = string_6.find('title="') + 7
            position_index_2 = string_6.find(' -')
            number_index_1 = string_6.find('>') + 1
            number_index_2 = string_6.find('</')

            home_player_6_name.append(string_6[name_index_1:name_index_2])
            home_player_6_position.append(string_6[position_index_1:position_index_2])
            home_player_6_number.append(string_6[number_index_1:number_index_2])            


In [50]:
len(play_by_play_details[31].find_all('font'))

6

In [51]:
action_number

['4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '140',
 '141'

In [52]:
period_number

['1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2',
 '2'

In [53]:
strength

['\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'SH',
 'PP',
 'SH',
 'SH',
 'SH',
 '\xa0',
 'PP',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'PP',
 'PP',
 'PP',
 'PP',
 'PP',
 'SH',
 'PP',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'SH',
 'PP',
 '\xa0',
 '\xa0',
 'SH',
 'PP',
 '\xa0',
 'SH',
 'PP',
 '\xa0',
 'PP',
 'EV',
 '\xa0',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'PP',
 'PP',
 'SH',
 'SH',
 'PP',
 'EV',
 'EV',
 '\xa0',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 'EV',
 '\xa0',
 'PP',
 'PP',
 'PP',
 'SH',
 '\xa

In [54]:
action_start

['0:00',
 '0:00',
 '0:19',
 '0:19',
 '0:27',
 '0:30',
 '0:52',
 '1:26',
 '1:27',
 '1:27',
 '1:30',
 '1:31',
 '1:52',
 '1:58',
 '1:58',
 '2:35',
 '2:41',
 '2:50',
 '2:53',
 '3:26',
 '3:26',
 '4:13',
 '4:41',
 '4:42',
 '4:42',
 '4:51',
 '5:34',
 '5:52',
 '6:10',
 '6:11',
 '6:11',
 '6:35',
 '6:35',
 '7:36',
 '7:44',
 '7:44',
 '7:50',
 '7:58',
 '8:31',
 '8:57',
 '9:09',
 '9:34',
 '10:33',
 '10:39',
 '10:39',
 '11:15',
 '11:23',
 '11:24',
 '11:24',
 '11:39',
 '11:46',
 '11:50',
 '11:58',
 '11:59',
 '12:07',
 '12:26',
 '13:37',
 '13:41',
 '14:00',
 '14:02',
 '14:02',
 '14:35',
 '15:21',
 '15:21',
 '16:02',
 '16:43',
 '16:56',
 '17:38',
 '17:38',
 '17:38',
 '17:45',
 '17:45',
 '17:50',
 '18:04',
 '18:17',
 '18:17',
 '18:32',
 '18:33',
 '18:33',
 '19:15',
 '19:15',
 '19:15',
 '19:15',
 '19:15',
 '19:48',
 '20:00',
 '0:00',
 '0:00',
 '0:29',
 '0:30',
 '0:30',
 '1:12',
 '1:14',
 '1:14',
 '1:22',
 '2:30',
 '2:30',
 '3:05',
 '3:16',
 '3:16',
 '3:40',
 '3:47',
 '3:48',
 '3:48',
 '4:27',
 '4:30',
 '

In [55]:
action_end

['20:00',
 '20:00',
 '19:41',
 '19:41',
 '19:33',
 '19:30',
 '19:08',
 '18:34',
 '18:33',
 '18:33',
 '18:30',
 '18:29',
 '18:08',
 '18:02',
 '18:02',
 '17:25',
 '17:19',
 '17:10',
 '17:07',
 '16:34',
 '16:34',
 '15:47',
 '15:19',
 '15:18',
 '15:18',
 '15:09',
 '14:26',
 '14:08',
 '13:50',
 '13:49',
 '13:49',
 '13:25',
 '13:25',
 '12:24',
 '12:16',
 '12:16',
 '12:10',
 '12:02',
 '11:29',
 '11:03',
 '10:51',
 '10:26',
 '9:27',
 '9:21',
 '9:21',
 '8:45',
 '8:37',
 '8:36',
 '8:36',
 '8:21',
 '8:14',
 '8:10',
 '8:02',
 '8:01',
 '7:53',
 '7:34',
 '6:23',
 '6:19',
 '6:00',
 '5:58',
 '5:58',
 '5:25',
 '4:39',
 '4:39',
 '3:58',
 '3:17',
 '3:04',
 '2:22',
 '2:22',
 '2:22',
 '2:15',
 '2:15',
 '2:10',
 '1:56',
 '1:43',
 '1:43',
 '1:28',
 '1:27',
 '1:27',
 '0:45',
 '0:45',
 '0:45',
 '0:45',
 '0:45',
 '0:12',
 '0:00',
 '20:00',
 '20:00',
 '19:31',
 '19:30',
 '19:30',
 '18:48',
 '18:46',
 '18:46',
 '18:38',
 '17:30',
 '17:30',
 '16:55',
 '16:44',
 '16:44',
 '16:20',
 '16:13',
 '16:12',
 '16:12',
 '15

In [56]:
action_description

['PSTR',
 'FAC',
 'STOP',
 'FAC',
 'SHOT',
 'BLOCK',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'MISS',
 'PENL',
 'FAC',
 'GIVE',
 'BLOCK',
 'BLOCK',
 'HIT',
 'STOP',
 'FAC',
 'SHOT',
 'BLOCK',
 'STOP',
 'FAC',
 'MISS',
 'HIT',
 'HIT',
 'SHOT',
 'STOP',
 'FAC',
 'STOP',
 'FAC',
 'HIT',
 'PENL',
 'FAC',
 'SHOT',
 'SHOT',
 'SHOT',
 'GIVE',
 'TAKE',
 'MISS',
 'BLOCK',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'HIT',
 'BLOCK',
 'SHOT',
 'HIT',
 'MISS',
 'GIVE',
 'HIT',
 'HIT',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'HIT',
 'STOP',
 'FAC',
 'HIT',
 'TAKE',
 'BLOCK',
 'SHOT',
 'STOP',
 'FAC',
 'STOP',
 'FAC',
 'MISS',
 'HIT',
 'HIT',
 'HIT',
 'BLOCK',
 'STOP',
 'FAC',
 'GOAL',
 'STOP',
 'CHL',
 'PENL',
 'FAC',
 'SHOT',
 'PEND',
 'PSTR',
 'FAC',
 'SHOT',
 'STOP',
 'FAC',
 'MISS',
 'STOP',
 'FAC',
 'MISS',
 'STOP',
 'FAC',
 'BLOCK',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'STOP',
 'FAC',
 'SHOT',
 'SHOT',
 'HIT',
 'BLOCK',
 'GOAL',
 'FAC',
 'GIVE',
 'PENL',
 'STOP',
 'FAC',
 'GI

In [510]:
action_details

['Period Start- Local time: 7:08 EST',
 'CBJ won Neu. Zone - VGK #71 KARLSSON vs CBJ #18 DUBOIS',
 'PUCK IN BENCHES',
 'CBJ won Neu. Zone - VGK #71 KARLSSON vs CBJ #18 DUBOIS',
 'CBJ ONGOAL - #14 NYQUIST, Wrist, Off. Zone, 29 ft.',
 'CBJ #14 NYQUIST BLOCKED BY  VGK #88 SCHMIDT, Wrist, Def. Zone',
 'VGK ONGOAL - #19 SMITH, Snap, Off. Zone, 46 ft.',
 'VGK ONGOAL - #22 HOLDEN, Wrist, Off. Zone, 52 ft.',
 'GOALIE STOPPED (AFTER SOG)',
 'VGK won Off. Zone - VGK #26 STASTNY vs CBJ #38 JENNER',
 'VGK ONGOAL - #27 THEODORE, Wrist, Off. Zone, 53 ft.',
 'VGK ONGOAL - #26 STASTNY, Wrist, Off. Zone, 7 ft.',
 'VGK #14 HAGUE, Wrist, Wide of Net, Off. Zone, 51 ft.',
 'CBJ #77 ANDERSON\xa0Hooking(2 min), Def. Zone Drawn By: VGK #26 STASTNY',
 'CBJ won Def. Zone - VGK #21 EAKIN vs CBJ #10 WENNBERG',
 'VGK\xa0GIVEAWAY - #71 KARLSSON, Off. Zone',
 'VGK #81 MARCHESSAULT BLOCKED BY  CBJ #58 SAVARD, Wrist, Def. Zone',
 'VGK #21 EAKIN BLOCKED BY  CBJ #58 SAVARD, Wrist, Def. Zone',
 'CBJ #71 FOLIGNO HIT VGK #

In [57]:
away_player_1_name

['WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY GLASS',
 'CODY GLASS',
 'NICOLAS ROY',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'CODY GLASS',
 'CODY GLASS',
 'CODY GLASS',
 'CODY GLASS',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'NICOLAS ROY',
 'NICOLAS ROY',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'CODY EAKIN',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'NICOLAS ROY',
 'CODY GLASS',
 'CODY GLASS',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'CODY GLASS',
 'NICOLAS ROY'

In [58]:
away_player_1_position

['Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',
 'Center',

In [59]:
away_player_1_number

['71',
 '71',
 '71',
 '71',
 '71',
 '71',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '21',
 '21',
 '21',
 '21',
 '21',
 '9',
 '9',
 '10',
 '71',
 '71',
 '71',
 '71',
 '9',
 '9',
 '9',
 '9',
 '26',
 '26',
 '26',
 '10',
 '10',
 '71',
 '71',
 '71',
 '21',
 '26',
 '26',
 '10',
 '9',
 '9',
 '71',
 '71',
 '71',
 '71',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '9',
 '10',
 '10',
 '10',
 '10',
 '71',
 '71',
 '21',
 '9',
 '9',
 '71',
 '71',
 '26',
 '26',
 '10',
 '10',
 '10',
 '10',
 '10',
 '10',
 '10',
 '71',
 '71',
 '71',
 '71',
 '71',
 '71',
 '71',
 '9',
 '9',
 '9',
 '21',
 '21',
 '21',
 '21',
 '21',
 '26',
 '26',
 '26',
 '26',
 '9',
 '9',
 '9',
 '9',
 '10',
 '10',
 '10',
 '10',
 '71',
 '71',
 '71',
 '26',
 '26',
 '26',
 '9',
 '9',
 '9',
 '9',
 '21',
 '71',
 '71',
 '71',
 '71',
 '71',
 '71',
 '26',
 '26',
 '26',
 '26',
 '26',
 '26',
 '10',
 '10',
 '10',
 '10',
 '75',
 '21',
 '21',
 '21',
 '71',
 '71',
 '26',
 '10',
 '10',
 '9',
 '9',
 '9',
 '9',
 '9',
 '9',
 '9',
 '21',


In [60]:
away_player_2_number

['81',
 '81',
 '81',
 '81',
 '81',
 '81',
 '19',
 '61',
 '61',
 '61',
 '61',
 '61',
 '61',
 '61',
 '71',
 '71',
 '71',
 '71',
 '71',
 '26',
 '26',
 '75',
 '75',
 '75',
 '81',
 '81',
 '21',
 '21',
 '21',
 '21',
 '61',
 '61',
 '61',
 '75',
 '75',
 '19',
 '19',
 '19',
 '61',
 '92',
 '92',
 '28',
 '81',
 '81',
 '81',
 '81',
 '81',
 '81',
 '61',
 '61',
 '61',
 '61',
 '61',
 '61',
 '61',
 '21',
 '75',
 '75',
 '75',
 '75',
 '81',
 '81',
 '61',
 '21',
 '21',
 '81',
 '81',
 '61',
 '61',
 '75',
 '75',
 '75',
 '75',
 '75',
 '75',
 '75',
 '81',
 '81',
 '81',
 '81',
 '81',
 '81',
 '81',
 '26',
 '26',
 '26',
 '71',
 '71',
 '71',
 '71',
 '71',
 '61',
 '61',
 '61',
 '61',
 '21',
 '21',
 '21',
 '21',
 '75',
 '75',
 '75',
 '75',
 '81',
 '81',
 '81',
 '61',
 '61',
 '61',
 '21',
 '21',
 '21',
 '21',
 '61',
 '19',
 '19',
 '19',
 '19',
 '19',
 '19',
 '61',
 '61',
 '61',
 '61',
 '61',
 '61',
 '75',
 '75',
 '75',
 '75',
 '28',
 '61',
 '61',
 '19',
 '19',
 '19',
 '92',
 '26',
 '26',
 '61',
 '61',
 '61',
 '61',

In [61]:
away_player_2_name

['JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'REILLY SMITH',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'WILLIAM KARLSSON',
 'PAUL STASTNY',
 'PAUL STASTNY',
 'RYAN REAVES',
 'RYAN REAVES',
 'RYAN REAVES',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY EAKIN',
 'CODY EAKIN',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'RYAN REAVES',
 'RYAN REAVES',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'MARK STONE',
 'TOMAS NOSEK',
 'TOMAS NOSEK',
 'WILLIAM CARRIER',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE',
 'MARK STONE

In [62]:
away_player_3_name

['REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'JONATHAN MARCHESSAULT',
 'MARK STONE',
 'MARK STONE',
 'WILLIAM CARRIER',
 'WILLIAM CARRIER',
 'WILLIAM CARRIER',
 'REILLY SMITH',
 'REILLY SMITH',
 'JONATHAN MARCHESSAULT',
 'TOMAS NOSEK',
 'TOMAS NOSEK',
 'TOMAS NOSEK',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'WILLIAM CARRIER',
 'WILLIAM CARRIER',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'JON MERRILL',
 'JON MERRILL',
 'BRAYDEN MCNABB',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'MAX PACIOR

In [63]:
away_player_4_name

['JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'REILLY SMITH',
 'MAX PACIORETTY',
 'MAX PACIORETTY',
 'SHEA THEODORE',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NATE SCHMIDT',
 'NICOLAS HAGUE',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'NICOLAS HAGUE',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'JON MERRILL',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB',
 'BRAYDEN MCNABB'

In [64]:
away_player_5_name

['NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NATE SCHMIDT',
 'NATE SCHMIDT',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'NATE SCHMIDT',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'SHEA THEODORE',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NICK HOLDEN',
 'NATE SCHMIDT',
 'NATE SCHMIDT

In [65]:
away_player_6_name

['MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE FLEURY',
 'MARC-ANDRE

In [66]:
home_player_1_name

['GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'ALEXANDER WENNBERG',
 'RILEY NASH',
 'RILEY NASH',
 'RILEY NASH',
 'RILEY NASH',
 'BOONE JENNER',
 'ALEXANDER WENNBERG',
 'EMIL BEMSTROM',
 'EMIL BEMSTROM',
 'EMIL BEMSTROM',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'RILEY NASH',
 'RILEY NASH',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'BOONE JENNER',
 'BOONE JENNER',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'RILEY NASH',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'GUSTAV NYQUIST',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'RILEY NASH',
 'RILEY NASH',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'ALEXANDER WENNBERG',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'B

In [67]:
home_player_2_name

['OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'CAM ATKINSON',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'RILEY NASH',
 'RILEY NASH',
 'JOSH ANDERSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'OLIVER BJORKSTRAND',
 'RILEY NASH',
 'EMIL BEMSTROM',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'BOONE JENNER',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'OLIVER BJORKSTRAND',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'JOSH ANDERSO

In [68]:
home_player_3_name

['PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'SETH JONES',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'SETH JONES',
 'VLADISLAV GAVRIKOV',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'JOSH ANDERSON',
 'JOSH ANDERSON',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'PIERRE-LUC DUBOIS',
 'KOLE SHERWOOD',
 'KOLE SHERWOOD',
 'EMIL BEMSTROM',
 'EMIL BEMSTROM',
 'EMIL BEMSTROM',
 'EMIL BEMSTROM',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'PIERRE-LUC DUBOIS',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'SONNY MILANO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO'

In [69]:
home_player_4_name

['SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'ZACH WERENSKI',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'ZACH WERENSKI',
 'DAVID SAVARD',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'DEAN KUKAN',
 'DEAN KUKAN',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'CAM ATKINSON',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'NICK FOLIGNO',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'VLADISLAV GAVRIKOV',
 'DEAN KUKAN',
 'SETH JON

In [70]:
home_player_5_name

['ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'MARKUS NUTIVAARA',
 'MARKUS NUTIVAARA',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'SETH JONES',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'ZACH WERENSKI',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'DAVID SAVARD',
 'MARK

In [558]:
home_player_6_name

['JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 ' ',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORPISALO',
 'JOONAS KORP

# Build pandas DataFrame

In [71]:
# Let pandas show up to 500 rows before it truncates a displayed frame.
pd.options.display.max_rows = 500

In [72]:
# Assemble the play-by-play table from the parallel lists filled while scraping.
# Every list contributes one column, so they must all have the same length
# (pandas raises a ValueError otherwise).
play_by_play_columns = {
    'Event': action_number,
    'Period': period_number,
    'Strength': strength,
    'Start': action_start,
    'End': action_end,
    'Description': action_description,
    'Details': action_details,
    'Away p1 name': away_player_1_name,
    'Away p1 pos': away_player_1_position,
    'Away p1 num': away_player_1_number,
}
df = pd.DataFrame(play_by_play_columns)

df

Unnamed: 0,Event,Period,Strength,Start,End,Description,Details,Away p1 name,Away p1 pos,Away p1 num
0,4,1,,0:00,20:00,PSTR,Period Start- Local time: 7:08 EST,WILLIAM KARLSSON,Center,71
1,5,1,EV,0:00,20:00,FAC,CBJ won Neu. Zone - VGK #71 KARLSSON vs CBJ #1...,WILLIAM KARLSSON,Center,71
2,6,1,,0:19,19:41,STOP,PUCK IN BENCHES,WILLIAM KARLSSON,Center,71
3,7,1,EV,0:19,19:41,FAC,CBJ won Neu. Zone - VGK #71 KARLSSON vs CBJ #1...,WILLIAM KARLSSON,Center,71
4,8,1,EV,0:27,19:33,SHOT,"CBJ ONGOAL - #14 NYQUIST, Wrist, Off. Zone, 29...",WILLIAM KARLSSON,Center,71
5,9,1,EV,0:30,19:30,BLOCK,"CBJ #14 NYQUIST BLOCKED BY VGK #88 SCHMIDT, W...",WILLIAM KARLSSON,Center,71
6,10,1,EV,0:52,19:08,SHOT,"VGK ONGOAL - #19 SMITH, Snap, Off. Zone, 46 ft.",PAUL STASTNY,Center,26
7,11,1,EV,1:26,18:34,SHOT,"VGK ONGOAL - #22 HOLDEN, Wrist, Off. Zone, 52 ft.",PAUL STASTNY,Center,26
8,12,1,,1:27,18:33,STOP,GOALIE STOPPED (AFTER SOG),PAUL STASTNY,Center,26
9,13,1,EV,1:27,18:33,FAC,VGK won Off. Zone - VGK #26 STASTNY vs CBJ #38...,PAUL STASTNY,Center,26


# Salaries scraping

Anaheim Ducks
Arizona Coyotes
Boston Bruins
Buffalo Sabres
Calgary Flames
Carolina Hurricanes
Chicago Blackhawks
Colorado Avalanche
Columbus Blue Jackets
Dallas Stars
Detroit Red Wings
Edmonton Oilers
Florida Panthers
Los Angeles Kings
Minnesota Wild
Montreal Canadiens
Nashville Predators
New Jersey Devils
New York Islanders
New York Rangers
Ottawa Senators
Philadelphia Flyers
Pittsburgh Penguins
San Jose Sharks
Seattle Kraken
St Louis Blues
Tampa Bay Lightning
Toronto Maple Leafs
Vancouver Canucks
Vegas Golden Knights
Washington Capitals
Winnipeg Jets

Create a DataFrame for the first team, then append the remaining teams one by one.
The DataFrame has 3 columns:
* Team name
* Player name
* Salary

https://www.capfriendly.com/teams/coyotes/cap-tracker/2020

In [18]:
# Download the CapFriendly cap-tracker page for the Coyotes (2020 season) and
# parse it. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() stops here on an HTTP error instead of
# silently parsing an error page as if it were the salary table.
page_salary = requests.get(
    "https://www.capfriendly.com/teams/coyotes/cap-tracker/2020",
    timeout=30,
)
page_salary.raise_for_status()
soup_salary_team = BeautifulSoup(page_salary.content, 'html.parser')
soup_salary_team

<!DOCTYPE html>
<html lang="en"><head><title>Arizona Coyotes Daily Cap Tracker - CapFriendly - NHL Salary Caps</title><link href="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/favicons/favicon.ico" rel="icon" type="image/x-icon"/><link href="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/favicons/favicon.ico" rel="shortcut icon" type="image/x-icon"/><link href="https://fonts.googleapis.com/css?family=Open+Sans:300,400,700,600" rel="stylesheet" type="text/css"/><link href="https://www.capfriendly.com/teams/coyotes/daily-tracker/2020" rel="canonical"><link href="https://www.capfriendly.com/teams/coyotes/daily-tracker/2020" hreflang="en" rel="alternate"/><link href="https://fr.capfriendly.com/teams/coyotes/daily-tracker/2020" hreflang="fr" rel="alternate"/><meta charset="utf-8"/><meta content="strict-origin-when-cross-origin" name="referrer"/><meta content="Daily cap tracking for the Arizona Coyotes NHL club and their respective AHL club" name="description"/><meta

In [20]:
# Collect every table cell (<td>) of the page as one flat list; the cells are
# not grouped by row here, so columns have to be picked out by position later.
players = soup_salary_team.find_all(name='td')
players

[<td><a href="/players/oliver-ekman-larsson">Ekman-Larsson, Oliver</a></td>,
 <td class="center">Standard</td>,
 <td class="center">$8,250,000</td>,
 <td class="center">$7,175,000</td>,
 <td class="p0"></td>,
 <td class="center">186</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="p0"></td>,
 <td class="center">$8,250,000</td>,
 <td class="center">$8,250,000</td>,
 <td><a href="/players/phil-kessel">Kessel, Phil</a></td>,
 <td class="center">Standard</td>,
 <td class="center">$6,800,000</td>,
 <td class="center">$5,725,000</td>,
 <td class="p0"></td>,
 <td class="center">186</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</td>,
 <td class="center">0</

In [7]:
len(players)

1026

In [8]:
players[0]

<td><a href="/players/oliver-ekman-larsson">Ekman-Larsson, Oliver</a></td>

In [9]:
players[2]

<td class="center">$8,250,000</td>

In [14]:
players[19]

<td><a href="/players/phil-kessel">Kessel, Phil</a></td>

In [15]:
1026/19

54.0

In [16]:
str(players[0]).find('">')

42

In [17]:
str(players[0]).find('</')

65

In [19]:
str(players[0])[44:65]

'Ekman-Larsson, Oliver'

In [20]:
str(players[2]).find('">')

17

In [21]:
str(players[2]).find('</')

29

In [25]:
str(players[2])[19:29]

'$8,250,000'

In [21]:
# The flat <td> list appears to hold 19 cells per player row (players[19] is
# the next player's name cell), so this gives the number of player rows.
number_of_repetitions = len(players) // 19
# One team label per player row, plus empty accumulators for the parsed columns.
team = number_of_repetitions * ['coyotes']
player_name, salary = [], []

In [22]:
# Each player row spans CELLS_PER_ROW consecutive <td> cells in `players`:
# the cell at NAME_OFFSET holds the player's name (inside a link) and the cell
# at SALARY_OFFSET holds the salary figure.
CELLS_PER_ROW = 19
NAME_OFFSET = 0
SALARY_OFFSET = 2


def _cell_text(cell):
    """Return the visible text of a <td> cell, preferring its first link's text.

    Reading the text through BeautifulSoup avoids slicing the serialised HTML
    by hand, which breaks when a cell has no attributes or has nested markup.
    """
    link = cell.find('a')
    return (link if link is not None else cell).get_text(strip=True)


# Rebuild both lists from scratch (instead of appending to existing ones) so
# that re-running this cell cannot duplicate entries and leave them with a
# different length than `team`.
player_name = [_cell_text(cell) for cell in players[NAME_OFFSET::CELLS_PER_ROW]]
salary = [_cell_text(cell) for cell in players[SALARY_OFFSET::CELLS_PER_ROW]]

In [35]:
team

['coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes',
 'coyotes']

In [36]:
player_name

['Ekman-Larsson, Oliver',
 'Kessel, Phil',
 'Stepan, Derek',
 'Schmaltz, Nick',
 'Goligoski, Alex',
 'Hossa, Marián',
 'Hjalmarsson, Niklas',
 'Söderberg, Carl',
 'Chychrun, Jakob',
 'Dvorak, Christian',
 'Raanta, Antti',
 'Demers, Jason',
 'Grabner, Michael',
 'Kuemper, Darcy',
 'Hall, Taylor',
 'Crouse, Lawson',
 'Hinostroza, Vinnie',
 'Oesterle, Jordan',
 'Richardson, Brad',
 'Keller, Clayton',
 'Lyubushkin, Ilya',
 'Fischer, Christian',
 'Garland, Conor',
 'Hayton, Barrett',
 'Ness, Aaron',
 'Hill, Adin',
 'Comrie, Eric',
 'Capobianco, Kyle',
 'Chaput, Michael',
 'Prosvetov, Ivan',
 'Russo, Robbie',
 'Gross, Jordan',
 'Burke, Brayden',
 'Bunting, Michael',
 'Speers, Blake',
 'Källgren, Erik',
 'Mayo, Dysin',
 'Bennett, Beau',
 'Dineen, Cam',
 'Miele, Andy',
 'Madsen, Merrick',
 'Hännikäinen, Markus',
 'Smereck, Jalen',
 'Pederson, Lane',
 'Schnarr, Nathan',
 'Fiore, Giovanni',
 'Fasching, Hudson',
 'Birks, Dane',
 'Bahl, Kevin',
 'Jeník, Jan',
 'Merkley, Nick',
 'Söderström, Victor

In [40]:
# Display the full list of extracted salary strings
salary

['$8,250,000',
 '$6,800,000',
 '$6,500,000',
 '$5,850,000',
 '$5,475,000',
 '$5,275,000',
 '$5,000,000',
 '$4,750,000',
 '$4,600,000',
 '$4,450,000',
 '$4,250,000',
 '$3,937,500',
 '$3,350,000',
 '$1,850,000',
 '$3,000,000',
 '$1,533,333',
 '$1,500,000',
 '$1,400,000',
 '$1,250,000',
 '$885,833',
 '$874,125',
 '$821,666',
 '$775,000',
 '$894,167',
 '$725,000',
 '$708,750',
 '$700,000',
 '$745,000',
 '$675,000',
 '$809,167',
 '$700,000',
 '$925,000',
 '$925,000',
 '$737,500',
 '$728,333',
 '$817,500',
 '$715,000',
 '$725,000',
 '$745,000',
 '$725,000',
 '$767,500',
 '$750,000',
 '$686,667',
 '$690,000',
 '$786,667',
 '$701,667',
 '$737,500',
 '$755,000',
 '$817,500',
 '$817,500',
 '$863,333',
 '$925,000',
 '$697,500',
 '$770,833']

In [23]:
# Assemble the three parallel lists into one table (columns: Team, Name, Salary)
df_aggregate = pd.DataFrame(dict(Team=team, Name=player_name, Salary=salary))

df_aggregate

Unnamed: 0,Team,Name,Salary
0,coyotes,"Ekman-Larsson, Oliver","$8,250,000"
1,coyotes,"Kessel, Phil","$6,800,000"
2,coyotes,"Stepan, Derek","$6,500,000"
3,coyotes,"Schmaltz, Nick","$5,850,000"
4,coyotes,"Goligoski, Alex","$5,475,000"
5,coyotes,"Hossa, Marián","$5,275,000"
6,coyotes,"Hjalmarsson, Niklas","$5,000,000"
7,coyotes,"Söderberg, Carl","$4,750,000"
8,coyotes,"Chychrun, Jakob","$4,600,000"
9,coyotes,"Dvorak, Christian","$4,450,000"


In [24]:
# Build the cap-tracker URL for a single team slug
first_part = "https://www.capfriendly.com/teams/"
second_part = "/cap-tracker/2020"
team_name = 'goldenknights'
url = "".join([first_part, team_name, second_part])
url

'https://www.capfriendly.com/teams/goldenknights/cap-tracker/2020'

In [25]:
# Get data
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as an empty roster.
page_salary = requests.get(url, timeout=30)
page_salary.raise_for_status()
soup_salary_team = BeautifulSoup(page_salary.content, 'html.parser')
players = soup_salary_team.find_all('td')

# Create lists
# The parsing below treats every 19 consecutive <td> cells as one player row.
number_of_repetitions = len(players) // 19
team = [team_name] * number_of_repetitions
player_name = []
salary = []

# Fill lists
# Only complete rows are read, so team, player_name and salary always end up
# the same length even if the page ends with a partial group of cells.
for i in range(number_of_repetitions * 19):
    column = i % 19
    if column not in (0, 2):
        continue
    string = str(players[i])
    # The wanted text sits between the first '">' and the first '</' of the markup
    cell_text = string[string.find('">') + 2:string.find('</')]
    if column == 0:
        player_name.append(cell_text)  # position 0 of the row: player name
    else:
        salary.append(cell_text)       # position 2 of the row: salary

# Create pandas
df_team = pd.DataFrame({
   'Team': team,
   'Name': player_name,
   'Salary': salary})

df_team

Unnamed: 0,Team,Name,Salary
0,goldenknights,"Stone, Mark","$9,500,000"
1,goldenknights,"Fleury, Marc-André","$7,000,000"
2,goldenknights,"Pacioretty, Max","$7,000,000"
3,goldenknights,"Stastny, Paul","$6,500,000"
4,goldenknights,"Schmidt, Nate","$5,950,000"
5,goldenknights,"Karlsson, William","$5,900,000"
6,goldenknights,"Theodore, Shea","$5,200,000"
7,goldenknights,"Smith, Reilly","$5,000,000"
8,goldenknights,"Marchessault, Jonathan","$5,000,000"
9,goldenknights,"Tuch, Alex","$4,750,000"


In [26]:
# Aggregate the dfs
# DataFrame.append was removed in pandas 2.0; pd.concat produces the same frame
# (each frame keeps its own row labels, exactly as append did).
df_aggregate = pd.concat([df_aggregate, df_team])
df_aggregate

Unnamed: 0,Team,Name,Salary
0,coyotes,"Ekman-Larsson, Oliver","$8,250,000"
1,coyotes,"Kessel, Phil","$6,800,000"
2,coyotes,"Stepan, Derek","$6,500,000"
3,coyotes,"Schmaltz, Nick","$5,850,000"
4,coyotes,"Goligoski, Alex","$5,475,000"
...,...,...,...
49,goldenknights,"Røndbjerg, Jonas","$806,667"
50,goldenknights,"Kallionkieli, Marcus","$795,833"
51,goldenknights,"Pachal, Brayden","$765,000"
52,goldenknights,"Whitecloud, Zach",$0


### Create loop to run through all teams

In [27]:
# Team slugs still to scrape; coyotes and goldenknights have been added already.
# 29 is the desired length as we know that there were 31 teams in 2019.
teams_list = (
    'ducks bruins sabres flames hurricanes blackhawks avalanche bluejackets '
    'stars redwings oilers panthers kings wild canadiens predators '
    'devils islanders rangers senators flyers penguins sharks blues '
    'lightning mapleleafs canucks capitals jets'
).split()
len(teams_list)

29

In [50]:
# Check the entry at index 28, the last position of the 29-element list
teams_list[28]

'jets'

In [28]:
first_part = "https://www.capfriendly.com/teams/"
second_part = "/cap-tracker/2020"


def scrape_team_salaries(team_name):
    """Download one team's cap-tracker page and return its players as a DataFrame.

    Parameters
    ----------
    team_name : str
        Team slug inserted between ``first_part`` and ``second_part`` to form the URL.

    Returns
    -------
    pandas.DataFrame
        Columns ``Team``, ``Name`` and ``Salary`` (all strings), one row per
        complete group of 19 ``<td>`` cells found on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, so an error page is never
        parsed into an empty or garbage table.
    """
    url = first_part + team_name + second_part

    # Get data (the timeout keeps one stalled request from hanging the whole loop)
    page_salary = requests.get(url, timeout=30)
    page_salary.raise_for_status()
    soup_salary_team = BeautifulSoup(page_salary.content, 'html.parser')
    players = soup_salary_team.find_all('td')

    # Every 19 consecutive cells are treated as one player row. Only complete
    # rows are read so the three columns always have the same length.
    number_of_rows = len(players) // 19
    player_name = []
    salary = []
    for row in range(number_of_rows):
        # Position 0 of the row is read as the name, position 2 as the salary
        for offset, target in ((0, player_name), (2, salary)):
            string = str(players[row * 19 + offset])
            # The wanted text sits between the first '">' and the first '</'
            target.append(string[string.find('">') + 2:string.find('</')])

    # Create pandas
    return pd.DataFrame({
        'Team': [team_name] * number_of_rows,
        'Name': player_name,
        'Salary': salary})


# Loop for each team, then concatenate once. DataFrame.append was removed in
# pandas 2.0, and growing a frame inside a loop copies it on every iteration.
# Row labels of each team's frame are kept as-is, as append did.
# NOTE: re-running this cell adds every team to df_aggregate a second time.
team_frames = [scrape_team_salaries(team_name) for team_name in teams_list]
df_aggregate = pd.concat([df_aggregate] + team_frames)

In [29]:
# Display the aggregated table after all teams have been added
df_aggregate

Unnamed: 0,Team,Name,Salary
0,coyotes,"Ekman-Larsson, Oliver","$8,250,000"
1,coyotes,"Kessel, Phil","$6,800,000"
2,coyotes,"Stepan, Derek","$6,500,000"
3,coyotes,"Schmaltz, Nick","$5,850,000"
4,coyotes,"Goligoski, Alex","$5,475,000"
...,...,...,...
43,jets,"Kovacevic, Johnny","$792,500"
44,jets,"McKenzie, Skyler","$741,666"
45,jets,"Griffith, Seth","$700,000"
46,jets,"Green, Luke","$733,333"


In [67]:
# Print the kernel's current working directory
print(os.getcwd())

/Users/louisgenereux/Desktop/Hockey Analytics


In [30]:
# Write next to the notebook (the kernel's working directory) instead of a
# hard-coded absolute user path, so the export works on any machine.
# index=False drops the row labels, which repeat across teams.
df_aggregate.to_csv('2019_salaries.csv', index=False, header=True)