In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [2]:
# constrants notebook
################### Player id ################################

# A placeholder value indicating no player, e.g. to indicate
# pulled goalie
NOPLAYER = 'xxxxxxxNA'

########################### GCODE ##############################

# Each game of a season is assigned a unique 'gcode' number. The
# regular season games are between 20000 & 30000 and playoff games
# are over 30000

SEASON_START = 20000
PLAYOFFS_START = 30000
 
#################################################################

######################## Player columns #########################

# Use these arrays to grab all the columns for the home or away
# team in the WOI event data. They include include spots for a 6th
# attacker if the goalie is pulled. Note that at least one of the
# columns in each array will be NaN for every event because of this.


HOME_PLAYERS = ['homePlayer1','homePlayer2','homePlayer3','homePlayer4','homePlayer5','homePlayer6','Home_Goalie']
AWAY_PLAYERS = ['awayPlayer1','awayPlayer2','awayPlayer3','awayPlayer4','awayPlayer5','awayPlayer6','Away_Goalie']

# Same as above but without the goalies. 
HOME_SKATERS = ['homePlayer1','homePlayer2','homePlayer3','homePlayer4','homePlayer5','homePlayer6']
AWAY_SKATERS = ['awayPlayer1','awayPlayer2','awayPlayer3','awayPlayer4','awayPlayer5','awayPlayer6']

In [3]:
# event_filter notebook
def by_season(events, season):
    "Returns events from a given season. Season codes take the form '20142015'."
    return events[events['season'] == season]


############### By team ######################################

def by_home_team(events, team):
    "Returns events from homes games for `team` (use the standard 3-letter abbreviation for team names.)"
    return events[events['Home_Team'] == team]


def by_away_team(events, team):
    "Returns events from away games for `team` (use the standard 3-letter abbreviation for team names.)"
    return events[events['Away_Team'] == team]


def by_team(events, team):
    "Returns events from all games for `team` (use the standard 3-letter abbreviation for team names.)"
    home = by_home_team(events, team)
    away = by_away_team(events, team)
    return home.append(away).sort_values(by=['Game_Id'], axis=0)


def by_event_team(events, team):
    "Returns events where event team is `team` (use the standard 3-letter abbreviation for team names.)"
    return events[events['Ev_Team'] == team]


############### By players ######################################

def by_goalie(events, goalie):
    return events[(events['Home_Goalie'] == goalie) | (events['Away_Goalie'] == goalie)]


def by_home_goalie(events, goalie):
    return events[(events['Home_Goalie'] == goalie)]


def by_away_goalie(events, goalie):
    return events[(events['Away_Goalie'] == goalie)]


def by_skater(events, player):
    bool_vec = pd.Series(False, index=np.arange(len(events)))
    for position in (HOME_SKATERS + AWAY_SKATERS):
        bool_vec |= (events[position] == player)
    return events[bool_vec]


def by_home_skater(events, player):
    bool_vec = pd.Series(False, index=np.arange(len(events)))
    for position in HOME_PLAYERS:
        bool_vec |= (events[position] == player)
    return events[bool_vec]


def by_away_skater(events, player):
    bool_vec = pd.Series(False, index=np.arange(len(events)))
    for position in AWAY_SKATERS:
        bool_vec |= (events[position] == player)
    return events[bool_vec]


################### Period/Regulation/OT/Shootout ###########################

def period(events, period):
    "Return events from a given period of play."
    return events[events['Period'] == period]


def regulation(events):
    "Return events from regulation time, i.e. periods 1,2,3."
    return events[events['Period'] <= 3]


def overtime(events):
    "Return events from overtime, excluding shootouts."
    after_regulation = events[events['Period'] > 3]
    return remove_shootouts(after_regulation)


def shootouts(events):
    "Returns only shootout events."
    # Shootouts are period 5 of regular season games.
    return events[(events['Period'] == 5)
                  & (events['Game_Id'] <= PLAYOFFS_START)]


def remove_shootouts(events):
    "Removes all shootout events."
    # Shootouts are period 5 of regular season games.
    return events[(events['Period'] != 5)
                  | (events['Game_Id'] > PLAYOFFS_START)]


################## Man-advantage status #######################

def even_strength(events):
    "Return even-strength (5v5, 4v4, 3v3) events. "
    return events[events['Home_Players'] == events['Away_Players']]


def five_on_five(events):  # goalies count!
    return events[(events['Home_Players'] == 6) & (events['Away_Players'] == 6)]


def four_on_four(events):  # goalies count!
    return events[(events['Home_Players'] == 5) & (events['Away_Players'] == 5)]


def man_advantage(events):
    return events[events['Home_Players'] != events['Away_Players']]


def power_play(events, team):
    "Returns any power play events for specified team."
    return events[((events['Home_Team'] == team)
                   & (events['Home_Players'] > events['Away_Players']))
                  | ((events['Away_Team'] == team)
                     & (events['Away_Players'] > events['Home_Players']))]


def penalty_kill(events, team):
    "Returns any penalty_kill events for specified team."
    return events[((events['Home_Team'] == team)
                   & (events['Home_Players'] < events['Away_Players']))
                  | ((events['Away_Team'] == team)
                     & (events['Away_Players'] < events['Home_Players']))]


###################### Season status ##############################

def regular_season(events):
    "Return events from regular season games."
    return events[events['Game_Id'] <= PLAYOFFS_START]


def playoffs(events):
    "Return events from playoff games."
    return events[events['Game_Id'] > PLAYOFFS_START]


#################### Offensive events ##############################

def goals(events):
    return events[events['Event'] == 'GOAL']

def hits(events):
    return events[events['Event'] == 'HIT']


def shots(events):
    return events[(events['Event'] == 'SHOT') | (events['Event'] == 'GOAL')]


def shot_attempts(events):
    return events[(events['Event'] == 'SHOT') | (events['Event'] == 'GOAL')
                  | (events['Event'] == 'MISS') | (events['Event'] == 'BLOCK')]

def shooting_percentage(events):#TODO
    goal = goals(events)
    shot = shots(events)
    shot_percentage = goal/shot
    return shot_percentage

def record(events):
    Win = []
    Loss = []
    Tie = []
    if events['Home_Score'] > events['Away_Score']:
        Win.append(events['Home_Team'])
        Loss.append(events['Away_Team'])
    elif events['Home_Score'] < events['Away_Score']:
        Win.append(events['Away_Team'])
        Loss.append(events['Home_Team'])
    else:
        Tie.append(events['Home_Team'])
        Tie.append(events['Away_Team'])

    return Win, Loss, Tie




def game_winning_goals(events):  #TODO
    "Returns subset of the input events which represent game winning goals. Note that the input must include all non-shootout goals in a game for the output to be reliable."
    goal = remove_shootouts(events[events['Event'] == 'GOAL'])
    games = goal.groupby('Game_Id')

    def _find_gwg(game_goals):  # Returns empty df if a tie
        def _max_score(team):  # team = 'home' or 'away'
            score = game_goals[team + '_score'].max()
            if (game_goals.tail(1)['Ev_Team'].values[0] == game_goals[team + 'team'].values[0]):
                score += 1  # score entries don't include the goal just scored
            return score

        winner, loser = 'home', 'away'
        if _max_score('away') > _max_score('home'):
            winner, loser = 'away', 'home'
        winner_goals = game_goals[game_goals['Ev_Team'] == game_goals[winner + 'team']]
        return winner_goals[winner_goals[winner + '.score'] + 1 > _max_score(loser)].head(1)

    gwg = games.apply(_find_gwg)
    gwg.index = gwg.index.levels[1]  # removes the redundant gcode indexing layer
    return gwg



#################### Defensive events ##############################

def blocked_shots(events):
    return events[events['Event'] == 'BLOCK']


#################### Goalie events ###################################

def saves(events):

    return events[events['Event'] == 'SHOT']


In [4]:
#aggregate notebook
def players_goals(events):
    goal = goals(events)
    players_goals = goal.groupby('p1_name').size()
    players_goals.index.name = 'player'
    return players_goals



def corsi_for(events):
    shot = shot_attempts(events)
    corsi_for = shot.groupby('Ev_Team').size()
    corsi_for.index.name = 'Ev_Team'
    return corsi_for


def players_assists(events):
    goal = goals(events)
    players_assists = pd.melt(goal[['p2_name', 'p3_name']]).dropna().groupby('value').size()
    players_assists.index.name = 'player'
    return players_assists


def players_points(events):
    goal = players_goals(events)
    assists = players_assists(events)
    players_points = goal.add(assists, fill_value=0)
    players_points.index.name = 'player'
    return players_points


def skaters_total_scoring(events):  #TODO
    pass

#################################################################
################# Team Aggregation ##############################

def team_goals_by_game(events):
    goal = goals(events)
    team_goals_by_game = goal.groupby('Ev_Team').size()
    team_goals_by_game.index.name = 'Ev_Team'
    return team_goals_by_game

def team_goals(events):
    goal = goals(events)
    team_goals = goal.groupby('Ev_Team').size()
    team_goals.index.name = 'Ev_Team'
    return team_goals

def team_hits(events):
    hit = hits(events)
    team_hits = hit.groupby('Ev_Team').size()
    team_hits.index.name = 'Ev_Team'
    return team_hits

def team_blocks_against(events):
    block = blocked_shots(events)
    team_blocks = block.groupby('Ev_Team').size()
    team_blocks.index.name = 'Ev_Team'
    return team_blocks

def team_SOG(events):
    shot = shots(events)
    team_SOG = shot.groupby('Ev_Team').size()
    team_SOG.index.name = 'Ev_Team'
    return team_SOG

def team_shot_attempts(events):
    shot = shot_attempts(events)
    team_shot_attempts = shot.groupby('Ev_Team').size()
    team_shot_attempts.index.name = 'Ev_Team'
    return team_shot_attempts

def team_shot_attempts_against(events):
    shot = shot_attempts(events)
    if events['Ev_Team'] == events['Home_Team']:
        team_shot_attempts_against = shot.groupby('Away_Team').size()
        team_shot_attempts_against.index.name = 'Away_Team'
    else:
        team_shot_attempts_against = shot.groupby('Home_Team').size()
        team_shot_attempts_against.index.name = 'Home_Team'
    return team_shot_attempts

def team_saves(events):#TODD
    sog = team_shot_attempts(events)
    pass


#################################################################

############### Goalies Aggregation #############################

def _goalie_index(events):
    # dropna() removes nan from empty net situations
    def uniq_col(x): return set(events[x].dropna().unique())

    goalies = sorted(list(uniq_col('Away_Goalie').union(uniq_col('Home_Goalie'))))
    return pd.Index(goalies)


def goalies_games_played(events):  #TODO
    games = events['Game_Id'].unique()
    for game in games:
        pass


def goalie_records(events, goalie_index=None):  #TODO
    "Aggregate goalie win/loss record over input events. Returns data frame with total wins/losses, and home & away win/losses. A win/loss is recorded only if the goalie is on the ice for the game-winning-goal. A list or index of goalies can be input if already calculated; otherwise computed on the fly from events."
    g = evfilter.game_winning_goals(events)
    wins = {t: g[g['Ev_Team'] == g[t + 'team']] for t in ['home', 'away']}
    records = {
        'home_wins': wins['home'].groupby('Home_Goalie').size(),
        'home_losses': wins['away'].groupby('Home_Goalie').size(),
        'away_wins': wins['away'].groupby('Away_Goalie').size(),
        'away_losses': wins['home'].groupby('Away_Goalie').size(),
    }
    index = goalie_index if goalie_index is not None else _goalie_index(events)
    res = pd.DataFrame(index=index, data=records).fillna(0)
    res.insert(0, 'losses', res['home_losses'] + res['away_losses'])
    res.insert(0, 'wins', res['home_wins'] + res['away_wins'])
    return res


def goalies_games_started(events, goalie_index=None):
    "Aggregate goalie starts over input events. A list or index of goalies can be input if already calculated; otherwise computed on the fly from events. Returns data frame with columns: (starts,home_starts,away_starts)."
    index = goalie_index if goalie_index is not None else _goalie_index(events)
    res = pd.DataFrame(index=index)
    games = events.groupby('Game_Id').head(1)
    res.insert(len(res.columns), 'home_starts', games['Home_Goalie'].value_counts())
    res.insert(len(res.columns), 'away_starts', games['Away_Goalie'].value_counts())
    res.fillna(0, inplace=True)
    res.insert(0, 'starts', res['home_starts'] + res['away_starts'])
    return res


def goals_against(events):
    "Aggregate goals against for individual goalies over the input set of events. Returns a series indexed by goalie text_id."
    goal = goals(events)
    goalies = pd.Series(data=goal['Home_Goalie'], index=goal.index)
    goalies[goal['Ev_Team'] == goal['Home_Team']] = goal['Away_Goalie']
    return goalies.dropna().value_counts().sort_index()


def goalie_saves(events):
    "Aggregate saves for individual goalies over the input set of events. Returns a series indexed by goalie text_id."
    saved_shots = saves(events)
    goalie = pd.Series(data=saved_shots['Home_Goalie'], index=saved_shots.index)
    goalie[saved_shots['Ev_Team'] == saved_shots['Home_Team']] = saved_shots['Away_Goalie']
    return goalie.dropna().value_counts().sort_index()


def goalies_stats(events):  #TODO
    "Individual goalie data aggregated over the input events"
    stats = pd.DataFrame(index=_goalie_index(events))
    stats.insert(len(stats.columns), 'goals_against', goals_against(events))
    stats.insert(len(stats.columns), 'saves', saves(events))
    stats = stats.join(goalie_records(events, stats.index))
    stats.fillna(0, inplace=True)
    stats.insert(len(stats.columns), 'shots_against', (stats['saves'] + stats['goals_against']))
    stats.insert(len(stats.columns), 'save%', stats['saves'] / (stats['saves'] + stats['goals_against']))
    return stats

In [5]:
df = pd.read_csv('data/2017_2018out.csv')

In [6]:
df.dtypes

Game_Id              int64
Date                object
Period               int64
Event               object
Description         object
Time_Elapsed        object
Seconds_Elapsed    float64
Strength            object
Ev_Zone             object
Type                object
Ev_Team             object
Home_Zone           object
Away_Team           object
Home_Team           object
p1_name             object
p1_ID              float64
p2_name             object
p2_ID              float64
p3_name             object
p3_ID              float64
awayPlayer1         object
awayPlayer1_id     float64
awayPlayer2         object
awayPlayer2_id     float64
awayPlayer3         object
awayPlayer3_id     float64
awayPlayer4         object
awayPlayer4_id     float64
awayPlayer5         object
awayPlayer5_id     float64
awayPlayer6         object
awayPlayer6_id     float64
homePlayer1         object
homePlayer1_id     float64
homePlayer2         object
homePlayer2_id     float64
homePlayer3         object
h

In [7]:
df.head()

Unnamed: 0,Game_Id,Date,Period,Event,Description,Time_Elapsed,Seconds_Elapsed,Strength,Ev_Zone,Type,...,Away_Score,Home_Score,Away_Goalie,Away_Goalie_Id,Home_Goalie,Home_Goalie_Id,xC,yC,Home_Coach,Away_Coach
0,20001,2017-10-04,1,PSTR,Period Start- Local time: 6:17 CDT,0:00,0.0,5x5,,,...,0,0,FREDERIK ANDERSEN,8475883.0,STEVE MASON,8473461.0,,,PAUL MAURICE,MIKE BABCOCK
1,20001,2017-10-04,1,FAC,TOR won Neu. Zone - TOR #43 KADRI vs WPG #55 S...,0:00,0.0,5x5,Neu,,...,0,0,FREDERIK ANDERSEN,8475883.0,STEVE MASON,8473461.0,0.0,0.0,PAUL MAURICE,MIKE BABCOCK
2,20001,2017-10-04,1,BLOCK,"TOR #43 KADRI BLOCKED BY WPG #39 ENSTROM, Wri...",0:12,12.0,5x5,Def,WRIST SHOT,...,0,0,FREDERIK ANDERSEN,8475883.0,STEVE MASON,8473461.0,55.0,-22.0,PAUL MAURICE,MIKE BABCOCK
3,20001,2017-10-04,1,SHOT,"WPG ONGOAL - #44 MORRISSEY, Wrist, Off. Zone, ...",0:38,38.0,5x5,Off,WRIST SHOT,...,0,0,FREDERIK ANDERSEN,8475883.0,STEVE MASON,8473461.0,-36.0,-28.0,PAUL MAURICE,MIKE BABCOCK
4,20001,2017-10-04,1,HIT,"TOR #47 KOMAROV HIT WPG #44 MORRISSEY, Def. Zone",0:41,41.0,5x5,Def,,...,0,0,FREDERIK ANDERSEN,8475883.0,STEVE MASON,8473461.0,-60.0,-40.0,PAUL MAURICE,MIKE BABCOCK


In [8]:
df.columns

Index(['Game_Id', 'Date', 'Period', 'Event', 'Description', 'Time_Elapsed',
       'Seconds_Elapsed', 'Strength', 'Ev_Zone', 'Type', 'Ev_Team',
       'Home_Zone', 'Away_Team', 'Home_Team', 'p1_name', 'p1_ID', 'p2_name',
       'p2_ID', 'p3_name', 'p3_ID', 'awayPlayer1', 'awayPlayer1_id',
       'awayPlayer2', 'awayPlayer2_id', 'awayPlayer3', 'awayPlayer3_id',
       'awayPlayer4', 'awayPlayer4_id', 'awayPlayer5', 'awayPlayer5_id',
       'awayPlayer6', 'awayPlayer6_id', 'homePlayer1', 'homePlayer1_id',
       'homePlayer2', 'homePlayer2_id', 'homePlayer3', 'homePlayer3_id',
       'homePlayer4', 'homePlayer4_id', 'homePlayer5', 'homePlayer5_id',
       'homePlayer6', 'homePlayer6_id', 'Away_Players', 'Home_Players',
       'Away_Score', 'Home_Score', 'Away_Goalie', 'Away_Goalie_Id',
       'Home_Goalie', 'Home_Goalie_Id', 'xC', 'yC', 'Home_Coach',
       'Away_Coach'],
      dtype='object')

In [9]:
regDf = regular_season(df)
regDf = remove_shootouts(regDf)
regDf2 = regDf
regDf.drop(['Description', 'Time_Elapsed',
       'Seconds_Elapsed', 'Strength', 'Ev_Zone', 'Type',
       'Home_Zone', 'p1_name', 'p1_ID', 'p2_name',
       'p2_ID', 'p3_name', 'p3_ID', 'awayPlayer1', 'awayPlayer1_id',
       'awayPlayer2', 'awayPlayer2_id', 'awayPlayer3', 'awayPlayer3_id',
       'awayPlayer4', 'awayPlayer4_id', 'awayPlayer5', 'awayPlayer5_id',
       'awayPlayer6', 'awayPlayer6_id', 'homePlayer1', 'homePlayer1_id',
       'homePlayer2', 'homePlayer2_id', 'homePlayer3', 'homePlayer3_id',
       'homePlayer4', 'homePlayer4_id', 'homePlayer5', 'homePlayer5_id',
       'homePlayer6', 'homePlayer6_id', 'Away_Players', 'Home_Players','Away_Goalie', 'Away_Goalie_Id', 'Home_Goalie',
       'Home_Goalie_Id', 'xC', 'yC','Home_Coach', 'Away_Coach'], axis=1, inplace=True)

In [10]:
regDfShotAtt = regDf[regDf.Event.isin(['SHOT','GOAL','MISS','BLOCK'])]
regDfMiss = regDf[regDf.Event.isin(['MISS'])]
regDfSOG = regDf[regDf.Event.isin(['SHOT', 'GOAL'])]
regDfGoal = regDf[regDf.Event.isin(['GOAL'])]
regDfHit = regDf[regDf.Event.isin(['HIT'])]
regDfBlock = regDf[regDf.Event.isin(['BLOCK'])]

regDf.set_index(['Game_Id','Ev_Team'], inplace=True)

In [11]:
regDfShotAtt = (regDfShotAtt.groupby(['Game_Id','Ev_Team'])['Event'].size().reset_index())
regDfShotAtt.rename(columns={'Event':'Shot_Att'}, inplace=True)
regDfSOG = (regDfSOG.groupby(['Game_Id', 'Ev_Team'])['Event'].size().reset_index())
regDfSOG.rename(columns={'Event':'SOG_for'}, inplace=True)
regDfHit = (regDfHit.groupby(['Game_Id', 'Ev_Team'])['Event'].size().reset_index())
regDfHit.rename(columns={'Event':'Hits'}, inplace=True)
regDfGoal = (regDfGoal.groupby(['Game_Id','Ev_Team'])['Event'].size().reset_index())
regDfGoal.rename(columns={'Event':'Goals_for'}, inplace=True)
regDfblocks = (regDfBlock.groupby(['Game_Id', 'Ev_Team'])['Event'].size().reset_index())
regDfblocks.rename(columns={'Event':'Blocks_against'}, inplace=True)
regDfMiss = (regDfMiss.groupby(['Game_Id','Ev_Team'])['Event'].size().reset_index())
regDfMiss.rename(columns={'Event':'Misses'}, inplace=True)
regDf2018 = pd.concat([regDfShotAtt,regDfMiss['Misses'],regDfSOG['SOG_for'],regDfHit['Hits'],regDfblocks['Blocks_against']], axis=1)

regDf2018complete = regDf2018.merge(right=regDfGoal, left_on=['Game_Id','Ev_Team'],right_on=['Game_Id','Ev_Team'], how='left')

regDf2018complete.fillna(0,inplace=True)

regDf2018complete['Shot_Percentage_for'] = (regDf2018complete['Goals_for']/regDf2018complete['SOG_for'])*100
regDf2018complete['Fenwick_for'] = regDf2018complete['SOG_for'] + regDf2018complete['Misses']
regDf2018complete['Corsi_for'] = regDf2018complete['Shot_Att']
regDf2018complete['FSH%'] = (regDf2018complete['Goals_for']/regDf2018complete['Fenwick_for'])*100
regDf2018complete['Miss%'] = (regDf2018complete['Misses']/regDf2018complete['Fenwick_for'])*100
regDf2018complete['wshF'] = (regDf2018complete['Goals_for'] + (0.2*(regDf2018complete['Corsi_for'] - regDf2018complete['Goals_for'])))

In [12]:
print(regDf2018.head())
print(regDf2018.tail())
print(regDf2018.info())
print(regDf2018complete.head())
print(regDf2018complete.tail())
print(regDf2018complete.info())
print(regDf2018complete.columns)

   Game_Id Ev_Team  Shot_Att  Misses  SOG_for  Hits  Blocks_against
0    20001     TOR        53       8       31    16              14
1    20001     WPG        71      10       37    18              24
2    20002     PIT        72      20       33    27              19
3    20002     STL        56      12       34    36              10
4    20003     CGY        58      13       27    29              18
      Game_Id Ev_Team  Shot_Att  Misses  SOG_for  Hits  Blocks_against
2537    21269     VAN        69      15       38    19              16
2538    21270     DAL        32       8       18    11               6
2539    21270     LAK        63      12       36    15              15
2540    21271     MIN        55       8       24    15              23
2541    21271     SJS        70      18       30    16              22
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2542 entries, 0 to 2541
Data columns (total 7 columns):
Game_Id           2542 non-null int64
Ev_Team           2542

In [13]:

filterbyhits = hits(regDf)
hitsbygame = filterbyhits.groupby(['Game_Id']).apply(team_hits)
filterbyshotattempts = shot_attempts(regDf)
shotAttemptsbyGame = filterbyshotattempts.groupby(['Game_Id']).apply(team_shot_attempts)

In [14]:
regDf2018complete.head()

Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Percentage_for,Fenwick_for,Corsi_for,FSH%,Miss%,wshF
0,20001,TOR,53,8,31,16,14,7.0,22.580645,39,53,17.948718,20.512821,16.2
1,20001,WPG,71,10,37,18,24,2.0,5.405405,47,71,4.255319,21.276596,15.8
2,20002,PIT,72,20,33,27,19,4.0,12.121212,53,72,7.54717,37.735849,17.6
3,20002,STL,56,12,34,36,10,5.0,14.705882,46,56,10.869565,26.086957,15.2
4,20003,CGY,58,13,27,29,18,0.0,0.0,40,58,0.0,32.5,11.6


In [15]:
regDf2018complete.columns

Index(['Game_Id', 'Ev_Team', 'Shot_Att', 'Misses', 'SOG_for', 'Hits',
       'Blocks_against', 'Goals_for', 'Shot_Percentage_for', 'Fenwick_for',
       'Corsi_for', 'FSH%', 'Miss%', 'wshF'],
      dtype='object')

In [16]:
brents = regDf2018complete[['Game_Id', 'Ev_Team', 'Shot_Att', 'Misses', 'SOG_for', 'Hits', 'Blocks_against', 'Goals_for']]
brents.head()

Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for
0,20001,TOR,53,8,31,16,14,7.0
1,20001,WPG,71,10,37,18,24,2.0
2,20002,PIT,72,20,33,27,19,4.0
3,20002,STL,56,12,34,36,10,5.0
4,20003,CGY,58,13,27,29,18,0.0


In [105]:
brents.to_csv('brents.csv')

In [17]:
games_list = list(brents.Game_Id.unique())

In [18]:
brents[brents.Game_Id == games_list[0]]

Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for
0,20001,TOR,53,8,31,16,14,7.0
1,20001,WPG,71,10,37,18,24,2.0


In [None]:
two_rows = brents[brents['Game_Id'] == games_list[2]]
two_rows

In [None]:
two_rows['Shot_Att'][4]

In [None]:
brents['Shot_Att_Agnst'][2] = 2

In [30]:
brents[brents.Game_Id == games_list[0]]['Shot_Att_Agnst']

0    0
1    0
Name: Shot_Att_Agnst, dtype: int64

In [43]:
brents[brents.Game_Id == games_list[i]]['Shot_Att'][1]

71

In [44]:
brents[brents.Game_Id == games_list[i]]['Shot_Att_Agnst'][0]

0

In [38]:

brents[brents.Game_Id == games_list[i]]['Shot_Att_Agnst'][0] = brents[brents.Game_Id == games_list[i]]['Shot_Att'][1]

SyntaxError: invalid syntax (<ipython-input-38-86ddb34a0d86>, line 1)

In [19]:
brents.head()

Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for
0,20001,TOR,53,8,31,16,14,7.0
1,20001,WPG,71,10,37,18,24,2.0
2,20002,PIT,72,20,33,27,19,4.0
3,20002,STL,56,12,34,36,10,5.0
4,20003,CGY,58,13,27,29,18,0.0


In [61]:
brents[brents.Game_Id == games_list[0]].loc[1,'Shot_Att']

71

In [25]:
brents['Shot_Att_A'] =0
j=1
for i in range(len(games_list)):
    brents['Shot_Att_A'][i*2] = brents['Shot_Att'][i+j]
    brents['Shot_Att_A'][i+j] = brents['Shot_Att'][i*2]
    j = j + 1
    
    
brents.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A
0,20001,TOR,53,8,31,16,14,7.0,71
1,20001,WPG,71,10,37,18,24,2.0,53
2,20002,PIT,72,20,33,27,19,4.0,56
3,20002,STL,56,12,34,36,10,5.0,72
4,20003,CGY,58,13,27,29,18,0.0,80


In [27]:
brents.tail(10)

Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A
2532,21267,ANA,51,14,27,14,10,3.0,65
2533,21267,ARI,65,17,31,23,17,0.0,51
2534,21268,CGY,60,14,31,19,15,7.0,60
2535,21268,VGK,60,15,27,16,18,1.0,60
2536,21269,EDM,65,17,33,12,15,2.0,69
2537,21269,VAN,69,15,38,19,16,2.0,65
2538,21270,DAL,32,8,18,11,6,4.0,63
2539,21270,LAK,63,12,36,15,15,2.0,32
2540,21271,MIN,55,8,24,15,23,6.0,70
2541,21271,SJS,70,18,30,16,22,3.0,55


In [28]:
brents['SOG_A'] =0
j=1
for i in range(len(games_list)):
    brents['SOG_A'][i*2] = brents['SOG_for'][i+j]
    brents['SOG_A'][i+j] = brents['SOG_for'][i*2]
    j = j + 1
brents.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A,SOG_A
0,20001,TOR,53,8,31,16,14,7.0,71,37
1,20001,WPG,71,10,37,18,24,2.0,53,31
2,20002,PIT,72,20,33,27,19,4.0,56,34
3,20002,STL,56,12,34,36,10,5.0,72,33
4,20003,CGY,58,13,27,29,18,0.0,80,45
5,20003,EDM,80,18,45,31,17,3.0,58,27
6,20004,PHI,60,12,31,21,17,5.0,64,35
7,20004,SJS,64,12,35,14,17,3.0,60,31
8,20005,BOS,54,12,32,23,10,4.0,46,29
9,20005,NSH,46,10,29,25,7,3.0,54,32


In [29]:
brents['Hits_A'] =0
j=1
for i in range(len(games_list)):
    brents['Hits_A'][i*2] = brents['Hits'][i+j]
    brents['Hits_A'][i+j] = brents['Hits'][i*2]
    j = j + 1
brents.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A,SOG_A,Hits_A
0,20001,TOR,53,8,31,16,14,7.0,71,37,18
1,20001,WPG,71,10,37,18,24,2.0,53,31,16
2,20002,PIT,72,20,33,27,19,4.0,56,34,36
3,20002,STL,56,12,34,36,10,5.0,72,33,27
4,20003,CGY,58,13,27,29,18,0.0,80,45,31
5,20003,EDM,80,18,45,31,17,3.0,58,27,29
6,20004,PHI,60,12,31,21,17,5.0,64,35,14
7,20004,SJS,64,12,35,14,17,3.0,60,31,21
8,20005,BOS,54,12,32,23,10,4.0,46,29,25
9,20005,NSH,46,10,29,25,7,3.0,54,32,23


In [30]:
brents['Blocks_for'] =0
j=1
for i in range(len(games_list)):
    brents['Blocks_for'][i*2] = brents['Blocks_against'][i+j]
    brents['Blocks_for'][i+j] = brents['Blocks_against'][i*2]
    j = j + 1
brents.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A,SOG_A,Hits_A,Blocks_for
0,20001,TOR,53,8,31,16,14,7.0,71,37,18,24
1,20001,WPG,71,10,37,18,24,2.0,53,31,16,14
2,20002,PIT,72,20,33,27,19,4.0,56,34,36,10
3,20002,STL,56,12,34,36,10,5.0,72,33,27,19
4,20003,CGY,58,13,27,29,18,0.0,80,45,31,17
5,20003,EDM,80,18,45,31,17,3.0,58,27,29,18
6,20004,PHI,60,12,31,21,17,5.0,64,35,14,17
7,20004,SJS,64,12,35,14,17,3.0,60,31,21,17
8,20005,BOS,54,12,32,23,10,4.0,46,29,25,7
9,20005,NSH,46,10,29,25,7,3.0,54,32,23,10


In [31]:
brents['Goals_A'] =0
j=1
for i in range(len(games_list)):
    brents['Goals_A'][i*2] = brents['Goals_for'][i+j]
    brents['Goals_A'][i+j] = brents['Goals_for'][i*2]
    j = j + 1
brents.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Game_Id,Ev_Team,Shot_Att,Misses,SOG_for,Hits,Blocks_against,Goals_for,Shot_Att_A,SOG_A,Hits_A,Blocks_for,Goals_A
0,20001,TOR,53,8,31,16,14,7.0,71,37,18,24,2
1,20001,WPG,71,10,37,18,24,2.0,53,31,16,14,7
2,20002,PIT,72,20,33,27,19,4.0,56,34,36,10,5
3,20002,STL,56,12,34,36,10,5.0,72,33,27,19,4
4,20003,CGY,58,13,27,29,18,0.0,80,45,31,17,3
5,20003,EDM,80,18,45,31,17,3.0,58,27,29,18,0
6,20004,PHI,60,12,31,21,17,5.0,64,35,14,17,3
7,20004,SJS,64,12,35,14,17,3.0,60,31,21,17,5
8,20005,BOS,54,12,32,23,10,4.0,46,29,25,7,3
9,20005,NSH,46,10,29,25,7,3.0,54,32,23,10,4


In [None]:
brents['Blocks_for'] =0
j=1
for i in range(len(games_list)):
    brents['Blocks_for'][i*2] = brents['Blocks_against'][i+j]
    brents['Blocks_for'][i+j] = brents['Blocks_against'][i*2]
    j = j + 1
brents.head(10)

In [None]:
print(regDf2018complete.head())

In [None]:
newDf = pd.DataFrame([hitsbygame, shotAttemptsbyGame])
print(newDf.head())

In [None]:
caps_2017_2018.head()

In [None]:
goals18 = goals(nhl_20172018)

In [None]:
shots18 = shots(nhl_20172018)

In [None]:
shot_attempts18 = shot_attempts(nhl_20172018)

In [None]:
regular_season18 = regular_season(nhl_20172018)

In [None]:
saves_18 = goalie_saves(nhl_20172018)

In [None]:
players_goals18 = players_goals(nhl_20172018)

In [None]:
players18.sort_values().tail(20)

In [None]:
player_goals_rs18 = players_goals(regular_season18)

In [None]:
player_goals_rs18.sort_values().tail(20)

In [None]:
player_assists18 = players_assists(nhl_20172018)

In [None]:
player_assists18.sort_values().tail(20)

In [None]:
player_points18 = players_points(nhl_20172018)

In [None]:
player_points18.sort_values().tail(20)

In [None]:
goals_given18 = goals_against(nhl_20172018)

In [None]:
goals_given18.sort_values().tail(20)

In [None]:
saves18 = goalie_saves(nhl_20172018)