# Introduction to probability models

# Objectives
1. Understand the difference between conditional and unconditional probabilties
2. State Bayes Rule in probability and odds form
3. Demonstrate understanding of prior and inverse conditional probabilities
4. Demonstrate understanding of Bayes Factor
2. Use Bayes rule to calculate conditional probabilities

# Bayes Rule

$$p(g_s| X_s) = \frac{p(X_s|g_s) p(g_s)}{ p(X_s | g_s) p(g_s) +  p(X_s | \bar{g_s}) p( \bar{g_s}) }$$

where,

$X_s$: various game factors (i.e., score differential, period game states, shottype)

$P(g_s| X_s) $ : posterior (conditional) probability of a goal given event X

$P(g_s) $ : prior (unconditional) probability of a goal

$P(\bar{g_s}) $: prior (unconditional) probability not a goal

$P(X_s|g_s) $: inverse conditional probability given a goal

$P(X_s|\bar{g_s}) $: inverse conditional probability given not a goal

In [126]:
%matplotlib inline
import os
import sys
import pandas
import numpy
import matplotlib
import matplotlib.pyplot as plt
pandas.set_option('display.notebook_repr_html', False)
pandas.set_option('display.max_columns', 20)
pandas.set_option('display.max_rows', 25)

In [127]:
print('working directory: ', os.getcwd())

('working directory: ', '/home/vmuser/Documents/4P94/03-bayes_rule')


In [128]:
dm = pandas.read_csv('shots.csv')

In [129]:
dm.head()

   season  gamenumber   gamedate vteamcode hteamcode  eventnumber  period  \
0    2011       20001  10/6/2011       PHI       BOS            4       1   
1    2011       20001  10/6/2011       PHI       BOS            9       1   
2    2011       20001  10/6/2011       PHI       BOS           14       1   
3    2011       20001  10/6/2011       PHI       BOS           25       1   
4    2011       20001  10/6/2011       PHI       BOS           29       1   

   eventtimefromzero advantagetypeshooter advantagetypegoalie   ...     \
0                 47                   EV                  EV   ...      
1                114                   EV                  EV   ...      
2                138                   EV                  EV   ...      
3                249                   EV                  EV   ...      
4                297                   EV                  EV   ...      

  position      namegoalie  tgoals ogoals  zone   X   Y  XNorm  YNorm shotType  
0        G 

In [130]:
dm.columns

Index([u'season', u'gamenumber', u'gamedate', u'vteamcode', u'hteamcode',
       u'eventnumber', u'period', u'eventtimefromzero',
       u'advantagetypeshooter', u'advantagetypegoalie', u'subseasontype',
       u'teamcode', u'isTHome', u'eventtype', u'playernumber', u'position',
       u'namegoalie', u'tgoals', u'ogoals', u'zone', u'X', u'Y', u'XNorm',
       u'YNorm', u'shotType'],
      dtype='object')

In [131]:
dm.describe()

        season    gamenumber   eventnumber        period  eventtimefromzero  \
count  59426.0  59426.000000  59426.000000  59426.000000       59426.000000   
mean    2011.0  20621.620772    154.604567      2.034160         584.551779   
std        0.0    353.973953     89.815357      0.851267         347.956939   
min     2011.0  20001.000000      3.000000      1.000000           3.000000   
25%     2011.0  20317.000000     77.000000      1.000000         277.000000   
50%     2011.0  20624.000000    154.000000      2.000000         573.000000   
75%     2011.0  20927.000000    229.000000      3.000000         886.000000   
max     2011.0  21230.000000    392.000000      4.000000        1200.000000   

            isTHome  playernumber        tgoals        ogoals             X  \
count  59426.000000  59426.000000  59426.000000  59426.000000  59426.000000   
mean       0.486117     29.905462      1.307458      1.181453      0.273971   
std        0.499811     13.602276      1.367570    

In [132]:
dm['dscore'] = dm['tgoals'] = dm['ogoals']

In [133]:
dm['secStart'] = ((dm['period']-1)*1200 + dm['eventtimefromzero'])
dm = dm.sort_values(by=['season', 'gamenumber', 'secStart'], ascending=[1, 1, 1])
dm['minStart'] = (dm['secStart']/60).astype(int)

In [134]:
dm.head()

   season  gamenumber   gamedate vteamcode hteamcode  eventnumber  period  \
0    2011       20001  10/6/2011       PHI       BOS            4       1   
1    2011       20001  10/6/2011       PHI       BOS            9       1   
2    2011       20001  10/6/2011       PHI       BOS           14       1   
3    2011       20001  10/6/2011       PHI       BOS           25       1   
4    2011       20001  10/6/2011       PHI       BOS           29       1   

   eventtimefromzero advantagetypeshooter advantagetypegoalie   ...    ogoals  \
0                 47                   EV                  EV   ...         0   
1                114                   EV                  EV   ...         0   
2                138                   EV                  EV   ...         0   
3                249                   EV                  EV   ...         0   
4                297                   EV                  EV   ...         0   

  zone   X   Y  XNorm YNorm shotType  dscore  secS

In [135]:
dm['dscore'] = dm['tgoals'] - dm['ogoals']
dm['dscore'].describe()

count    59426.0
mean         0.0
std          0.0
min          0.0
25%          0.0
50%          0.0
75%          0.0
max          0.0
Name: dscore, dtype: float64

In [136]:
dm['dscore'] =  numpy.where(dm['dscore']>3,  3 , dm['dscore'] )
dm['dscore'] =  numpy.where(dm['dscore']<3, -3 , dm['dscore'] )
dm['dscore'].describe()

count    59426.0
mean        -3.0
std          0.0
min         -3.0
25%         -3.0
50%         -3.0
75%         -3.0
max         -3.0
Name: dscore, dtype: float64

In [137]:
dm['isgoal'] = numpy.where(dm['eventtype']=='GOAL', 1 , 0)

In [138]:
dm['eventtype'].value_counts()

SHOT    54724
GOAL     4702
Name: eventtype, dtype: int64

In [139]:
dm['shotType'].value_counts()

Wrist          29240
Slap           12255
Snap            8421
Backhand        5277
Tip-In          2395
Wrap-around      952
Deflected        886
Name: shotType, dtype: int64

In [140]:
dm['iswrist'] = numpy.where(dm['shotType']=='Wrist', 1 , 0)
dm['isslap']  = numpy.where(dm['shotType']=='Slap', 1 , 0)

In [141]:
dm.groupby(['shotType'])['eventtype'].value_counts()

shotType     eventtype
Backhand     SHOT          4761
             GOAL           516
Deflected    SHOT           713
             GOAL           173
Slap         SHOT         11707
             GOAL           548
Snap         SHOT          7734
             GOAL           687
Tip-In       SHOT          1959
             GOAL           436
Wrap-around  SHOT           903
             GOAL            49
Wrist        SHOT         26947
             GOAL          2293
Name: eventtype, dtype: int64

In [142]:
dm.groupby(['dscore'])['eventtype'].value_counts()

dscore  eventtype
-3      SHOT         54724
        GOAL          4702
Name: eventtype, dtype: int64

In [143]:
dm.groupby(['shotType'])['isgoal'].mean()

shotType
Backhand       0.097783
Deflected      0.195260
Slap           0.044716
Snap           0.081582
Tip-In         0.182046
Wrap-around    0.051471
Wrist          0.078420
Name: isgoal, dtype: float64

In [144]:
dm.groupby(['isgoal'])['iswrist'].mean()


isgoal
0    0.492416
1    0.487665
Name: iswrist, dtype: float64

In [145]:
dm.groupby(['isgoal'])['isslap'].mean()

isgoal
0    0.213928
1    0.116546
Name: isslap, dtype: float64

In [146]:
dm.groupby(['dscore'])['isgoal'].mean()

dscore
-3    0.079124
Name: isgoal, dtype: float64

In [147]:
p_goal = dm['isgoal'].mean()
p_goal

0.079123615925689089

In [148]:
p_notgoal = 1- dm['isgoal'].mean()
p_notgoal

0.92087638407431094

In [149]:
dg = dm[dm['isgoal']==1]
dn = dm[dm['isgoal']==0]

In [150]:
p_wrist_goal = dg['iswrist'].mean()
p_wrist_goal

0.48766482347937046

In [151]:
p_wrist_notgoal = dn['iswrist'].mean()
p_wrist_notgoal

0.49241649002265914

In [160]:
p_goal_wrist = p_wrist_goal*p_goal / (p_wrist_goal*p_goal + p_wrist_notgoal*p_notgoal  )
p_goal_wrist

0.078419972640218868

In [153]:
p_slap_goal = dg['isslap'].mean()
p_slap_goal

0.11654615057422374

In [159]:
p_slap_notgoal = dn['isslap'].mean()
p_slap_notgoal

0.21392807543308237

In [157]:
p_goal_slap = p_slap_goal*p_goal / (p_slap_goal*p_goal + p_slap_notgoal*p_notgoal  )
p_goal_slap

0.044716442268461852

In [158]:
dm.groupby(['shotType'])['isgoal'].mean()

shotType
Backhand       0.097783
Deflected      0.195260
Slap           0.044716
Snap           0.081582
Tip-In         0.182046
Wrap-around    0.051471
Wrist          0.078420
Name: isgoal, dtype: float64