In [9]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import numpy as np
from scipy import stats
from numpy import arange,array,ones
import matplotlib.pyplot as plt
import pandas as pd

# Load plotly.js into the notebook so that plotly.offline.iplot (used for the
# box plots further down) can render figures inline without a plotly account.
plotly.offline.init_notebook_mode()

In [10]:
# Load the shot-block totals (one row per team per season, see the preview
# below); the path is resolved relative to the notebook's working directory.
df = pd.read_csv('data-v2.csv')

In [11]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,season,team,numGamesPlayoffs,numBlocksPlayoffs,numGamesRegular,numBlocksRegular
0,20062007,1,11,172,82,1127
1,20062007,2,5,91,82,1307
2,20062007,3,10,159,82,1386
3,20062007,5,5,88,82,1238
4,20062007,7,16,290,82,1248


In [12]:
# Per-game blocking rates for each team-season: total blocks / games played.
df['blkPerGmRegular'] = df['numBlocksRegular'].div(df['numGamesRegular'])
df['blkPerGmPlayoff'] = df['numBlocksPlayoffs'].div(df['numGamesPlayoffs'])
# Positive blkDiff means the team blocked more shots per game in the playoffs
# than it did during the regular season.
df['blkDiff'] = df['blkPerGmPlayoff'].sub(df['blkPerGmRegular'])

In [13]:
# Preview again to confirm the blkPerGmRegular, blkPerGmPlayoff and blkDiff
# columns were added.
df.head()

Unnamed: 0,season,team,numGamesPlayoffs,numBlocksPlayoffs,numGamesRegular,numBlocksRegular,blkPerGmRegular,blkPerGmPlayoff,blkDiff
0,20062007,1,11,172,82,1127,13.743902,15.636364,1.892461
1,20062007,2,5,91,82,1307,15.939024,18.2,2.260976
2,20062007,3,10,159,82,1386,16.902439,15.9,-1.002439
3,20062007,5,5,88,82,1238,15.097561,17.6,2.502439
4,20062007,7,16,290,82,1248,15.219512,18.125,2.905488


In [14]:
# Overlaid histograms comparing each playoff team's shot blocks per game in
# the regular season with the same team's rate in the playoffs.

def _blocks_histogram(values, name):
    """Return a semi-transparent histogram trace of per-game shot blocks.

    values -- the blocks-per-game numbers to bin (a DataFrame column here).
    name   -- legend label for the trace.

    Both traces share the same fixed bins (8 to 24 in steps of 2) so the
    overlaid bars line up. histnorm='percent' makes each bar the share of
    team-seasons in that bin, which is what the '%' y-axis label promises;
    without it plotly draws raw counts.
    """
    return go.Histogram(
        x=values,
        name=name,
        opacity=0.66,
        histnorm='percent',
        marker=dict(line=dict(color='grey', width=1.0)),
        autobinx=False,
        xbins=dict(start=8, end=24, size=2),
    )


trace1 = _blocks_histogram(df.blkPerGmRegular, 'Regular Season')
trace2 = _blocks_histogram(df.blkPerGmPlayoff, 'Playoffs')
data = [trace1, trace2]
layout = dict(
    title='2006-2015 NHL Shot Blocks Per Game For Playoff Teams',
    yaxis=dict(title='%'),
    xaxis=dict(title='Shot Blocks per Game (Cumulative Shot Blocks / Games Played)'),
    bargap=0.25,
    # 'overlay' draws both distributions on the same bins instead of stacking.
    barmode='overlay',
)
fig = dict(data=data, layout=layout)
# plot() writes a standalone temp-plot.html and returns its file URL; switch
# to plotly.offline.iplot(fig) to render the figure inline in the notebook.
plotly.offline.plot(fig)

'file:///Users/Rossie/Development/github/data-projects/hockey-shot-blocking/temp-plot.html'

In [15]:
# Are the two populations normal?
# stats.normaltest is the D'Agostino-Pearson test; its null hypothesis is that
# the sample comes from a normal distribution, so a large p-value means
# normality is not rejected.

# print(...) with a single argument prints the same text on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
resPlayoff = stats.normaltest(df.blkPerGmPlayoff)
print('playoff: ' + str(resPlayoff))
resRegular = stats.normaltest(df.blkPerGmRegular)
print('regluar season: ' + str(resRegular))

playoff: NormaltestResult(statistic=1.3459614507752162, pvalue=0.51018558597546892)
regluar season: NormaltestResult(statistic=0.72393887157427661, pvalue=0.6963036494895003)


In [16]:
# Welch's t-test (equal_var=False): do playoff and regular-season blocking
# rates have the same mean, without assuming equal variances?
# NOTE(review): both samples come from the same team-seasons, so they are
# paired rather than independent; the one-sample test on blkDiff is the
# matching paired analysis.
results = stats.ttest_ind(df.blkPerGmPlayoff, df.blkPerGmRegular, equal_var = False)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(str(results))

Ttest_indResult(statistic=6.193753446868004, pvalue=2.2065936403165815e-09)


In [17]:
# Histogram of each team-season's change in blocks per game (playoffs minus
# regular season); bars right of the zero line mean more blocking in the
# playoffs.
trace1 = go.Histogram(
    x=df.blkDiff,
    # Normalise to percentages so the bars agree with the '%' y-axis label;
    # without histnorm plotly draws raw counts.
    histnorm='percent',
    marker=dict(line=dict(color='grey', width=1.0)),
)
data = [trace1]
layout = dict(
    title='2006-2015 NHL Shot Blocks Per Game For Playoff Teams Difference (Playoff - Regular Season)',
    yaxis=dict(title='%'),
    xaxis=dict(
        title='Shot Blocks per Game Difference',
        # Keep the zero line visible: it separates decreases from increases.
        zeroline=True,
    ),
    bargap=0.25,
)
fig = dict(data=data, layout=layout)
# Writes a standalone temp-plot.html and returns its file URL.
plotly.offline.plot(fig)

'file:///Users/Rossie/Development/github/data-projects/hockey-shot-blocking/temp-plot.html'

In [65]:
# Is the difference population normal?
# print(...) with a single argument prints the same text on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
resDiff = stats.normaltest(df.blkDiff)
print('diff normality test: ' + str(resDiff))

# One-sample t-test of the per-team differences against a mean of zero, i.e.
# a paired comparison of playoff vs. regular-season blocking rates.
resDiffT = stats.ttest_1samp(df.blkDiff, 0.0)
print('diff t-test: ' + str(resDiffT))

diff normality test: NormaltestResult(statistic=5.8671079764564249, pvalue=0.053207602525582813)
diff t-test: Ttest_1sampResult(statistic=9.7830178032344772, pvalue=5.7654455833651475e-18)


In [19]:
# Rank team-seasons from the largest playoff increase in blocks per game to
# the largest decrease, then show the top of that ranking.
df_ordered = df.sort_values('blkDiff', ascending=False)
df_ordered.head()

Unnamed: 0,season,team,numGamesPlayoffs,numBlocksPlayoffs,numGamesRegular,numBlocksRegular,blkPerGmRegular,blkPerGmPlayoff,blkDiff
137,20142015,18,6,144,82,1180,14.390244,24.0,9.609756
158,20152016,30,6,137,82,1209,14.743902,22.833333,8.089431
54,20092010,9,6,129,82,1193,14.54878,21.5,6.95122
77,20102011,26,6,117,82,1032,12.585366,19.5,6.914634
157,20152016,28,10,226,82,1292,15.756098,22.6,6.843902


In [58]:
# Box plot of regular-season blocks per game, one box per season, to see how
# the blocking rate of playoff teams moved over time.
seasons = [
    20062007, 20072008, 20082009, 20092010, 20102011,
    20112012, 20122013, 20132014, 20142015, 20152016,
]


def _season_box(season):
    """Build the box trace for one season code such as 20062007."""
    digits = str(season)
    return go.Box(
        y=df.loc[df.season == season, 'blkPerGmRegular'],
        # Label the box with the two years of the code, e.g. '2006 - 2007'.
        name='%s - %s' % (digits[:4], digits[4:]),
        # Draw every team's point next to its box, spread out with jitter.
        boxpoints='all',
        jitter=0.3,
    )


data = [_season_box(season) for season in seasons]
layout = go.Layout(
    title="2006-2015 NHL Regular Season Shot Blocks Per Game For Playoff Teams in Each Season",
    showlegend=False,
    yaxis=dict(title='Shot Blocks Per Game'),
    xaxis=dict(title='Season', tickangle=-40),
)
fig = dict(data=data, layout=layout)
# Render inline in the notebook and also write the standalone HTML file.
plotly.offline.iplot(fig)
plotly.offline.plot(fig)