In [1]:
import os
import pandas as pd
import src.nasty_score as nasty
import plotly.graph_objects as go

### Read data and load models

In [2]:
# Lookup table that is later left-joined onto `data` via the 'pitcher' column.
pitchers = pd.read_feather('data/pitcher_ids.fea')

# Main table for the analysis; every missing value is replaced with the
# sentinel -9999 immediately after loading.
# NOTE(review): presumably the downstream models expect -9999 rather than
# NaN for missing features -- confirm against src.nasty_score.
data = (
    pd.read_feather('data/nasty_data_2021.fea')
    .fillna(-9999)
)

In [3]:
# Load the three model artifacts that nasty.add_nasty_score() receives below.
# NOTE(review): the .pkl extension suggests these are pickles -- confirm that
# import_model is only ever pointed at trusted files.
sz_model, swing_model, swingmiss_model = map(
    nasty.import_model,
    (
        'models/sz_model.pkl',
        'models/swing_model.pkl',
        'models/swingmiss_model.pkl',
    ),
)

### Assign nasty scores

In [4]:
# Score the data with the three models, then attach the pitcher lookup
# columns. The left join keeps every scored row; rows whose 'pitcher' value
# has no match in `pitchers` get NaN in the joined columns.
# NOTE(review): this cell rebinds `data`, so running it a second time merges
# `pitchers` again (pandas then suffixes the overlapping columns). Re-run
# from the data-loading cell instead of re-running this cell on its own.
data = (
    nasty.add_nasty_score(data, sz_model, swing_model, swingmiss_model)
    .merge(pitchers, on='pitcher', how='left')
)

In [5]:
# Rank all rows from highest to lowest nasty_score with a fresh 0..n-1 index,
# then add a 0/1 column marking rows dated strictly before 2021-06-21.
# NOTE(review): assumes game_date is a datetime (or ISO-formatted string)
# column so that the comparison against '2021-06-21' is chronological.
sorted_data = (
    data
    .sort_values(by='nasty_score', ascending=False)
    .reset_index(drop=True)
    .assign(
        pre_sticky_flag=lambda ranked: (
            ranked['game_date'] < '2021-06-21'
        ).astype(int)
    )
)

In [6]:
# For several cut-offs N, report the share of the N highest-scoring rows of
# `sorted_data` (already ordered best-first) that carry pre_sticky_flag == 1.
for n_pitches in [50, 100, 150, 500, 1000, 2500, 5000, 10000, 25000, 50000,
                  100000, 250000, 500000]:
    # Positional, end-exclusive slice: exactly the first n_pitches rows.
    # A label-based .loc[0:n_pitches] slice includes the end label and would
    # therefore pick up n_pitches + 1 rows.
    top_flags = sorted_data['pre_sticky_flag'].iloc[:n_pitches]

    # The mean of a 0/1 flag is the fraction of flagged rows among the rows
    # actually selected, so the denominator stays correct even if the frame
    # holds fewer than n_pitches rows.
    percent_sticky = top_flags.mean()

    print(f'Of the top {n_pitches} pitches, {percent_sticky * 100:.2f}%'
          ' were pre-sticky-stuff ban')


Of the top 50 pitches, 62.00% were pre-sticky-stuff ban
Of the top 100 pitches, 60.00% were pre-sticky-stuff ban
Of the top 150 pitches, 52.00% were pre-sticky-stuff ban
Of the top 500 pitches, 47.80% were pre-sticky-stuff ban
Of the top 1000 pitches, 48.20% were pre-sticky-stuff ban
Of the top 2500 pitches, 48.20% were pre-sticky-stuff ban
Of the top 5000 pitches, 47.56% were pre-sticky-stuff ban
Of the top 10000 pitches, 47.05% were pre-sticky-stuff ban
Of the top 25000 pitches, 45.08% were pre-sticky-stuff ban
Of the top 50000 pitches, 44.65% were pre-sticky-stuff ban
Of the top 100000 pitches, 44.38% were pre-sticky-stuff ban
Of the top 250000 pitches, 44.13% were pre-sticky-stuff ban
Of the top 500000 pitches, 43.39% were pre-sticky-stuff ban


In [7]:
def _daily_mean_nasty(pitches, player_name):
    """Return one pitcher's mean nasty_score per game_date.

    Parameters
    ----------
    pitches : pandas.DataFrame
        Frame with 'player_name', 'game_date' and 'nasty_score' columns.
    player_name : str
        Value to match exactly against the 'player_name' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'game_date' and 'nasty_score' (the per-date mean), one row
        per date on which the pitcher has rows. Empty if the name does not
        occur.
    """
    return (
        pitches.loc[pitches.player_name == player_name]
        .groupby(['game_date'])
        .agg({'nasty_score': 'mean'})
        .reset_index()
    )


# Same aggregation for each pitcher of interest; previously this pipeline
# was copy-pasted once per name.
cole = _daily_mean_nasty(data, 'Cole, Gerrit')
degrom = _daily_mean_nasty(data, 'deGrom, Jacob')
gausman = _daily_mean_nasty(data, 'Gausman, Kevin')

In [8]:
# Mean nasty_score per game_date across all rows, plus a 10-row rolling mean
# of that daily series. The first nine smoothed values are NaN because the
# window is not yet full.
plot_data = (
    data
    .groupby(['game_date'])
    .agg({'nasty_score': 'mean'})
    .reset_index()
    .assign(
        nasty_score_smooth=lambda daily: (
            daily['nasty_score'].rolling(window=10).mean()
        )
    )
)

fig = go.Figure()

# Smoothed daily series from plot_data, drawn in black.
fig.add_trace(
    go.Scatter(
        x=plot_data.game_date,
        y=plot_data.nasty_score_smooth,
        marker=dict(color='black'),
    )
)

# Row masks for the two highlighted date windows (bounds inclusive).
game_dates = plot_data['game_date']
in_sticky_window = (game_dates <= '2021-06-21') & (game_dates >= '2021-06-02')
in_playoff_window = (game_dates <= '2021-11-02') & (game_dates >= '2021-10-05')

# Each band's top edge is the peak of the smoothed series inside its window.
sticky_max = plot_data.loc[in_sticky_window, 'nasty_score_smooth'].max()
playoff_max = plot_data.loc[in_playoff_window, 'nasty_score_smooth'].max()

# Shaded rectangles drawn beneath the trace, both starting at y = 18.5.
# NOTE(review): the red band starts at 2021-06-01 while sticky_max is taken
# from 2021-06-02 onwards -- confirm whether the one-day gap is intentional.
highlight_bands = [
    dict(x0="2021-06-01", x1="2021-06-21", y1=sticky_max,
         fillcolor="Red", opacity=0.35),
    dict(x0='2021-10-05', x1='2021-11-02', y1=playoff_max,
         fillcolor="Gold", opacity=0.5),
]
for band in highlight_bands:
    fig.add_shape(type="rect", y0=18.5, line_width=0, layer="below", **band)

# Text labels for the two bands, placed at a fixed height of y = 20.5.
band_labels = [
    ('2021-06-11', 'Sticky-stuff ban'),
    ('2021-10-19', 'Playoffs'),
]
for label_x, label_text in band_labels:
    fig.add_annotation(x=label_x, y=20.5, text=label_text, showarrow=False)

# Title, transparent plot background, fixed height, and no grid lines.
fig.update_layout(
    title='Nasty score rolling average over time',
    title_font_size=20.1,
    plot_bgcolor='rgba(0,0,0,0)',
    height=600,
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False, title='Nasty Score', title_font_size=16)

# Ensure the output directory exists. exist_ok=True makes this a no-op when
# the directory is already there and avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs('images', exist_ok=True)

# Export a static 1500x600 JPEG of the figure, then render it inline.
fig.write_image('images/nasty_plot.jpeg', width=1500, height=600)

fig.show()
