In [1]:
import pybaseball as pbb
import pandas as pd
import numpy as np
import re
import string

meters_per_mile = 1609.34
hours_per_second = 1/360
baseball_kg = 0.145

def homer_bulb(mlbam_key):

    batter_sc = pbb.statcast_batter(start_dt="2008-01-01", end_dt="2018-07-05", player_id=mlbam_key)
    hr_hit_mps = batter_sc["launch_speed"].loc[batter_sc["events"]=="home_run"]*meters_per_mile*hours_per_second
    hr_pitch_mps = batter_sc["release_speed"].loc[batter_sc["events"]=="home_run"]*meters_per_mile*hours_per_second
    
    # calc kinetic energy added
    KEA = 0.5*baseball_kg*np.power(hr_hit_mps-hr_pitch_mps, 2)  # joules
    total_KEA = KEA.sum()
    total_Wh = total_KEA/(3.6E3)  # watt hours
    seconds_of_40W_bulb_power = total_Wh*60/40
    
    return seconds_of_40W_bulb_power

In [2]:
hitting_totals = pbb.batting_stats(2008, 2018, league="all", qual=100, ind=0)

In [3]:
HR_leaders = hitting_totals[["Name", "HR"]].sort_values("HR", ascending=False)
# HR_leaders["Name"]

names = []
for row in HR_leaders[:25].iterrows():  # top 25 (to make sure all "bulb" leaders get included)
    names.append(row[1].Name.split(" "))

id_dict = {}
for name in names:
    lookup = pbb.playerid_lookup(name[1], name[0])
    if len(lookup["key_mlbam"]) == 1:
        id_dict[name[0]+" "+name[1]] = lookup["key_mlbam"].max()
    elif len(lookup["key_mlbam"]) >= 2:
        print("more than one player named "+str(name[0])+" "+str(name[1]))
        print("*getting info for all*")
        for i in range(len(lookup["key_mlbam"])):
            print(i)
            id_dict[name[0]+" "+name[1]+str(i)] = lookup["key_mlbam"][i]
    else:
        print("ERROR: something isn't right: "+str(len(lookup["key_mlbam"])))

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
more than one player named Nelson Cruz
*getting info for all*
0
1
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
more than one player named Miguel Cabrera
*getting info for all*
0
1
Gathering player lookup table. This may take a moment.
more than one player named Jose Bautista
*getting info for all*
0
1
2
3
4
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
more than one player named Ryan Braun
*getting info for all*
0
1
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
more than one player named Chris Davis
*getting info for all*
0
1
2
3
4
5
6
7
8
9
10
11
12
Gathering player lookup table. This may take a moment.
Gathering player lookup table.

In [4]:
bulb_dict = {}
for key, val in id_dict.items():
    bulb_dict[key] = homer_bulb(val)

Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data


  This is separate from the ipykernel package so we can avoid doing imports until


Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering 

In [5]:
bulb_df = pd.DataFrame.from_dict(bulb_dict, orient="index")
bulb_df.columns = ["40W Bulb Seconds"]
bulb_df.sort_values("40W Bulb Seconds", ascending=False, inplace=True)

bulb_df = bulb_df[:10]  # take the top 10
# DataFrame with homer totals
HR_df = pd.DataFrame(data=HR_leaders["HR"].values, index=HR_leaders["Name"], columns=["HR"])
HR_df = HR_df.T

# clean out the same-named players that don't qualify, and add HR column
new_bulb_dict = {}
for row in bulb_df.iterrows():
    if re.search("[0-9]", row[0]):
        new_bulb_dict[row[0].rstrip(string.digits)] = [
            round(row[1]["40W Bulb Seconds"], 2), int(HR_df[row[0].rstrip(string.digits)])
        ]
    else:
        new_bulb_dict[row[0]] = [row[1]["40W Bulb Seconds"], int(HR_df[row[0]])]
        
new_bulb_df = pd.DataFrame.from_dict(new_bulb_dict, orient="index")
new_bulb_df.columns = ["40W Bulb Seconds", "post-Statcast Homeruns"]
new_bulb_df.sort_values("40W Bulb Seconds", ascending=False, inplace=True)


HR_df["Mike Trout"]

HR    240.0
Name: Mike Trout, dtype: float64

In [6]:
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import Set3
from bokeh.models import Range1d, LinearAxis
from bokeh.io import export_png

source = ColumnDataSource(data=pd.DataFrame(new_bulb_df["40W Bulb Seconds"]))
source_HR = ColumnDataSource(data=pd.DataFrame(new_bulb_df["post-Statcast Homeruns"]))

p = figure(
    x_range=source.data["index"], y_range=(
        0,source.data["40W Bulb Seconds"].max()+5
    ),
    title="Powering lightbulbs with dingers (calculated by hit velocity added)", width=1000, height=500
)

p.vbar(
    x="index", top="40W Bulb Seconds", source=source, width=0.8, line_color='white',
    fill_color=factor_cmap("index", palette=Set3[10], factors=source.data["index"]), fill_alpha=1, legend="Bulb Seconds"
)

p.extra_y_ranges = {
    "HR": Range1d(
        start=source_HR.data["post-Statcast Homeruns"].min()-10, end=source_HR.data["post-Statcast Homeruns"].max()+10
    )
}

p.yaxis.axis_label = "Seconds of power for a 40W bulb"
p.add_layout(LinearAxis(y_range_name="HR", axis_label="Total HRs in Statcast Era"), "right")

p.circle(
    x="index", y="post-Statcast Homeruns", source=source_HR, y_range_name="HR",
    fill_color=factor_cmap("index", palette=Set3[10], factors=source.data["index"]), fill_alpha=1,
    line_color="grey", line_width=4, size=20, legend="HRs"
)

p.xgrid.grid_line_color = None
p.ygrid.minor_grid_line_color = 'grey'
p.ygrid.minor_grid_line_alpha = 0.1

p.yaxis.axis_label_text_font_size = "16pt"
p.yaxis.major_label_text_font_size = "12pt"
p.yaxis.axis_label_text_font_style = "normal"
p.yaxis.axis_label_standoff = 20
p.legend.glyph_width = 40
p.legend.glyph_height = 40
p.legend.background_fill_color = "white"
p.legend.border_line_color = "black"
p.legend.border_line_width = 2
p.title.text_font_size = "12pt"


output_notebook()
show(p)

In [7]:
export_png(p, "dingers.png")

RuntimeError: To use bokeh.io image export functions you need selenium ("conda install -c bokeh selenium" or "pip install selenium")