# Two Visualizations of MLB Home Run Totals Using Bokeh
#### By: Lucas Kelly

#### Blog Post: https://lucaskellydataportfolio.weebly.com/flatiron-blog/donut-visualizations-using-bokeh

#### Data Source: http://www.seanlahman.com/baseball-archive/statistics/

#### Bokeh Dot Plot Documentation: https://bokeh.pydata.org/en/latest/docs/gallery/dot.html

#### Bokeh Pie Chart Documentation: https://bokeh.pydata.org/en/latest/docs/gallery/pie_chart.html

In [44]:
# Importing the necessary libraries; see the Bokeh documentation linked above for more.

from math import pi

import pandas as pd

from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure, show, output_file
from bokeh.transform import cumsum
from bokeh.layouts import row
from bokeh.embed import file_html
from bokeh.resources import CDN

In [45]:
# Load the team-level table from the Sean Lahman baseball database download (link above).

teams = pd.read_csv(filepath_or_buffer='baseballdatabank-2019.2/core/Teams.csv')

In [46]:
# The raw table holds complete statistics for MLB teams from 1871 - 2018. Only the 2018 HR season statistics by
# team are needed, so the data gets narrowed down to a much smaller df below. First, preview the first five rows.

teams.head(n=5)

Unnamed: 0,yearID,lgID,teamID,franchID,divID,Rank,G,Ghome,W,L,...,DP,FP,name,park,attendance,BPF,PPF,teamIDBR,teamIDlahman45,teamIDretro
0,1871,,BS1,BNA,,3,31,,20,10,...,24,0.834,Boston Red Stockings,South End Grounds I,,103,98,BOS,BS1,BS1
1,1871,,CH1,CNA,,2,28,,19,9,...,16,0.829,Chicago White Stockings,Union Base-Ball Grounds,,104,102,CHI,CH1,CH1
2,1871,,CL1,CFC,,8,29,,10,19,...,15,0.818,Cleveland Forest Citys,National Association Grounds,,96,100,CLE,CL1,CL1
3,1871,,FW1,KEK,,7,19,,7,12,...,8,0.803,Fort Wayne Kekiongas,Hamilton Field,,101,107,KEK,FW1,FW1
4,1871,,NY2,NNA,,5,33,,16,17,...,14,0.84,New York Mutuals,Union Grounds (Brooklyn),,90,88,NYU,NY2,NY2


In [48]:
# Selecting only the 2018 season and specific columns to look at how teams did statistically in 2018.

# Use an exact match on the year: `>= 2018` would also keep any later seasons present in the file, which
# contradicts "only the 2018 season" and would leave more than one row per team.
teams = teams[teams['yearID'] == 2018]
teams = teams[['yearID', 'lgID', 'franchID', 'teamID', 'divID', 'Rank', 'R', 'RA', 'G', 'W', 'H', 'BB', 'HBP', 'AB', 'SF', 'HR', '2B', '3B']]

teams.head()


Unnamed: 0,yearID,lgID,franchID,teamID,divID,Rank,R,RA,G,W,H,BB,HBP,AB,SF,HR,2B,3B
2865,2018,NL,ARI,ARI,W,3,693,644,162,82,1283,560.0,52.0,5460,45.0,176,259,50
2866,2018,NL,ATL,ATL,E,1,759,657,162,90,1433,511.0,66.0,5582,43.0,175,314,29
2867,2018,AL,BAL,BAL,E,5,622,892,162,47,1317,422.0,57.0,5507,35.0,188,242,15
2868,2018,AL,BOS,BOS,E,1,876,647,162,108,1509,569.0,55.0,5623,48.0,208,355,31
2869,2018,AL,CHW,CHA,C,4,656,848,162,62,1332,425.0,66.0,5523,32.0,182,259,40


In [49]:
# Next, narrow the data even further, keeping only each team's league, franchise, division and home runs hit.

df = teams.loc[:, ['lgID', 'franchID', 'divID', 'HR']]

In [50]:
# Bare expression: as the last statement of a notebook cell, this renders the narrowed DataFrame for inspection.
df

Unnamed: 0,lgID,franchID,divID,HR
2865,NL,ARI,W,176
2866,NL,ATL,E,175
2867,AL,BAL,E,188
2868,AL,BOS,E,208
2869,AL,CHW,C,182
2870,NL,CHC,C,167
2871,NL,CIN,C,172
2872,AL,CLE,C,216
2873,NL,COL,W,210
2874,AL,DET,C,135


In [51]:
# Summary statistics for the HR column show the range the dot plot's x-axis has to cover:
# a minimum of 128 HRs hit and a maximum of 267 HRs hit.

df['HR'].describe()

count     30.000000
mean     186.166667
std       32.261949
min      128.000000
25%      166.250000
50%      184.000000
75%      209.500000
max      267.000000
Name: HR, dtype: float64

In [52]:
# Setting up for the pie chart visualization.

# Teams in the MLB are split into 6 divisions, with 5 teams in each division. The pie chart breaks down the total
# home runs hit by division.

# Most Bokeh visualizations take in a dictionary. Given how small this data set is, the HRs are simply totalled
# by hand per division (one addend per team) to create the dictionary below.

x = {
    'AL East': (188+208+267+150+217),
    'AL West': (205+176+194+227+214),
    'AL Central': (216+166+182+135+155),
    'NL East': (170+186+175+191+128),
    'NL West': (235+162+176+210+133),
    'NL Central': (157+218+205+167+172),
}

# Turn the dictionary into a DataFrame with an 'index' column (division name) and a 'value' column (HR total).
# The tooltip and the legend below refer to the 'index' column by name, so the columns are left as they are.

data = pd.Series(x).reset_index(name='value')

# To ensure the pie chart is a full circle, each division's wedge angle is its share of all home runs scaled
# to 2*pi radians.
data['angle'] = data['value'] / data['value'].sum() * 2 * pi

# One palette color per division.
data['color'] = Category20c[len(x)]

# NOTE(review): `plot_height` and `legend` are the keyword spellings of the Bokeh release this notebook was
# written against; newer releases appear to use `height` and `legend_field` -- confirm against the installed version.
p = figure(plot_height=350, title="MLB 2018 Home Runs by Division", toolbar_location=None,
           tools="hover", tooltips="@index: @value", x_range=(-0.5, 1.0))

# Each wedge starts at the running total of the preceding angles and ends at the running total including its own.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", color='color', legend='index', source=data)

# A pie chart has no meaningful axes or grid, so hide them.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

# Here is the best part: using the output_file function, Bokeh will automatically store an .html file in your
# working directory. From there, you can open that file and its source code on your local computer and add the
# html source code to your website to display your interactive chart.

output_file("pie.html")

# Additionally, Bokeh will open a new tab so that you can view the html file in your browser.

show(p)


In [55]:
# Here I do the same thing, this time creating a dot plot visualization to compare each individual team's HR totals.

# One categorical row per franchise on the y-axis, with that franchise's HR total as its x position.
factors = df['franchID']
x = df['HR']

# The x-range of 100-275 brackets the observed HR totals (min 128, max 267 in the describe() output above).
dot = figure(title="2018 MLB Total Home Runs by Team", toolbar_location=None,
             y_range=factors, x_range=[100, 275])

# Each team gets a green stem drawn from x=0 out to its HR total, capped by an orange marker.
dot.segment(0, factors, x, factors, line_width=2, line_color="green")
dot.circle(x, factors, size=15, fill_color="orange", line_color="green", line_width=3)

# The placeholder `factors`/`x`/`y`/`colors` lists that used to follow here were never plotted (the glyphs above
# were already built from the team data), so they have been removed.

# Save the chart as an .html file and open it in a browser tab. The file name is left unchanged in case the saved
# file is referenced elsewhere; the page title now describes this chart.
output_file("categorical.html", title="2018 MLB Total Home Runs by Team")

show(dot)