# Slugging Percentages
## Author: Matthew Chin (2021)

**Data**
https://github.com/chadwickbureau/baseballdatabank

In [1]:
# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py

# Cufflinks wrapper on plotly
import cufflinks

# Data science imports
import pandas as pd
import numpy as np

# Options for pandas
pd.options.display.max_columns = 999

# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# from __future__ import print_function, division
import matplotlib as mpl
import matplotlib.pyplot as plt
# %matplotlib inline

from plotly.offline import iplot
cufflinks.go_offline()

# Set global theme
cufflinks.set_config_file(world_readable=True, theme='pearl')

## Slugging Percentage

A batter's slugging percentage involves not only how frequently but how far they get on base when they hit. When they go to bat and they get on base safely with a hit, it increases their chances at getting home. In that case, one's slugging percentage will go up if they hit a single, but goes even farther up if they get on safely with an extra base hit (XBH) or with a home run and touch all four bases. 

Extra base hits and home runs matter for the significance of one's slugging percentage because the power and speed of a batter are considered to know how many bases they touch per at-bat.  

Walks or base on balls (BB) are not considered because they are not hits. 

The formula for slugging is as follows 
(Reference: https://www.mlb.com/glossary/standard-stats/slugging-percentage#:~:text=Definition,hits%20are%20not%20valued%20equally.):

Let $1B$ be a single, $2B$ be a double, $3B$ be a triple, and $HR$ be a home run. 

The formula is:

$SLG = \frac{1B + (2\times2B) + (3\times3B )+ (4\times HR)}{AB}$

In [2]:
# Get slugging percentages from Batting data
slugpct = pd.read_csv("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/Batting.csv",sep=',')

In [3]:
# Get batters who are sluggers from People data
slugppl = pd.read_csv("https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master/core/People.csv",sep=',')

In [4]:
# Sample of slugpct beginning or end values
# Later cells are the most recent

# slugpct.head()
slugpct.tail()

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
108784,zimmebr02,2020,1,BAL,AL,2,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108785,zimmejo02,2020,1,DET,AL,3,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108786,zimmeky01,2020,1,KCA,AL,16,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108787,zuberty01,2020,1,KCA,AL,23,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108788,zuninmi01,2020,1,TBA,AL,28,75,8,11,4,0,4,10.0,0.0,0.0,6,37.0,0.0,3.0,0.0,0.0,0.0


In [5]:
# Print array values
slugpct

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
0,abercda01,1871,1,TRO,,1,4,0,0,0,0,0,0.0,0.0,0.0,0,0.0,,,,,0.0
1,addybo01,1871,1,RC1,,25,118,30,32,6,0,0,13.0,8.0,1.0,4,0.0,,,,,0.0
2,allisar01,1871,1,CL1,,29,137,28,40,4,5,0,19.0,3.0,1.0,2,5.0,,,,,1.0
3,allisdo01,1871,1,WS3,,27,133,28,44,10,2,2,27.0,1.0,1.0,0,2.0,,,,,0.0
4,ansonca01,1871,1,RC1,,25,120,29,39,11,3,0,16.0,6.0,2.0,2,1.0,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108784,zimmebr02,2020,1,BAL,AL,2,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108785,zimmejo02,2020,1,DET,AL,3,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108786,zimmeky01,2020,1,KCA,AL,16,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
108787,zuberty01,2020,1,KCA,AL,23,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
slugarr = slugpct.to_numpy()
slugarr


array([['abercda01', 1871, 1, ..., nan, nan, 0.0],
       ['addybo01', 1871, 1, ..., nan, nan, 0.0],
       ['allisar01', 1871, 1, ..., nan, nan, 1.0],
       ...,
       ['zimmeky01', 2020, 1, ..., 0.0, 0.0, 0.0],
       ['zuberty01', 2020, 1, ..., 0.0, 0.0, 0.0],
       ['zuninmi01', 2020, 1, ..., 0.0, 0.0, 0.0]], dtype=object)

In [8]:
# Find players whose hits are more than 0
# Since 2010
# Write to new CSV


for a in slugarr:
    for b in a:
        if a[1] == 2020 and a[8] > 0:
            print(a) 

['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43 76 15 0 19 60.0 0.0 0.0 18 59.0
 1.0 3.0 0.0 1.0 10.0]
['abreujo02' 2020 1 'CHA' 'AL' 60 240 43