# Cubs lineup analysis

By [Ben Welsh](https://palewi.re/who-is-ben-welsh/)

A look at the lineups selected by Chicago Cubs Manager Joe Maddon during the 2018 baseball season.

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Show full lineup ids without truncation. `None` means "no limit"; the old
# `-1` sentinel was deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [3]:
# Load the 2018 lineup table and index it by its `number` column
# (presumably the game number -- confirm against the CSV).
lineups_path = "./input/cubs_2018_lineup.csv"
df = pd.read_csv(lineups_path).set_index("number")

### How many distinct lineups have been used?

In [4]:
# Build a lineup key from the nine batting-order slots only. Selecting the
# columns explicitly keeps this cell idempotent: joining every column would
# fold an existing `id` back into itself if the cell were re-run.
batting_slots = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
df['id'] = df[batting_slots].apply('-'.join, axis=1)

In [5]:
# Preview the first five rows, including the new `id` column.
df.head(n=5)

Unnamed: 0_level_0,one,two,three,four,five,six,seven,eight,nine,id
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Happ,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,Baez,pitcher,Happ-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-Baez-pitcher
2,Almora,Bryant,Rizzo,Contreras,Schwarber,Russell,Zobrist,Baez,pitcher,Almora-Bryant-Rizzo-Contreras-Schwarber-Russell-Zobrist-Baez-pitcher
3,Happ,Bryant,Rizzo,Contreras,Schwarber,Russell,Zobrist,Heyward,pitcher,Happ-Bryant-Rizzo-Contreras-Schwarber-Russell-Zobrist-Heyward-pitcher
4,Almora,Bryant,Rizzo,Russell,Schwarber,Caratini,Heyward,Baez,pitcher,Almora-Bryant-Rizzo-Russell-Schwarber-Caratini-Heyward-Baez-pitcher
5,Happ,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,Baez,pitcher,Happ-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-Baez-pitcher


In [6]:
# Summary of the lineup ids: row count, distinct lineups, and the most common one.
df["id"].describe()

count     162                                                                 
unique    151                                                                 
top       Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher
freq      5                                                                   
Name: id, dtype: object

In [7]:
# Count rows per distinct lineup id, then order the most-used lineups first.
games_per_lineup = df.groupby("id").size()
top_lineups = (
    games_per_lineup
    .rename("games")
    .reset_index()
    .sort_values("games", ascending=False)
)

In [8]:
# Keep only lineups that appear more than once.
top_lineups.loc[top_lineups["games"] > 1]

Unnamed: 0,id,games
2,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher,5
25,Almora-Heyward-Baez-Rizzo-Contreras-Schwarber-Happ-Russell-pitcher,2
72,Murphy-Baez-Rizzo-Zobrist-Heyward-Contreras-Schwarber-pitcher-Bote,2
143,Zobrist-Heyward-Bryant-Rizzo-Contreras-Schwarber-Baez-Almora-pitcher,2
10,Almora-Baez-Rizzo-Contreras-Stella-Schwarber-Russell-Heyward-Happ,2
53,Happ-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-Baez-pitcher,2
105,Rizzo-Bryant-Heyward-Baez-Contreras-Schwarber-Almora-Russell-pitcher,2
15,Almora-Bryant-Rizzo-Baez-Zobrist-Contreras-Bote-pitcher-Russell,2


### Has the lineup in Game 163 against the Brewers ever been used before?

<img src="input/game163.jpg">

In [9]:
# Game 163 batting order, written in the same dash-joined format as `df.id`
# (presumably transcribed from input/game163.jpg -- verify against the image).
game163 = "Murphy-Zobrist-Baez-Rizzo-Bryant-Schwarber-Heyward-pitcher-Contreras"

In [10]:
# Rows whose lineup id matches Game 163 exactly; an empty frame means it was never used.
df.loc[df["id"] == game163]

Unnamed: 0_level_0,one,two,three,four,five,six,seven,eight,nine,id
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


### Has the lineup in the Wild Card Game ever been used before?

<img src="input/wild-card-lineup.jpg">

In [24]:
# Wild Card Game batting order, written in the same dash-joined format as `df.id`
# (presumably transcribed from input/wild-card-lineup.jpg -- verify against the image).
wildcard = 'Zobrist-Bryant-Rizzo-Baez-Almora-Murphy-Contreras-Bote-pitcher'

In [25]:
# Rows whose lineup id matches the Wild Card Game exactly; an empty frame means it was never used.
df.loc[df["id"] == wildcard]

Unnamed: 0_level_0,one,two,three,four,five,six,seven,eight,nine,id
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


### When was the most frequent lineup used?

In [11]:
# Derive the most-used lineup from the data instead of pasting its id in by
# hand, so this cell stays correct if the input file changes.
# value_counts() sorts by frequency, so idxmax() returns the top lineup's id.
most_used_id = df["id"].value_counts().idxmax()
df.loc[df["id"] == most_used_id]

Unnamed: 0_level_0,one,two,three,four,five,six,seven,eight,nine,id
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
16,Almora,Baez,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,pitcher,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher
17,Almora,Baez,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,pitcher,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher
25,Almora,Baez,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,pitcher,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher
49,Almora,Baez,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,pitcher,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher
52,Almora,Baez,Bryant,Rizzo,Contreras,Schwarber,Russell,Heyward,pitcher,Almora-Baez-Bryant-Rizzo-Contreras-Schwarber-Russell-Heyward-pitcher


### How many times has the pitcher batted ninth?

In [12]:
# Number of rows where the ninth slot is the pitcher.
int((df["nine"] == 'pitcher').sum())

99

In [13]:
# Number of rows where the eighth slot is the pitcher.
int((df["eight"] == 'pitcher').sum())

53

In [14]:
# Summary of the ninth slot: row count, distinct values, and the most common one.
df["nine"].describe()

count     162    
unique    7      
top       pitcher
freq      99     
Name: nine, dtype: object

In [15]:
# How often each value appears in the ninth slot.
df["nine"].value_counts()

pitcher     99
Happ        18
Russell     16
Bote        15
Almora      7 
Caratini    4 
Stella      3 
Name: nine, dtype: int64

### Who are the most frequent batters at the other positions?

In [16]:
# How often each value appears in the eighth slot.
df["eight"].value_counts()

pitcher      53
Heyward      25
Happ         21
Russell      20
Baez         12
Gimenez      8 
Caratini     6 
Schwarber    4 
Bote         4 
Contreras    4 
Almora       3 
Stella       2 
Name: eight, dtype: int64

In [17]:
# How often each value appears in the seventh slot.
df["seven"].value_counts()

Russell      42
Contreras    27
Happ         19
Caratini     15
Baez         15
Heyward      12
Schwarber    9 
Bote         8 
Almora       8 
Zobrist      5 
Gimenez      1 
Stella       1 
Name: seven, dtype: int64

In [18]:
# How often each value appears in the sixth slot.
df["six"].value_counts()

Schwarber    70
Contreras    16
Russell      14
Happ         13
Caratini     11
Bote         8 
Almora       8 
Baez         7 
Zobrist      6 
Bryant       4 
Heyward      3 
Stella       2 
Name: six, dtype: int64

In [19]:
# How often each value appears in the fifth slot.
df["five"].value_counts()

Contreras    43
Schwarber    27
Baez         17
Happ         16
Russell      12
Zobrist      11
Almora       8 
Stella       7 
Heyward      7 
Bote         5 
Bryant       5 
Caratini     4 
Name: five, dtype: int64

In [20]:
# How often each value appears in the fourth slot.
df["four"].value_counts()

Rizzo        66
Baez         31
Contreras    29
Zobrist      17
Russell      5 
Heyward      5 
Bote         3 
Bryant       3 
Murphy       2 
Schwarber    1 
Name: four, dtype: int64

In [21]:
# How often each value appears in the third slot.
df["three"].value_counts()

Rizzo        49
Bryant       36
Zobrist      23
Heyward      21
Baez         21
Bote         4 
Schwarber    2 
Contreras    2 
Almora       2 
Stella       1 
Murphy       1 
Name: three, dtype: int64

In [22]:
# How often each value appears in the second slot.
df["two"].value_counts()

Bryant       43
Baez         37
Heyward      37
Zobrist      18
Almora       12
Schwarber    6 
Stella       5 
Happ         2 
Rizzo        1 
Russell      1 
Name: two, dtype: int64

In [23]:
# How often each value appears in the leadoff slot.
df["one"].value_counts()

Almora       46
Rizzo        31
Murphy       29
Zobrist      27
Happ         13
Bryant       7 
Baez         4 
Stella       3 
Schwarber    1 
Contreras    1 
Name: one, dtype: int64