# Moneyball
## Module Import and CSV Reading

In [3]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [5]:
titles = ['playerID', 'yearID', 'stint', 'teamID', 'lgID', 'R', 'H', 'RBI', 'BB', 'HBP', 'SF', 'AB']
batting = pd.read_csv('Data/Batting.csv', usecols=titles)

### Removing NaNs from Important Fields

In [6]:
batting['SF'] = batting['SF'].fillna(0)
batting['HBP'] = batting['HBP'].fillna(0)

In [7]:
master_titles=['playerID', 'nameFirst', 'nameLast', 'nameGiven', 'weight', 'height', 'bats', 'throws', 'finalGame']
master = pd.read_csv('Data/Master.csv', usecols=master_titles)

In [8]:
salary_titles=['yearID', 'playerID', 'salary']
salary = pd.read_csv('Data/Salaries.csv', usecols=salary_titles)

In [9]:
appearances = pd.read_csv('Data/Appearances.csv')

### Merging DataFrames

In [10]:
batting_master = batting.merge(master, on=['playerID'])

In [11]:
batting_master_salary = batting_master.merge(salary, on=['playerID', 'yearID'])

In [12]:
final = batting_master_salary.merge(appearances, on=['playerID', 'yearID'])

### Finding 2015-season Players

In [13]:
playerlist2015 = final[final['yearID'] == 2015]

In [16]:
players = playerlist2015

### Finding On-Base-Percentage and Creating New Series

In [17]:
numerator = players['H'] + players['BB'] + players['HBP']
denominator = players['AB'] + players['BB'] + players['HBP'] + players['SF']
OBP = numerator/denominator
players['OBP'] = OBP

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


### Finding OBP/Salary and Creating New Series

In [18]:
obp_per_salary = players['OBP']/players['salary']
players['OBP Per Salary'] = obp_per_salary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


### Finding the Best Player by Position

Note: Pitcher was slightly complicated as many had an OBP of 1.0 (outliers, as far as we are concerned).  My solution was to filter by games played at the position and "at bats".  

In [89]:
def get_best_player(df):
    p_list = ['G_p', 'G_c', 'G_1b', 'G_2b', 'G_3b', 'G_ss', 'G_lf', 'G_cf', 'G_rf']
    best = []
    for item in p_list:
        if item == 'G_p':
            best.append(df[(df['G_p'] > 20) & (df['AB'] > 20)].sort(['OBP Per Salary'], ascending=False)[:1])
        else:
            best.append(df[df[item] > 50].sort(['OBP Per Salary'], ascending=False)[:1])
    return best

In [94]:
best = get_best_player(players)



In [95]:
best_players = pd.concat(best)

In [96]:
p_list = ['Pitcher', 'Catcher', 'First Base', 'Second Base', 'Third Base', 'Shortstop', 'Left Field', 'Center Field', 'Right Field']
best_players["Position"] = Series(p_list, best_players.index)

## Moneyball Team 2015

In [97]:
best_players[['nameFirst', 'nameLast', 'Position', 'AB', 'RBI', 'salary', 'OBP', 'OBP Per Salary']]

Unnamed: 0,nameFirst,nameLast,Position,AB,RBI,salary,OBP,OBP Per Salary
32169,Michael,Wacha,Pitcher,52.0,4.0,520000,0.214286,4.120879e-07
32260,Roberto,Perez,Catcher,184.0,21.0,508600,0.348416,6.850497e-07
32212,C. J.,Cron,First Base,378.0,51.0,512500,0.300248,5.8585e-07
32257,Joe,Panik,Second Base,382.0,37.0,522500,0.377622,7.227223e-07
32217,Matt,Duffy,Third Base,573.0,77.0,509000,0.334426,6.57026e-07
31977,Xander,Bogaerts,Shortstop,613.0,81.0,543000,0.354839,6.534783e-07
31394,Brandon,Guyer,Left Field,332.0,28.0,515800,0.358639,6.953058e-07
31884,A. J.,Pollock,Center Field,609.0,76.0,519500,0.367013,7.064743e-07
32283,George,Springer,Right Field,388.0,41.0,512900,0.367483,7.164814e-07
