## Basic pandas optimizations


In [1]:
import pandas as pd

# Load the baseball statistics CSV (path is relative to the notebook's
# working directory) and preview the first three rows.
baseball_df = pd.read_csv('../data/baseball_stats.csv')
baseball_df.head(3)

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403


### Adding win percentage to DataFrame

In [3]:
import numpy as np

def calc_win_perc(wins, games_played):
    """Return the fraction of games won, rounded to two decimal places.

    Args:
        wins: Number of games won.
        games_played: Total number of games played.

    Returns:
        ``wins / games_played`` rounded to 2 decimals with ``np.round``.
    """
    return np.round(wins / games_played, decimals=2)

# Baseline approach: visit every row by integer position with .iloc and
# collect each row's win percentage in a plain Python list.
win_perc_list = []
for i in range(len(baseball_df)):
    row = baseball_df.iloc[i]  # positional lookup of the i-th row
    wins = row['W']
    games_played = row['G']
    win_perc = calc_win_perc(wins, games_played)
    win_perc_list.append(win_perc)
# Store the collected values as a new 'WP' column on baseball_df
# (this modifies the existing DataFrame), then preview the result.
baseball_df['WP'] = win_perc_list
baseball_df.head(3)

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,WP
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,0.5
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,0.58
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,0.57


### Iterating with `.iterrows()`
- pandas comes with built-in methods for iterating over DataFrames efficiently.
- Similar to looping with `.iloc`, but `.iterrows()` yields each DataFrame row as an (index, pandas Series) tuple.
- This means each item returned from `.iterrows()` has the row's index label as its first element and the row's data, as a pandas Series, as its second element.
- Notice that we still create 