In [1]:
# preprocessing for 2015-16 NBA season D3.js visualization
import pandas as pd
import time
%load_ext watermark
%watermark -a 'Kevin Nguyen' -v -p pandas -d -t -z

Kevin Nguyen 2016-04-14 13:22:37 EDT

CPython 3.5.0
IPython 4.1.2

pandas 0.18.0


In [2]:
# http://www.basketball-reference.com/leagues/NBA_2016_games.html
data = pd.read_csv("../data/data.csv", skiprows=1, header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Tue Oct 27 2015,8:00 pm,Box Score,Detroit Pistons,106,Atlanta Hawks,94,,
1,Tue Oct 27 2015,8:00 pm,Box Score,Cleveland Cavaliers,95,Chicago Bulls,97,,
2,Tue Oct 27 2015,10:30 pm,Box Score,New Orleans Pelicans,95,Golden State Warriors,111,,
3,Wed Oct 28 2015,7:30 pm,Box Score,Philadelphia 76ers,95,Boston Celtics,112,,
4,Wed Oct 28 2015,7:30 pm,Box Score,Chicago Bulls,115,Brooklyn Nets,100,,


In [3]:
# get home and visiting team wins
home_team = data[4].values
away_team = data[6].values

In [4]:
# compare which team won
# return win or loss for each row
data['home_team_won'] = [home > away for home, away in zip(home_team, away_team)]
data['away_team_won'] = [home < away for home, away in zip(home_team, away_team)]

In [5]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,home_team_won,away_team_won
0,Tue Oct 27 2015,8:00 pm,Box Score,Detroit Pistons,106,Atlanta Hawks,94,,,True,False
1,Tue Oct 27 2015,8:00 pm,Box Score,Cleveland Cavaliers,95,Chicago Bulls,97,,,False,True
2,Tue Oct 27 2015,10:30 pm,Box Score,New Orleans Pelicans,95,Golden State Warriors,111,,,False,True
3,Wed Oct 28 2015,7:30 pm,Box Score,Philadelphia 76ers,95,Boston Celtics,112,,,False,True
4,Wed Oct 28 2015,7:30 pm,Box Score,Chicago Bulls,115,Brooklyn Nets,100,,,True,False


In [6]:
# rename cols to make it eaiser to concat dataframes
home_team = data.set_index(data[3])[['home_team_won', 0]]
home_team.columns = ['RESULT', 'DATE']
away_team = data.set_index(data[5])[['away_team_won', 0]]
away_team.columns = ['RESULT', 'DATE']

# concat frames
df = pd.concat([away_team, home_team])

# delete index name
del df.index.name
df = df.reset_index()

# rename cols in new dataset
df.columns = ['TEAM', 'RESULT', 'DATE']

# convert date into pandas date time format
df['DATE'] = pd.to_datetime(df.DATE)

In [7]:
# set up weekly sequences with python's datetime module
import datetime

start = pd.datetime(2015, 10, 27)
end = pd.datetime(2016, 4, 14)
rng = pd.date_range(start, end, freq='W')
df['week'] = df['DATE'].dt.weekofyear

In [8]:
# convert results into intergers
df['RESULT'] = df['RESULT'].astype(int)

# group dataframe by team and week
grouped_df = df.groupby(['TEAM', 'week'])['RESULT'].sum().unstack().cumsum(axis=1)

# delete col and index names from indexing
del grouped_df.index.name
del grouped_df.columns.name

# save team names to rename columns later
team_names = grouped_df.index

# reset index
grouped_df.reset_index(inplace=True, drop=True)

# reset index
grouped_df.T.reset_index(inplace=True, drop=True)

In [9]:
# transpose dataframe
frame = grouped_df.T

# rename columns
frame.columns = team_names

# rename index
frame.index = [i + 1 for i in range(frame.shape[0])]

# name index for visualization
frame.index.name = 'week'

# save dataset
frame.to_csv('../test-visualization/rankdata.csv')

# checkout new dataset
frame

Unnamed: 0_level_0,Atlanta Hawks,Boston Celtics,Brooklyn Nets,Charlotte Hornets,Chicago Bulls,Cleveland Cavaliers,Dallas Mavericks,Denver Nuggets,Detroit Pistons,Golden State Warriors,...,Oklahoma City Thunder,Orlando Magic,Philadelphia 76ers,Phoenix Suns,Portland Trail Blazers,Sacramento Kings,San Antonio Spurs,Toronto Raptors,Utah Jazz,Washington Wizards
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2,1,0,0,2,4,3,2,3,4,...,2,1,1,1,1,2,3,3,2,1
2,3,4,1,1,3,6,4,4,4,6,...,6,1,2,1,3,4,7,4,3,4
3,5,6,2,4,5,8,6,5,5,9,...,9,1,3,2,5,7,9,8,4,5
4,6,9,2,6,6,12,9,6,7,13,...,12,2,4,2,8,7,10,11,6,6
5,9,13,3,8,7,14,10,9,9,15,...,14,3,5,2,10,8,14,13,10,7
6,10,14,4,10,7,16,10,10,9,17,...,16,4,5,2,12,9,16,14,11,8
7,10,15,5,12,9,18,11,10,9,18,...,16,5,5,2,14,10,18,15,12,10
8,12,17,7,13,10,19,13,11,13,22,...,17,7,5,3,17,11,21,18,13,13
9,14,20,8,16,11,22,14,13,14,24,...,19,8,5,5,18,12,24,20,14,15
10,17,21,8,20,12,25,14,16,16,28,...,20,9,6,5,20,12,27,23,16,15


In [10]:
# list_of_weeks = ["Dec 28, 2014", "Jan 4, 2015", "Jan 11, 2015", "Jan 18, 2015", "Jan 25, 2015", "Feb 1,2015",
# "Feb 8, 2015", "Feb 15, 2015", "Feb 22, 2015", "Mar 1, 2015", "Mar 8, 2015", "Mar 15, 2015", "Mar 22, 2015",
# "Mar 29, 2015", "Apr 5, 2015", "Apr 12, 2015", "Apr 19, 2015", "Apr 26, 2015", "May 3, 2015", "May 10, 2015",
# "May 17, 2015", "May 24, 2015", "May 31, 2015", "Jun 7, 2015", "Jun 14, 2015"]

# ["Week {}".format(i + 1) for i in range(len(frame))]

In [1]:
west = ["Golden State Warriors", "San Antonio Spurs", "Oklahoma City Thunder", "Los Angeles Clippers",
        "Portland Trail Blazers", "Dallas Mavericks", "Memphis Grizzlies", "Houston Rockets", "Utah Jazz", 
        "Sacramento Kings", "Denver Nuggets", "New Orleans Pelicans", "Minnesota Timberwolves",
        "Phoenix Suns", "Los Angeles Lakers"]
east = ["Cleveland Cavaliers", "Toronto Raptors", "Miami Heat", "Atlanta Hawks", "Boston Celtics",
        "Charlotte Hornets", "Indiana Pacers", "Detroit Pistons", "Chicago Bulls", "Washington Wizards", 
        "Orlando Magic", "Milwaukee Bucks", "New York Knicks", "Brooklyn Nets", "Philadelphia 76ers"]

In [2]:
for team in west:
    print("<li class='candidatename'><a data-target='#'>{}</a></li>".format(team))

<li class='candidatename'><a data-target='#'>Golden State Warriors</a></li>
<li class='candidatename'><a data-target='#'>San Antonio Spurs</a></li>
<li class='candidatename'><a data-target='#'>Oklahoma City Thunder</a></li>
<li class='candidatename'><a data-target='#'>Los Angeles Clippers</a></li>
<li class='candidatename'><a data-target='#'>Portland Trail Blazers</a></li>
<li class='candidatename'><a data-target='#'>Dallas Mavericks</a></li>
<li class='candidatename'><a data-target='#'>Memphis Grizzlies</a></li>
<li class='candidatename'><a data-target='#'>Houston Rockets</a></li>
<li class='candidatename'><a data-target='#'>Utah Jazz</a></li>
<li class='candidatename'><a data-target='#'>Sacramento Kings</a></li>
<li class='candidatename'><a data-target='#'>Denver Nuggets</a></li>
<li class='candidatename'><a data-target='#'>New Orleans Pelicans</a></li>
<li class='candidatename'><a data-target='#'>Minnesota Timberwolves</a></li>
<li class='candidatename'><a data-target='#'>Phoenix Su

In [3]:
for team in east:
    print("<li class='candidatename'><a data-target='#'>{}</a></li>".format(team))

<li class='candidatename'><a data-target='#'>Cleveland Cavaliers</a></li>
<li class='candidatename'><a data-target='#'>Toronto Raptors</a></li>
<li class='candidatename'><a data-target='#'>Miami Heat</a></li>
<li class='candidatename'><a data-target='#'>Atlanta Hawks</a></li>
<li class='candidatename'><a data-target='#'>Boston Celtics</a></li>
<li class='candidatename'><a data-target='#'>Charlotte Hornets</a></li>
<li class='candidatename'><a data-target='#'>Indiana Pacers</a></li>
<li class='candidatename'><a data-target='#'>Detroit Pistons</a></li>
<li class='candidatename'><a data-target='#'>Chicago Bulls</a></li>
<li class='candidatename'><a data-target='#'>Washington Wizards</a></li>
<li class='candidatename'><a data-target='#'>Orlando Magic</a></li>
<li class='candidatename'><a data-target='#'>Milwaukee Bucks</a></li>
<li class='candidatename'><a data-target='#'>New York Knicks</a></li>
<li class='candidatename'><a data-target='#'>Brooklyn Nets</a></li>
<li class='candidatename'>

In [17]:
for team in east:
    print("(d.name === '{}')".format(team))

(d.name === 'Cleveland Cavaliers')
(d.name === 'Toronto Raptors')
(d.name === 'Miami Heat')
(d.name === 'Atlanta Hawks')
(d.name === 'Boston Celtics')
(d.name === 'Charlotte Hornets')
(d.name === 'Indiana Pacers')
(d.name === 'Detroit Pistons')
(d.name === 'Chicago Bulls')
(d.name === 'Washington Wizards')
(d.name === 'Orlando Magic')
(d.name === 'Milwaukee Bucks')
(d.name === 'New York Knicks')
(d.name === 'Brooklyn Nets')
(d.name === 'Philadelphia 76ers')


In [11]:
%reload_ext watermark
%watermark -a 'Kevin Nguyen' -v -d -t -z

Kevin Nguyen 2016-04-14 13:22:37 EDT

CPython 3.5.0
IPython 4.1.2
