In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [3]:
# python's built in method to grab a web page
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [4]:
# This method opens up a new tab at the URL specified. This page was a typical wikipedia page, we scrolled down to the win/loss
# section and highlit the section, crtl-c to clipboard and now we're using a pandas function to read the contents of the 
# clipboard into a DataFrame
nfl_frame = pd.read_clipboard()

In [5]:
# This is an interesting "down and dirty" method to prototype some data 
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1.0,Dallas Cowboys,511,367,1,0.573,1960,860,NFC East
1,2.0,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3.0,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4.0,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5.0,New England Patriots[b],476,383,9,0.554,1960,868,AFC East
5,6.0,New York Giants,684,572,33,0.543,1925,1289,NFC East
6,7.0,Denver Broncos,465,393,10,0.541,1960,868,AFC West
7,8.0,Minnesota Vikings,457,387,10,0.541,1961,854,NFC North
8,9.0,Baltimore Ravens,181,154,1,0.54,1996,336,AFC North
9,10.0,San Francisco 49ers,522,450,14,0.537,1950,986,NFC West


In [6]:
# return a list of column names
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [7]:
# we can grab a single column by specifying the name as dot notation
nfl_frame.Team

0              Dallas Cowboys
1           Green Bay Packers
2               Chicago Bears
3              Miami Dolphins
4     New England Patriots[b]
5             New York Giants
6              Denver Broncos
7           Minnesota Vikings
8            Baltimore Ravens
9         San Francisco 49ers
10      Indianapolis Colts[c]
11            Oakland Raiders
12        Pittsburgh Steelers
13         Kansas City Chiefs
14        Washington Redskins
15           Seattle Seahawks
16           Cleveland Browns
17       Los Angeles Chargers
18           Los Angeles Rams
19          Carolina Panthers
20        Philadelphia Eagles
21           Tennessee Titans
22              Buffalo Bills
23              Detroit Lions
24         Cincinnati Bengals
25              New York Jets
26         New Orleans Saints
27             Houston Texans
28       Jacksonville Jaguars
29            Atlanta Falcons
30          Arizona Cardinals
31       Tampa Bay Buccaneers
Name: Team, dtype: object

In [9]:
# Columns with more than a single word require this method
nfl_frame['First NFL Season']

0     1960
1     1921
2     1920
3     1966
4     1960
5     1925
6     1960
7     1961
8     1996
9     1950
10    1953
11    1960
12    1933
13    1960
14    1932
15    1976
16    1950
17    1960
18    1937
19    1995
20    1933
21    1960
22    1960
23    1930
24    1968
25    1960
26    1967
27    2002
28    1995
29    1966
30    1920
31    1976
Name: First NFL Season, dtype: int64

In [10]:
# Grab multiple columns by invoking the DataFrame method, specifying the DF object and a list of columns
DataFrame(nfl_frame, columns=['Team', 'Won', 'Lost'])


Unnamed: 0,Team,Won,Lost
0,Dallas Cowboys,511,367
1,Green Bay Packers,730,553
2,Chicago Bears,744,568
3,Miami Dolphins,439,341
4,New England Patriots[b],476,383
5,New York Giants,684,572
6,Denver Broncos,465,393
7,Minnesota Vikings,457,387
8,Baltimore Ravens,181,154
9,San Francisco 49ers,522,450


In [11]:
# grab first 5 rows
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1.0,Dallas Cowboys,511,367,1,0.573,1960,860,NFC East
1,2.0,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3.0,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4.0,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5.0,New England Patriots[b],476,383,9,0.554,1960,868,AFC East


In [12]:
# grab last 5 rows
nfl_frame.tail()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
27,28.0,Houston Texans,106,134,0,0.442,2002,240,AFC South
28,29.0,Jacksonville Jaguars,155,197,0,0.44,1995,352,AFC South
29,30.0,Atlanta Falcons,341,437,6,0.439,1966,784,NFC South
30,31.0,Arizona Cardinals,542,732,40,0.428,1920,1314,NFC West
31,32.0,Tampa Bay Buccaneers,250,393,1,0.389,1976,866,NFC South


In [13]:
# Grab a particular record NOTE: Deprecated, use .loc instead
nfl_frame.ix[11]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Rank                             12
Team                Oakland Raiders
Won                             456
Lost                            401
Tied                             11
Pct.                          0.532
First NFL Season               1960
Total Games                     868
Division                   AFC West
Name: 11, dtype: object

In [14]:
nfl_frame.loc[12]

Rank                                 13
Team                Pittsburgh Steelers
Won                                 601
Lost                                535
Tied                                 20
Pct.                              0.529
First NFL Season                   1933
Total Games                       1,156
Division                      AFC North
Name: 12, dtype: object

In [15]:
# add a column and assign a static value
nfl_frame['Stadium'] = 'Corporate America'

In [16]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1.0,Dallas Cowboys,511,367,1,0.573,1960,860,NFC East,Corporate America
1,2.0,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Corporate America
2,3.0,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Corporate America
3,4.0,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Corporate America
4,5.0,New England Patriots[b],476,383,9,0.554,1960,868,AFC East,Corporate America
5,6.0,New York Giants,684,572,33,0.543,1925,1289,NFC East,Corporate America
6,7.0,Denver Broncos,465,393,10,0.541,1960,868,AFC West,Corporate America
7,8.0,Minnesota Vikings,457,387,10,0.541,1961,854,NFC North,Corporate America
8,9.0,Baltimore Ravens,181,154,1,0.54,1996,336,AFC North,Corporate America
9,10.0,San Francisco 49ers,522,450,14,0.537,1950,986,NFC West,Corporate America


In [27]:
# add a Series to a DataFrame
stadiums = Series(['Michigan Stadium',
'Beaver Stadium',
'Ohio Stadium',
'Kyle Field',
'Neyland Stadium',
'Tiger Stadium',
'Bryant–Denny Stadium',
'Darrell K Royal–Texas Memorial Stadium',
'Los Angeles Memorial Coliseum',
'Sanford Stadium',
'Rose Bowl',
'Cotton Bowl Stadium',
'Ben Hill Griffin Stadium',
'Jordan–Hare Stadium',
'Memorial Stadium',
'Gaylord Family Oklahoma Memorial Stadium',
'FedExField',
'Memorial Stadium',
'Lambeau Field',
'Notre Dame Stadium',
'Camp Randall Stadium',
'Williams-Brice Stadium',
'AT&T Stadium',
'Bobby Bowden Field at Doak Campbell Stadium',
'Arrowhead Stadium',
'EverBank Field',
'Donald W. Reynolds Razorback Stadium',
'Spartan Stadium',
'Ralph Wilson Stadium',
'Bank of America Stadium',
'FirstEnergy Stadium',
'NRG Stadium',
])

In [28]:
nfl_frame['Stadium'] = stadiums

In [29]:
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1.0,Dallas Cowboys,511,367,1,0.573,1960,860,NFC East,Michigan Stadium
1,2.0,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Beaver Stadium
2,3.0,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Ohio Stadium
3,4.0,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Kyle Field
4,5.0,New England Patriots[b],476,383,9,0.554,1960,868,AFC East,Neyland Stadium


In [30]:
# delete a column
del nfl_frame['Stadium']

In [31]:
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1.0,Dallas Cowboys,511,367,1,0.573,1960,860,NFC East
1,2.0,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3.0,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4.0,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5.0,New England Patriots[b],476,383,9,0.554,1960,868,AFC East


In [32]:
# construct a DataFrame from a dictionary
data = {'City':['Phoenix', 'San Diego', 'Houston', 'Tucson'], 'Population':[1615017, 1406630, 2303482, 530706]}

In [33]:
# 
city_frame = DataFrame(data)
city_frame

Unnamed: 0,City,Population
0,Phoenix,1615017
1,San Diego,1406630
2,Houston,2303482
3,Tucson,530706
