In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [2]:
# Introduction to DataFrames.

# We need some tabular data to play with; the NFL win-loss records table on
# Wikipedia is used here.
# NOTE(review): this import sits in the middle of the notebook; consider
# moving it to the import cell at the top.
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
# Open the page in the default browser so the table can be copied by hand.
# The call's return value is what the cell displays.
webbrowser.open(website)

True

In [12]:
# Copy the table from the browser page first, then build the DataFrame from
# whatever is currently on the system clipboard.
# NOTE(review): the result depends on the clipboard contents at run time, so
# this cell is not reproducible from code alone.
nfl_frame = pd.read_clipboard()

In [13]:
# Display the frame that was just read
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North
4,5,Chicago Bears,786,620,42,0.557,1920,1448,NFC North


In [14]:
# We can grab the column names with .columns (returned as an Index)
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First Season',
       'Total Games', 'Conference'],
      dtype='object')

In [15]:
# Let's look at a few specific columns by passing a list of column labels.
# Unlike re-wrapping the frame in DataFrame(..., columns=...), a misspelled
# label raises KeyError here instead of silently producing an all-NaN column.
nfl_frame[['Team', 'First Season', 'Total Games']]

Unnamed: 0,Team,First Season,Total Games
0,Dallas Cowboys,1960,947
1,Green Bay Packers,1921,1414
2,New England Patriots,1960,949
3,Baltimore Ravens,1996,417
4,Chicago Bears,1920,1448


In [16]:
# What if we ask for a column label the frame does not have ('Stadium')?
# The known columns are kept and the unknown label is added as a new column
# filled with missing values.
nfl_frame.reindex(columns=['Team', 'First Season', 'Total Games', 'Stadium'])

Unnamed: 0,Team,First Season,Total Games,Stadium
0,Dallas Cowboys,1960,947,
1,Green Bay Packers,1921,1414,
2,New England Patriots,1960,949,
3,Baltimore Ravens,1996,417,
4,Chicago Bears,1920,1448,


In [17]:
# Check the column labels again: nfl_frame itself has not gained a 'Stadium'
# column, because the previous cells only displayed new frames.
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First Season',
       'Total Games', 'Conference'],
      dtype='object')

In [26]:
# A single column can be read as an attribute of the frame; the result is a
# Series named after the column.
nfl_frame.Team


0          Dallas Cowboys
1       Green Bay Packers
2    New England Patriots
3        Baltimore Ravens
4           Chicago Bears
Name: Team, dtype: object

In [27]:
# Bracket indexing with the column label also works, and is required for
# labels that contain spaces, which attribute access cannot express.
nfl_frame['Total Games']

0      947
1    1,414
2      949
3      417
4    1,448
Name: Total Games, dtype: object

In [40]:
# Rows can be retrieved by integer position with .iloc; passing a list ([3])
# returns a one-row DataFrame.
nfl_frame.iloc[[3]]

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North


In [31]:
# We can also assign values to entire columns: a single value is set on every
# row, and the column is created if it does not exist yet.
nfl_frame['Stadium']="Levi's Stadium" # double quotes so the string can contain the apostrophe

In [28]:
# Display the frame with the new 'Stadium' column.
# NOTE(review): the saved output for this cell has 6 rows and a 'Tied*'
# column, unlike the 5-row frame shown everywhere else, and its execution
# count (28) precedes the assignment cell (31). It looks like stale output
# from an earlier run; re-run the notebook top to bottom.
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East,Levi's Stadium
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North,Levi's Stadium
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,Levi's Stadium
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East,Levi's Stadium
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North,Levi's Stadium
5,6,San Francisco 49ers,545,444,14,0.55,1950,1003,NFC West,Levi's Stadium


In [32]:
# Put one distinct integer per row into the 'Stadium' column.
# An assigned array must have exactly as many elements as the frame has rows,
# so derive the length from the frame instead of hard-coding 5 (a mismatch
# raises ValueError).
nfl_frame["Stadium"] = np.arange(len(nfl_frame))

# Show the updated frame
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East,0
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North,1
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East,2
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North,3
4,5,Chicago Bears,786,620,42,0.557,1920,1448,NFC North,4


In [33]:
# The column labels now include 'Stadium'
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First Season',
       'Total Games', 'Conference', 'Stadium'],
      dtype='object')

In [34]:
# A Series to add to the DataFrame. The dict keys become the Series index
# labels (4 first, then 0), so only two row labels are covered.
stadiums = Series({4: "Levi's Stadium", 0: "AT&T Stadium"})

In [35]:
# Assign the Series to the 'Stadium' column. Values are matched to rows by
# index label, and rows whose label is not in the Series end up with a
# missing value.
nfl_frame['Stadium']=stadiums

# Show the result of the aligned assignment
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East,AT&T Stadium
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North,
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East,
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North,
4,5,Chicago Bears,786,620,42,0.557,1920,1448,NFC North,Levi's Stadium


In [36]:
# We can also delete columns in place with del.
# NOTE(review): this mutates nfl_frame, so re-running the cell without first
# re-creating 'Stadium' will presumably raise KeyError.
del nfl_frame['Stadium']

# Show the frame without the 'Stadium' column
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North
4,5,Chicago Bears,786,620,42,0.557,1920,1448,NFC North


In [38]:
# A DataFrame can also be built from a dict that maps each column name to a
# list of values; the lists must all have the same length.
data = dict(
    City=['SF', 'LA', 'NYC'],
    Population=[837000, 3880000, 8400000],
)

city_frame = DataFrame(data)

# Display the resulting frame
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [40]:
# For the full list of ways to create DataFrames from various sources, see
# the pandas documentation, opened here in the default browser.
# NOTE(review): this URL points at the old "dev/generated" docs path and may
# no longer resolve; confirm and update if needed.
website = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True

In [43]:
# Position-based selection on both axes: rows at positions 1 and 2 (the slice
# end is exclusive) and the columns at positions 2 and 3 ('Won' and 'Lost').
nfl_frame.iloc[1:3, [2,3]]

Unnamed: 0,Won,Lost
1,787,589
2,529,411


In [45]:
# Preview the first rows of the frame (head() defaults to 5 rows)
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North
4,5,Chicago Bears,786,620,42,0.557,1920,1448,NFC North


In [46]:
# Preview only the first 2 rows
nfl_frame.head(2)

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East
1,2,Green Bay Packers,787,589,38,0.57,1921,1414,NFC North


In [54]:
# Filter rows with a boolean condition: keep only the teams whose
# 'First Season' is later than 1930. The original row labels are preserved.
nfl_frame.loc[nfl_frame['First Season'] > 1930]


Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,538,403,6,0.571,1960,947,NFC East
2,3,New England Patriots,529,411,9,0.562,1960,949,AFC East
3,4,Baltimore Ravens,233,183,1,0.56,1996,417,AFC North


In [56]:
# Same row filter, combined with a column selection: .loc takes the row
# condition first and a list of column labels second (returned in the order
# listed).
nfl_frame.loc[nfl_frame['First Season'] > 1930, ['First Season', 'Team']]

Unnamed: 0,First Season,Team
0,1960,Dallas Cowboys
2,1960,New England Patriots
3,1996,Baltimore Ravens
