In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [2]:
#Now we'll learn DataFrames

#Let's get some data to play with. How about the NFL?
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [5]:
#Read data from clipboard
nfl_frame = pd.read_clipboard()

In [6]:
#Show
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East


In [7]:
# We can grab the column names with .columns
nfl_frame.columns

Index([u'Rank', u'Team', u'Won', u'Lost', u'Tied', u'Pct.',
       u'First NFL Season', u'Total Games', u'Division'],
      dtype='object')

In [11]:
#Lets see some specific data columns
DataFrame(nfl_frame,columns=['Team','First Season','Total Games'])

Unnamed: 0,Team,First Season,Total Games
0,Chicago Bears,,1338
1,Dallas Cowboys,,850
2,Green Bay Packers,,1304
3,Miami Dolphins,,768
4,New England Patriots,,852


In [12]:
#What happens if we ask for a column that doesn't exist?
DataFrame(nfl_frame,columns=['Team','First Season','Total Games','Stadium'])

Unnamed: 0,Team,First Season,Total Games,Stadium
0,Chicago Bears,,1338,
1,Dallas Cowboys,,850,
2,Green Bay Packers,,1304,
3,Miami Dolphins,,768,
4,New England Patriots,,852,


In [32]:
# Call columns
nfl_frame.reset_index()

KeyError: (u'Rank', u'Team', u'Won', u'Lost', u'Tied', u'Pct.', u'First NFL Season', u'Total Games', u'Division')

In [18]:
#We can retrieve individual columns
nfl_frame.Team

0       Dallas Cowboys
1        Chicago Bears
2    Green Bay Packers
3       Miami Dolphins
4     Baltimore Ravens
Name: Team, dtype: object

In [19]:
# Or try this method for multiple word columns
nfl_frame['Total Games']

0    1,338
1      850
2    1,304
3      768
4      852
Name: Total Games, dtype: object

In [16]:
#We can retrieve rows through indexing
nfl_frame.ix[3]

Rank                             4
Team                Miami Dolphins
Won                            429
Lost                           335
Tied                             4
Pct.                         0.561
First NFL Season              1966
Total Games                    768
Division                  AFC East
Name: 3, dtype: object

In [20]:
#We can also assign same value to entire column
nfl_frame['Stadium']="Levi's Stadium" #Careful with the ' here

In [21]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North,Levi's Stadium
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East,Levi's Stadium
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North,Levi's Stadium
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East,Levi's Stadium
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East,Levi's Stadium


In [25]:
#Putting numbers for stadiums  - array on right side has to be of same height as Stadium column's # of rows.
nfl_frame["Stadium"] = np.arange(5)

#Show
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North,0
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East,1
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North,2
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East,3
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East,4


In [26]:
# Call columns
nfl_frame.columns

Index([u'Rank', u'Team', u'Won', u'Lost', u'Tied', u'Pct.',
       u'First NFL Season', u'Total Games', u'Division', u'Stadium'],
      dtype='object')

In [27]:
#Adding a Series to a DataFrame
stadiums = Series(["Levi's Stadium","AT&T Stadium"],index=[4,0])
stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [29]:
#Now input into the nfl DataFrame
nfl_frame['Stadium']=stadiums
#So, if series being added has index values in it, then, respective values are user for particular index rows.

#Show
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North,AT&T Stadium
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East,
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North,
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East,
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East,Levi's Stadium


In [16]:
#We can also delete columns
del nfl_frame['Stadium']

nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North


In [30]:
#DataFrames can be constructed many ways. Another way is from a dictionary of equal length lists
data = {'City':['SF','LA','NYC'],
        'Population':[837000,3880000,8400000]}

city_frame = DataFrame(data)

#Show
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [31]:
#For full list of ways to create DataFrames from various sources go to the documentation for pandas:
website = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True