In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## Series

In [2]:
obj = Series([3,6,9,12])
obj

0     3
1     6
2     9
3    12
dtype: int64

In [3]:
obj.shape

(4,)

In [4]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
obj.values

array([ 3,  6,  9, 12])

In [6]:
# WW2 casualty counts, labelled by country instead of the default 0..n-1 index.
ww2_cas = Series(
    data=[8700000, 4300000, 3000000, 2100000, 400000],
    index=['USSR', 'Ger', 'China', 'Japan', 'USA'],
)

In [7]:
ww2_cas

USSR     8700000
Ger      4300000
China    3000000
Japan    2100000
USA       400000
dtype: int64

In [8]:
ww2_cas['USA']

400000

In [9]:
# Casualties greater than 4 million: the comparison builds a boolean mask,
# and indexing with that mask keeps only the rows where it is True.
ww2_cas[ww2_cas > 4000000]

USSR    8700000
Ger     4300000
dtype: int64

In [10]:
'USSR' in ww2_cas

True

In [11]:
'IND' in ww2_cas

False

In [12]:
# Convert the Series into a plain dict mapping each index label to its value.
ww2_dict = ww2_cas.to_dict()

ww2_dict

{'China': 3000000,
 'Ger': 4300000,
 'Japan': 2100000,
 'USA': 400000,
 'USSR': 8700000}

In [13]:
Series(ww2_dict)

China    3000000
Ger      4300000
Japan    2100000
USA       400000
USSR     8700000
dtype: int64

In [23]:
countries = ['China', 'india', 'USSR', 'pak', 'USA']

In [24]:
# Build a Series from the dict, keeping only the labels listed in `countries`.
# Labels that are missing from the dict ('india', 'pak') become NaN, which forces
# the result to float64. An explicit dtype='int64' cannot represent NaN and was
# silently overridden, so it is not requested here.
obj2 = Series(ww2_dict, index=countries)

In [25]:
obj2

China    3000000.0
india          NaN
USSR     8700000.0
pak            NaN
USA       400000.0
dtype: float64

In [26]:
pd.isnull(obj2)

China    False
india     True
USSR     False
pak       True
USA      False
dtype: bool

In [27]:
pd.notnull(obj2)

China     True
india    False
USSR      True
pak      False
USA       True
dtype: bool

In [29]:
ww2_cas

USSR     8700000
Ger      4300000
China    3000000
Japan    2100000
USA       400000
dtype: int64

In [30]:
obj2

China    3000000.0
india          NaN
USSR     8700000.0
pak            NaN
USA       400000.0
dtype: float64

In [31]:
ww2_cas + obj2

China     6000000.0
Ger             NaN
Japan           NaN
USA        800000.0
USSR     17400000.0
india           NaN
pak             NaN
dtype: float64

In [32]:
obj2.name = 'World War 2 Causalties' 

In [34]:
obj2

China    3000000.0
india          NaN
USSR     8700000.0
pak            NaN
USA       400000.0
Name: World War 2 Causalties, dtype: float64

In [35]:
obj2.index.name = 'Countries'

In [36]:
obj2

Countries
China    3000000.0
india          NaN
USSR     8700000.0
pak            NaN
USA       400000.0
Name: World War 2 Causalties, dtype: float64

## DataFrames

In [59]:
# Load the tab-separated NFL table; row 0 of the file supplies the column names.
nfl_df = pd.read_csv(
    '/resources/data/nfl.csv',
    sep='\t',
    header=0,
)

In [60]:
nfl_df

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [61]:
nfl_df.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [63]:
nfl_df.Team

0          Dallas Cowboys
1       Green Bay Packers
2           Chicago Bears
3          Miami Dolphins
4    New England Patriots
Name: Team, dtype: object

In [65]:
nfl_df['First NFL Season']

0    1960
1    1921
2    1920
3    1966
4    1960
Name: First NFL Season, dtype: int64

In [66]:
# Select a subset of columns by passing a list of names; nfl_df itself is unchanged.
nfl_df[['Team', 'First NFL Season', 'Total Games']]

Unnamed: 0,Team,First NFL Season,Total Games
0,Dallas Cowboys,1960,866
1,Green Bay Packers,1921,1320
2,Chicago Bears,1920,1354
3,Miami Dolphins,1966,784
4,New England Patriots,1960,868


In [67]:
# reindex keeps the requested columns and creates any that do not exist yet
# ('ColumnNotInDF' here), filling them with NaN. nfl_df itself is unchanged.
nfl_df.reindex(columns=['Team', 'First NFL Season', 'Total Games', 'ColumnNotInDF'])

Unnamed: 0,Team,First NFL Season,Total Games,ColumnNotInDF
0,Dallas Cowboys,1960,866,
1,Green Bay Packers,1921,1320,
2,Chicago Bears,1920,1354,
3,Miami Dolphins,1966,784,
4,New England Patriots,1960,868,


In [70]:
nfl_df.head(2)

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North


In [72]:
# tail(n) shows the last n rows; when n is omitted it defaults to 5.
nfl_df.tail(2)

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [74]:
# Grab the first row by its index label.
# (.ix was deprecated in pandas 0.20 and removed in 1.0; .loc is the label-based replacement.)
nfl_df.loc[0]

Rank                             1
Team                Dallas Cowboys
Won                            493
Lost                           367
Tied                             6
Pct.                         0.573
First NFL Season              1960
Total Games                    866
Division                  NFC East
Name: 0, dtype: object

In [77]:
# Label-based slice: .loc includes both endpoints, so this returns rows 0, 1 and 2.
# (.ix was removed in pandas 1.0; .loc keeps the same inclusive behaviour here.)
nfl_df.loc[0:2]

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North


In [81]:
# Assigning a scalar to a new column name creates the column with that value in every row.
nfl_df['Stadium'] = 'Gachibowli'

In [82]:
nfl_df

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Gachibowli
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Gachibowli
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Gachibowli
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Gachibowli
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Gachibowli


In [83]:
# Overwrite the column with an array: np.arange(1, 6) yields 1..5, one value per row.
nfl_df['Stadium'] = np.arange(1,6)

In [84]:
nfl_df

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,1
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,2
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,3
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,4
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,5


In [92]:
# Five placeholder stadium names ("Sta1".."Sta5"), labelled with the integers 0-4.
stadiums = Series(
    ["Sta{}".format(n) for n in range(1, 6)],
    index=[0, 1, 2, 3, 4],
)

In [93]:
stadiums

0    Sta1
1    Sta2
2    Sta3
3    Sta4
4    Sta5
dtype: object

In [94]:
# Assigning a Series fills the column by matching the Series' index labels to the frame's
# row labels (here both are 0-4, so every row receives a value).
nfl_df['Stadium'] = stadiums

In [95]:
nfl_df

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Sta1
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Sta2
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Sta3
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Sta4
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Sta5


In [96]:
del nfl_df['Stadium']

In [97]:
nfl_df

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [98]:

# Column-oriented input for a DataFrame: each key maps to the list of values for that column.
data = {
    'City': ['SF', 'LA', 'NYC'],
    'Pop': [837000, 3880000, 8400000],
}

In [99]:
city_df = DataFrame(data)

In [100]:
city_df

Unnamed: 0,City,Pop
0,SF,837000
1,LA,3880000
2,NYC,8400000


# Index Objects