In [2]:
import numpy as np
import pandas as pd

In [3]:
g = np.array([27466.15, 24899.3, 19610.9, 19492.4, 17885.39, 17558.76, 15475.09, 12170.2])
gdp = pd.Series(g, index=['shanghai', 'beijing', 'guangzhou', 'shenzhen', 'tianjin', 'chongqing', 'suzhou', 'chengdu'])

In [4]:
gdp['suzhou']

15475.09

In [5]:
"shanghai" in gdp

True

In [6]:
"hangzhou" in gdp

False

In [7]:
gdp.keys()

Index(['shanghai', 'beijing', 'guangzhou', 'shenzhen', 'tianjin', 'chongqing',
       'suzhou', 'chengdu'],
      dtype='object')

In [8]:
list(gdp.items())

[('shanghai', 27466.150000000001),
 ('beijing', 24899.299999999999),
 ('guangzhou', 19610.900000000001),
 ('shenzhen', 19492.400000000001),
 ('tianjin', 17885.389999999999),
 ('chongqing', 17558.759999999998),
 ('suzhou', 15475.09),
 ('chengdu', 12170.200000000001)]

In [9]:
gdp["hangzhou"] = 11050.5
gdp

shanghai     27466.15
beijing      24899.30
guangzhou    19610.90
shenzhen     19492.40
tianjin      17885.39
chongqing    17558.76
suzhou       15475.09
chengdu      12170.20
hangzhou     11050.50
dtype: float64

In [10]:
gdp.suzhou

15475.09

In [12]:
s = pd.Series(np.random.randn(4), index=["tot", "pop", "sos", "mom"])
s.tot

-0.18968542452197765

In [13]:
s.pop

<bound method NDFrame.pop of tot   -0.189685
pop    0.945353
sos   -0.542849
mom    2.382006
dtype: float64>

In [14]:
gdp[['suzhou', 'shanghai', 'beijing']]

suzhou      15475.09
shanghai    27466.15
beijing     24899.30
dtype: float64

In [15]:
gdp[gdp>20000]

shanghai    27466.15
beijing     24899.30
dtype: float64

In [16]:
g = gdp['tianjin': 'suzhou']
g

tianjin      17885.39
chongqing    17558.76
suzhou       15475.09
dtype: float64

In [17]:
g['wuhan'] = 11912.6
g

tianjin      17885.39
chongqing    17558.76
suzhou       15475.09
wuhan        11912.60
dtype: float64

In [18]:
gdp

shanghai     27466.15
beijing      24899.30
guangzhou    19610.90
shenzhen     19492.40
tianjin      17885.39
chongqing    17558.76
suzhou       15475.09
chengdu      12170.20
hangzhou     11050.50
dtype: float64

In [19]:
gdp[2]

19610.900000000001

In [20]:
gdp[2: 6]

guangzhou    19610.90
shenzhen     19492.40
tianjin      17885.39
chongqing    17558.76
dtype: float64

In [21]:
s = pd.Series(np.random.randn(4), index=[1, 3, 5, 7])
s

1   -0.274993
3    1.685951
5   -0.453118
7    0.932780
dtype: float64

In [22]:
s[1]

-0.27499328409691387

In [23]:
s[1: 3]

3    1.685951
5   -0.453118
dtype: float64

In [24]:
s.iloc[1]

1.6859512575321003

In [25]:
s.iloc[1: 3]

3    1.685951
5   -0.453118
dtype: float64

In [26]:
s.loc[1:3]

1   -0.274993
3    1.685951
dtype: float64

In [27]:
gdp_index = [("shanghai", 2015), ("shanghai", 2016), ("beijing", 2015), ("beijing", 2016), ("guangzhou", 2015), ("guangzhou", 2016)]
gdp_mind = pd.MultiIndex.from_tuples(gdp_index)
gdp = pd.Series([25300, 27466, 23000, 24899, 18100, 19611], index=gdp_mind)
gdp

shanghai   2015    25300
           2016    27466
beijing    2015    23000
           2016    24899
guangzhou  2015    18100
           2016    19611
dtype: int64

In [28]:
g1 = gdp['shanghai']
type(g1)

pandas.core.series.Series

In [29]:
g1

2015    25300
2016    27466
dtype: int64

In [30]:
gdp.loc["shanghai"]

2015    25300
2016    27466
dtype: int64

In [31]:
gdp.iloc[0]

25300

In [32]:
gdp[0]

25300

In [33]:
gdp.loc['shanghai', 2015]

25300

In [34]:
gdp[2015]

IndexError: index out of bounds

In [35]:
gdp.loc[:, 2015]

shanghai     25300
beijing      23000
guangzhou    18100
dtype: int64

In [36]:
gdp.loc[gdp > 19000]

shanghai   2015    25300
           2016    27466
beijing    2015    23000
           2016    24899
guangzhou  2016    19611
dtype: int64

In [37]:
gdp.values

array([25300, 27466, 23000, 24899, 18100, 19611])

In [38]:
gdp.iloc[1: 5]

shanghai   2016    27466
beijing    2015    23000
           2016    24899
guangzhou  2015    18100
dtype: int64

In [39]:
gdp.iloc[[1, 3, 5]]

shanghai   2016    27466
beijing    2016    24899
guangzhou  2016    19611
dtype: int64

In [40]:
population = pd.Series([2415.27, 2151.6, 1270.08], index=["shanghai", "beijing", "guangzhou"])
gdp = pd.Series([27466, 24899, 19611], index=["shanghai", "beijing", "guangzhou"])
d = pd.DataFrame({'gdp':gdp, 'pop':population})
d

Unnamed: 0,gdp,pop
shanghai,27466,2415.27
beijing,24899,2151.6
guangzhou,19611,1270.08


In [41]:
d.values

array([[ 27466.  ,   2415.27],
       [ 24899.  ,   2151.6 ],
       [ 19611.  ,   1270.08]])

In [42]:
d.values[0]

array([ 27466.  ,   2415.27])

In [43]:
d.T

Unnamed: 0,shanghai,beijing,guangzhou
gdp,27466.0,24899.0,19611.0
pop,2415.27,2151.6,1270.08


In [44]:
d['gdp']

shanghai     27466
beijing      24899
guangzhou    19611
Name: gdp, dtype: int64

In [45]:
d[1]

KeyError: 1

In [46]:
d.iloc[1]

gdp    24899.0
pop     2151.6
Name: beijing, dtype: float64

In [47]:
d.iloc[1, 1]

2151.5999999999999

In [48]:
d.iloc[1:3, :2]

Unnamed: 0,gdp,pop
beijing,24899,2151.6
guangzhou,19611,1270.08


In [49]:
d.loc["beijing", "pop"]

2151.5999999999999

In [50]:
d.loc["beijing":"guangzhou", "pop"]

beijing      2151.60
guangzhou    1270.08
Name: pop, dtype: float64

In [51]:
d.loc["beijing":"guangzhou", "gdp":"pop"]

Unnamed: 0,gdp,pop
beijing,24899,2151.6
guangzhou,19611,1270.08


In [52]:
d.ix[:2, "pop"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.


shanghai    2415.27
beijing     2151.60
Name: pop, dtype: float64

In [53]:
d['pop']

shanghai     2415.27
beijing      2151.60
guangzhou    1270.08
Name: pop, dtype: float64

In [54]:
d["beijing":"guangzhou"]

Unnamed: 0,gdp,pop
beijing,24899,2151.6
guangzhou,19611,1270.08


In [55]:
d.iloc[1:]

Unnamed: 0,gdp,pop
beijing,24899,2151.6
guangzhou,19611,1270.08


In [56]:
d.loc["shanghai", "pop"]

2415.27

In [57]:
d.loc[:, 'pop']

shanghai     2415.27
beijing      2151.60
guangzhou    1270.08
Name: pop, dtype: float64

In [58]:
d.loc[['shanghai', 'guangzhou'], "pop"]

shanghai     2415.27
guangzhou    1270.08
Name: pop, dtype: float64

In [59]:
mind = pd.MultiIndex.from_product([[2016, 2017], [1, 2]], 
                                  names=["year", "test"])
columns = pd.MultiIndex.from_product([['Hertz', 'Newton', 'Sola'], ['Chinese', 'Phy']], names=['name', 'subject'])
data = np.round(np.random.randn(4, 6), 1)
data = data * 10 + 70
scores = pd.DataFrame(data, index=mind, columns=columns)
scores

Unnamed: 0_level_0,name,Hertz,Hertz,Newton,Newton,Sola,Sola
Unnamed: 0_level_1,subject,Chinese,Phy,Chinese,Phy,Chinese,Phy
year,test,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2016,1,75.0,62.0,85.0,68.0,63.0,66.0
2016,2,48.0,68.0,64.0,79.0,70.0,61.0
2017,1,75.0,84.0,54.0,69.0,52.0,88.0
2017,2,77.0,63.0,61.0,103.0,89.0,66.0


In [60]:
scores['Hertz']

Unnamed: 0_level_0,subject,Chinese,Phy
year,test,Unnamed: 2_level_1,Unnamed: 3_level_1
2016,1,75.0,62.0
2016,2,48.0,68.0
2017,1,75.0,84.0
2017,2,77.0,63.0


In [61]:
scores['Hertz', 'Phy']

year  test
2016  1       62.0
      2       68.0
2017  1       84.0
      2       63.0
Name: (Hertz, Phy), dtype: float64

In [62]:
scores.loc[:, ('Sola', 'Phy')]

year  test
2016  1       66.0
      2       61.0
2017  1       88.0
      2       66.0
Name: (Sola, Phy), dtype: float64

In [63]:
scores.iloc[1:3]

Unnamed: 0_level_0,name,Hertz,Hertz,Newton,Newton,Sola,Sola
Unnamed: 0_level_1,subject,Chinese,Phy,Chinese,Phy,Chinese,Phy
year,test,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2016,2,48.0,68.0,64.0,79.0,70.0,61.0
2017,1,75.0,84.0,54.0,69.0,52.0,88.0


In [64]:
scores.loc[2016, 2]

name    subject
Hertz   Chinese    48.0
        Phy        68.0
Newton  Chinese    64.0
        Phy        79.0
Sola    Chinese    70.0
        Phy        61.0
Name: (2016, 2), dtype: float64

In [65]:
scores.loc[(2016, 2), ('Newton')]

subject
Chinese    64.0
Phy        79.0
Name: (2016, 2), dtype: float64

In [66]:
scores.loc[(:, 1), (:, "Phy")]

SyntaxError: invalid syntax (<ipython-input-66-2cdd6f2c46b1>, line 1)

In [67]:
idx = pd.IndexSlice
scores.loc[idx[:, 1], idx[:, "Phy"]]

Unnamed: 0_level_0,name,Hertz,Newton,Sola
Unnamed: 0_level_1,subject,Phy,Phy,Phy
year,test,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2016,1,62.0,68.0,66.0
2017,1,84.0,69.0,88.0


In [68]:
columns = pd.MultiIndex.from_product([['Hertz', 'Newton', 'Sola'], ['Physics', 'Chinese']], names=['name', 'subject'])

In [69]:
scores = pd.DataFrame(data, index=mind, columns=columns)

In [70]:
scores.loc[idx[:,1], idx[:, "Phy"]]

UnsortedIndexError: 'MultiIndex Slicing requires the index to be fully lexsorted tuple len (2), lexsort depth (1)'