In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Series indexing
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj['b']  # 1.0 -- select by label
# Integer positions go through .iloc: on a Series with a non-integer index,
# obj[1] is deprecated positional access (pandas >= 2.1 warns, pandas 3 treats
# the integer as a label and raises KeyError).
obj.iloc[1]  # 1.0
obj[2:100]  # integer slices are positional; an out-of-range stop is accepted
# c    2.0
# d    3.0
obj[['b', 'a', 'd']]  # a list of labels selects in the order given
# b    1.0
# a    0.0
# d    3.0
obj.iloc[[1, 3]]  # a list of integer positions
# b    1.0
# d    3.0
obj[obj < 2]  # boolean mask
# a    0.0
# b    1.0
obj['b':'c']  # Python slices are half-open [start, stop); label slices on a Series include both endpoints
# b    1.0
# c    2.0
obj['b':'c'] = 5  # assigning through a label slice modifies obj in place
obj
# a    0.0
# b    5.0
# c    5.0
# d    3.0

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

In [3]:
# DataFrame indexing: a 4x4 frame holding 0..15, labelled on both axes
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data['two']  # a single label selects one column, returned as a Series

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [4]:
data[['three', 'one']]  # a list of labels selects multiple columns, in the order given

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [5]:
data[:2]  # a slice inside [] selects rows, not columns

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [6]:
data[data['three']>5]  # a boolean array inside [] selects rows

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [7]:
data[data<5] = 0  # assignment through a boolean DataFrame mask, much like NumPy; this modifies data in place
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [8]:
# Select data with loc (axis labels) and iloc (integer positions).
# The frame is rebuilt here so the examples start again from the values 0..15.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data.loc[['Colorado', 'New York'], ['two', 'three']]

Unnamed: 0,two,three
Colorado,5,6
New York,13,14


In [9]:
data.iloc[[3, 2], [3, 0, 1]]  # integer positions: rows 3 and 2, columns 3, 0 and 1, in the order listed

Unnamed: 0,four,one,two
New York,15,12,13
Utah,11,8,9


In [10]:
data.iloc[[2, 3]]  # a single indexer selects rows by default

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [11]:
data.loc[:'Utah', 'two':'four']  # labels can be used in slices too; both end labels are included

Unnamed: 0,two,three,four
Ohio,1,2,3
Colorado,5,6,7
Utah,9,10,11


In [12]:
data.iloc[:, :3][data.three > 5]  # first three columns by position, then keep the rows where column 'three' > 5

Unnamed: 0,one,two,three
Colorado,4,5,6
Utah,8,9,10
New York,12,13,14


In [13]:
# DataFrame indexing options
# Type                       Description
# df[val]                    Select a single column or a sequence of columns; special-case conveniences: boolean array (filter rows),
#                                  slice (slice rows), or boolean DataFrame (set values based on a condition)
# df.loc[val]                Select a single row or a subset of rows by label
# df.loc[:, val]             Select a single column or a subset of columns by label
# df.loc[val1, val2]         Select both rows and columns by label
# df.iloc[where]             Select a single row or a subset of rows by integer position
# df.iloc[:, where]          Select a single column or a subset of columns by integer position
# df.iloc[where_i, where_j]  Select both rows and columns by integer position
# df.at[label_i, label_j]    Select a single scalar value by row and column label
# df.iat[i, j]               Select a single scalar value by row and column position (integers)
# reindex()                  Select rows or columns by label
# get_value(), set_value()   Select a single value by row and column label (deprecated in pandas 0.21, removed in 1.0; use df.at / df.iat)