### 索引对象Index

#### 1. Series和DataFrame中的索引都是Index对象

In [1]:
import numpy as np
import pandas as pd

In [4]:
ser_obj = pd.Series(range(10, 20))

year_data = {2001: 17.8, 2002: 20.1, 2003: 16.5}
ser_obj2 = pd.Series(year_data)

# 通过dict构建DataFrame
dict_data = {'A': 1, 
             'B': pd.Timestamp('20170426'),
             'C': pd.Series(1, index=list(range(4)),dtype='float32'),
             'D': np.array([3] * 4,dtype='int32'),
             'E': ["Python","Java","C++","C"],
             'F': 'ITCast' }
#print dict_data
df_obj2 = pd.DataFrame(dict_data)

In [3]:
print(type(ser_obj.index))

print(type(ser_obj2.index))

<class 'pandas.core.indexes.range.RangeIndex'>
<class 'pandas.core.indexes.numeric.Int64Index'>


In [5]:
print(df_obj2.index)

Int64Index([0, 1, 2, 3], dtype='int64')


#### 2. 索引对象不可变，保证了数据的安全

In [6]:
df_obj2.index[0] = 2      # 报错

TypeError: Index does not support mutable operations

#### 常见的Index种类

In [7]:
# Index，索引
# Int64Index，整数索引
# Multiple，层级索引
# DatetimeIndex，时间戳类型

### Series索引

#### 1. index指定索引名

In [8]:
ser_obj = pd.Series(range(5), index=['a', 'b', 'c', 'd', 'e'])
print(ser_obj.head())

a    0
b    1
c    2
d    3
e    4
dtype: int64


#### 2. 索引行

In [12]:
print(ser_obj['b'])

print(ser_obj[2])

1
2


#### 3. 切片索引

In [14]:
print(ser_obj[1: 3])
print(ser_obj['b': 'd'])

b    1
c    2
dtype: int64
b    1
c    2
d    3
dtype: int64


#### 4. 不连续索引

In [15]:
print(ser_obj[[0, 2, 4]])

print(ser_obj[['a', 'e']])

a    0
c    2
e    4
dtype: int64
a    0
e    4
dtype: int64


#### 5. 布尔索引

In [16]:
ser_bool = ser_obj > 2
print(ser_bool)

a    False
b    False
c    False
d     True
e     True
dtype: bool


In [17]:
print(ser_obj[ser_bool])

d    3
e    4
dtype: int64


In [18]:
print(ser_obj[ser_obj > 2])   # 等价于上述两步

d    3
e    4
dtype: int64


### DataFrame索引

#### 1. columns 指定列索引名

In [21]:
df_obj = pd.DataFrame(np.random.randn(5,4), columns=['a', 'b', 'c', 'd'])
print(df_obj.head())

          a         b         c         d
0  0.225820  1.292301 -1.352446  0.319650
1 -0.671340  0.814223  1.126142  0.641342
2  1.982928  1.869269  0.292290 -2.205226
3 -0.341130 -0.395327  0.469066  0.630108
4  2.443732 -0.151896 -0.222858 -0.971540


#### 2. 列索引

In [22]:
print(df_obj['a'])      # 返回Series类型

0    0.225820
1   -0.671340
2    1.982928
3   -0.341130
4    2.443732
Name: a, dtype: float64


#### 3. 不连续索引

In [27]:
print(df_obj[['a', 'c']])

          a         c
0  0.225820 -1.352446
1 -0.671340  1.126142
2  1.982928  0.292290
3 -0.341130  0.469066
4  2.443732 -0.222858


In [28]:
print(type(df_obj[['a', 'c']]))

<class 'pandas.core.frame.DataFrame'>


### 高级索引：标签、位置和混合

In [29]:
# Pandas的高级索引有3种

#### 1. loc标签索引

In [32]:
# DataFrame 不能直接切片，可以通过loc来做切片
# loc是基于标签名的索引，也就是我们自定义的索引名

In [33]:
# Series
print(ser_obj['b': 'd'])
print(ser_obj.loc['b': 'd'])

b    1
c    2
d    3
dtype: int64
b    1
c    2
d    3
dtype: int64


In [34]:
# DataFrame
print(df_obj['a'])

# 第一个参数索引行，第二个参数是列
print(df_obj.loc[0:2, 'a'])

0    0.225820
1   -0.671340
2    1.982928
3   -0.341130
4    2.443732
Name: a, dtype: float64
0    0.225820
1   -0.671340
2    1.982928
Name: a, dtype: float64


#### 2. iloc位置索引

In [35]:
# Series
print(ser_obj[1:3])
print(ser_obj.iloc[1:3])

b    1
c    2
dtype: int64
b    1
c    2
dtype: int64


In [36]:
# DataFrame
print(df_obj.iloc[0:2, 0])      # 注意和df_obj.loc[0:2, 'a']的区别

0    0.22582
1   -0.67134
Name: a, dtype: float64


#### 3. ix标签与位置混合索引

In [37]:
# ix是以上二者的综合，既可以使用索引编号，又可以使用自定义索引，要视情况不同来使用，
# 如果索引既有数字又有英文，那么这种方式是不建议使用的，容易导致定位的混乱。

In [38]:
# Series
print(ser_obj.ix[1: 3])
print(ser_obj.ix['b', 'c'])

b    1
c    2
dtype: int64
1


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


In [39]:
# DataFrame
print(df_obj.loc[0:2, 'a'])
print(df_obj.ix[0:2, 0])

0    0.225820
1   -0.671340
2    1.982928
Name: a, dtype: float64
0    0.225820
1   -0.671340
2    1.982928
Name: a, dtype: float64


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until
