# index对象的使用
## index对象是pandas的核心
###  索引类似于元组，其本身不能赋值修改；在数据进行运算时，辅助自动对齐；多层索引帮助改变表的形态

In [1]:
import numpy as np
import pandas as pd

### 以下将从单层索引和多层索引进行操作
## 1.单层索引
### 1.1 创建

#### `pd.Index(data, dtype=None, name=None)`
- data:一维列表
- dtype：索引元素的类型，默认 None（根据 data 自动推断，字符串数据会推断为 object）
- name：索引元素的名字，类似于列的名字

In [4]:
data=['a','b','c']
name = 'name1'
index = pd.Index(data,name=name)

In [5]:
index

Index(['a', 'b', 'c'], dtype='object', name='name1')

In [6]:
index.name

'name1'

In [8]:
index.dtype

dtype('O')

In [9]:
index.values

array(['a', 'b', 'c'], dtype=object)

## 1.2 查
### 查询方式和 一维ndarray或Series的 `[]` 完全一样

In [10]:
index[0]

'a'

In [13]:
index[0:2] # 范围，返回index

Index(['a', 'b'], dtype='object', name='name1')

In [14]:
index[[0,2]] # 列表，返回index

Index(['a', 'c'], dtype='object', name='name1')

In [15]:
mask= [True,True,False]
index[mask]

Index(['a', 'b'], dtype='object', name='name1')

## 1.3 改索引名
- 索引的值不能修改，但是可以修改索引的名字

### 1.3.1 直接改

In [16]:
index

Index(['a', 'b', 'c'], dtype='object', name='name1')

In [17]:
index.name="new_name"
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

### 1.3.2 函数修改
#### `Index.set_names(names, inplace= False)`
- names: 要设置的名字，可以为列表
- inplace：是否原地修改

In [19]:
index.set_names('new_new_name')

Index(['a', 'b', 'c'], dtype='object', name='new_new_name')

In [20]:
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

## 1.4 增
### 1.4.1 按位置增加一行
### 函数`Index.insert(loc,item)`
- loc：位置，数字
- item：值

In [21]:
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

In [29]:
index.insert(loc=2, item='d')

Index(['a', 'b', 'd', 'c'], dtype='object', name='new_name')

### 1.4.2 尾部添加多行
#### 函数 `Index.append(other)`
- other:其他索引对象

In [36]:
index2 = index.copy()
index2

Index(['a', 'b', 'c'], dtype='object', name='new_name')

In [37]:
index2

Index(['a', 'b', 'c'], dtype='object', name='new_name')

In [38]:
index.append(index2)

Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object', name='new_name')

In [40]:
index2.name='new_new_name'
index2

Index(['a', 'b', 'c'], dtype='object', name='new_new_name')

In [42]:
index3=index.append(index2)
index3

Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object')

In [43]:
index.name

'new_name'

### 1.4.3 并
#### 函数`Index.union(other)`
- other:其他索引对象

In [44]:
index4 = pd.Index(['a','b','d'],name='nihao')
index4

Index(['a', 'b', 'd'], dtype='object', name='nihao')

In [45]:
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

In [46]:
index.union(index4)

Index(['a', 'b', 'c', 'd'], dtype='object')

## 1.5 删
### 1.5.1 按位置删除一行
#### 函数 `Index.delete(loc)`
- loc:位置 

In [47]:
index.delete(1)

Index(['a', 'c'], dtype='object', name='new_name')

In [48]:
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

### 1.5.2 按索引删除多行
#### 函数 `Index.drop(labels)`
- labels:索引列表

In [49]:
index.drop('a')

Index(['b', 'c'], dtype='object', name='new_name')

In [50]:
index

Index(['a', 'b', 'c'], dtype='object', name='new_name')

### 1.5.3 交
#### 函数`Index.intersection(other)`
- other:其他索引对象

In [51]:
index4

Index(['a', 'b', 'd'], dtype='object', name='nihao')

In [52]:
index.intersection(index4)

Index(['a', 'b'], dtype='object')

## ---------------------------------------------------------------------------------------------------
## 2.多层索引
### 2.1多层索引创建
#### 函数`pd.MultiIndex.from_tuples(tuples, names=None)`
- tuples: 元组或列表 的列表
- names：名字的列表
### 函数`pd.MultiIndex.from_arrays(arrays,names)`

In [56]:
data=[('a','one'),('a','two'),('b','one')]
index = pd.MultiIndex.from_tuples(tuples= data,names=['name1','name2'])
index

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['name1', 'name2'])

In [57]:
s= pd.Series([1,2,3],index= index)
s

name1  name2
a      one      1
       two      2
b      one      3
dtype: int64

### 2.2 查
查询方式和 单层 一样

In [58]:
index

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['name1', 'name2'])

In [59]:
index[0]

('a', 'one')

In [62]:
index[0:2] # 范围（切片），返回 MultiIndex

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0], [0, 1]],
           names=['name1', 'name2'])

In [63]:
index[[0,2]]

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 1], [0, 0]],
           names=['name1', 'name2'])

In [64]:
# mask（布尔掩码）也是一样的

###  获取某一层索引 `MultiIndex.get_level_values(level)`
- level: int 或层的名字，选中哪一层

In [66]:
index.get_level_values(level= 0)

Index(['a', 'a', 'b'], dtype='object', name='name1')

In [67]:
index.get_level_values(level=1)

Index(['one', 'two', 'one'], dtype='object', name='name2')

## 2.3 改
## 2.3.1 改索引名
### 函数 `MultiIndex.set_names(names,level = None, inplace=False)`
- names：要设置的名字，可以是名字列表
- level：多层索引要设置修改的索引层次，需要与names 匹配（指names为列表的时候，level也为列表，需要匹配）
- inplace：是否原地修改，默认 否

In [68]:
index.set_names(names='new_name1',level=0)

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['new_name1', 'name2'])

In [69]:
index.set_names(names=['new_name11','new_name22'],level=[0,1])

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['new_name11', 'new_name22'])

### 2.3.2 改索引层次顺序
### 函数 `MultiIndex.swaplevel(i=-2, j=-1)`

In [71]:
help(index.swaplevel(i=-2, j=-1))

Help on MultiIndex in module pandas.core.indexes.multi object:

class MultiIndex(pandas.core.indexes.base.Index)
 |  A multi-level, or hierarchical, index object for pandas objects
 |  
 |  Parameters
 |  ----------
 |  levels : sequence of arrays
 |      The unique labels for each level
 |  labels : sequence of arrays
 |      Integers for each level designating which label at each location
 |  sortorder : optional int
 |      Level of sortedness (must be lexicographically sorted by that
 |      level)
 |  names : optional sequence of objects
 |      Names for each of the index levels. (name is accepted for compat)
 |  copy : boolean, default False
 |      Copy the meta-data
 |  verify_integrity : boolean, default True
 |      Check that the levels/labels are consistent and valid
 |  
 |  Examples
 |  ---------
 |  A new ``MultiIndex`` is typically constructed using one of the helper
 |  methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
 |  and :meth:`MultiIndex.f

In [72]:
index.swaplevel()

MultiIndex(levels=[['one', 'two'], ['a', 'b']],
           labels=[[0, 1, 0], [0, 0, 1]],
           names=['name2', 'name1'])

### 函数 `Series.swaplevel(i=-2, j=-1)`
### 函数 `DataFrame.swaplevel(i=-2, j=-1, axis=0)`（axis=0 交换行索引层次，axis=1 交换列索引层次）

In [73]:
s

name1  name2
a      one      1
       two      2
b      one      3
dtype: int64

In [74]:
s.swaplevel()

name2  name1
one    a        1
two    a        2
one    b        3
dtype: int64

In [75]:
type(s)

pandas.core.series.Series

In [81]:
columns = index.copy()
print(columns)
columns.set_names( names = ['name3','name4'], level = [0,1], inplace = True) #列索引取和行索引相同，只是改了名字
print(columns)
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]], index= index, columns = columns)
df

MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['name1', 'name2'])
MultiIndex(levels=[['a', 'b'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['name3', 'name4'])


Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [82]:
df.swaplevel(axis=1) # 交换列索引顺序

Unnamed: 0_level_0,name4,one,two,one
Unnamed: 0_level_1,name3,a,a,b
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [83]:
df.swaplevel(axis=0)

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name2,name1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,a,1,2,3
two,a,4,5,6
one,b,7,8,9


## ------------------------------------------------------------
## 3 多层索引的使用方法
对 values进行查看时，多层索引可以分开使用
### 3.1 外层的索引
    对于外层索引，无论是 Series 还是 DataFrame，都可以直接使用；只有外层索引才能直接用于选取（内层索引需要先指定外层）。用法和之前单层索引的一样

In [84]:
df1 = df.copy()
df1

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


### 3.1.1 `[]`方法

In [86]:
df1['a'] # 列外层

Unnamed: 0_level_0,name4,one,two
name1,name2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,1,2
a,two,4,5
b,one,7,8


In [89]:
df1[['a','b']] # 列外层

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [91]:
df1[0:1] # 行外层

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3


In [92]:
df1[0:2]

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6


In [93]:
mask =[True,False,True]  # 行外层
df1[mask]

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
b,one,7,8,9


### 3.1.2 `.loc[]`

In [94]:
df1.loc['a'] # 行索引

name3,a,a,b
name4,one,two,one
name2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
one,1,2,3
two,4,5,6


In [95]:
df1.loc['a','b']

name4,one
name2,Unnamed: 1_level_1
one,3
two,6


In [97]:
df1.loc['a':'b','a']

Unnamed: 0_level_0,name4,one,two
name1,name2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,1,2
a,two,4,5
b,one,7,8


In [98]:
df1.loc[['a','b'],'b']

Unnamed: 0_level_0,name4,one
name1,name2,Unnamed: 2_level_1
a,one,3
a,two,6
b,one,9


In [99]:
df1.loc[['a'],['b','b']]

Unnamed: 0_level_0,name3,b
Unnamed: 0_level_1,name4,one
name1,name2,Unnamed: 2_level_2
a,one,3
a,two,6


### 3.1.3 `.iloc[]`

In [100]:
df1

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [101]:
df1.iloc[0:2,1]

name1  name2
a      one      2
       two      5
Name: (a, two), dtype: int64

In [102]:
type(df1.iloc[0:2,1])

pandas.core.series.Series

In [103]:
df1.iloc[0,0:2]

name3  name4
a      one      1
       two      2
Name: (a, one), dtype: int64

In [104]:
df1.iloc[[0,1],0:2]

Unnamed: 0_level_0,name3,a,a
Unnamed: 0_level_1,name4,one,two
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,one,1,2
a,two,4,5


## 3.2 对于内层索引
- 内层索引不可直接使用，必须是先外层，再内层。
- 内层只能使用单索引形式

 ### 3.2.1 `[ , ]`
 `取一列，先外层单列索引，再内层单列索引，其他形式都报错`

In [106]:
df1

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [109]:
df1['a','one']  # 取一列，先外层单列索引，再内层单列索引，其他形式都报错

name1  name2
a      one      1
       two      4
b      one      7
Name: (a, one), dtype: int64

 ### 3.2.2 `.loc[ , ]`
 - `取一行，先外层单行索引，再内层单行索引，其他形式都报错`

In [110]:
df1.loc['a','one']

name3  name4
a      one      1
       two      2
b      one      3
Name: (a, one), dtype: int64

### 3.2.3 `.iloc[]`
这种方法按位置索引

In [113]:
df1.iloc[0,0:4]

name3  name4
a      one      1
       two      2
b      one      3
Name: (a, one), dtype: int64

## 3.3 xs直接选取法
### `适合在单层level选取，不能行列同时操作`
`Series.xs(key, level=None, drop_level=True)`

`DataFrame.xs(key, axis=0, level=None, drop_level=True)`
- key: 要选取的索引值或其列表；
- axis：0-行索引，1-列索引；
- level：索引层次；
- drop_level：True or False，是否在结果中去掉用于选取的level索引，默认 True（去掉，即结果中不显示该层）。

In [114]:
df1

Unnamed: 0_level_0,name3,a,a,b
Unnamed: 0_level_1,name4,one,two,one
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,1,2,3
a,two,4,5,6
b,one,7,8,9


In [120]:
df1.xs(key = 'one',axis= 0, level= 1)

name3,a,a,b
name4,one,two,one
name1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,1,2,3
b,7,8,9


In [125]:
df1.xs(key = ['two'],axis= 0, level= [1])

name3,a,a,b
name4,one,two,one
name1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,4,5,6


In [129]:
df1.xs(key='one',axis=1,level=1)

Unnamed: 0_level_0,name3,a,b
name1,name2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,1,3
a,two,4,6
b,one,7,9


In [130]:
df1.xs('two',axis=1,level=1)

Unnamed: 0_level_0,name3,a
name1,name2,Unnamed: 2_level_1
a,one,2
a,two,5
b,one,8
