# 索引扩展

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Five consecutive integers stored under index labels that count down from
# 4 to 0, so label order is the reverse of positional order.
s = pd.Series(
    data=np.arange(5),
    index=np.arange(4, -1, -1),
    dtype='int64',
)
print(s)

4    0
3    1
2    2
1    3
0    4
dtype: int64


In [3]:
# Series.isin(values) checks every element for membership in `values` and
# returns a boolean Series on the same index (True where the element is 1, 3 or 4).
print(s.isin(values=[1, 3, 4]))

4    False
3     True
2    False
1     True
0     True
dtype: bool


In [5]:
# MultiIndex.from_product builds a two-level index from the Cartesian product
# of the level values: (0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c').
s2 = pd.Series(
    np.arange(6),
    index=pd.MultiIndex.from_product(iterables=[[0, 1], list('abc')]),
)
print(s2)

0  a    0
   b    1
   c    2
1  a    3
   b    4
   c    5
dtype: int32


In [6]:
# Pick out the entries labelled (1, 'a') and (0, 'b'): index.isin produces a
# boolean mask over the MultiIndex tuples, and iloc keeps the positions that
# are True.
pair_mask = s2.index.isin([(1, 'a'), (0, 'b')])
print(s2.iloc[pair_mask])

0  b    1
1  a    3
dtype: int32


In [7]:
# Show the full Series again so the filtered result in the next cell can be
# compared against it.
print(s)

4    0
3    1
2    2
1    3
0    4
dtype: int64


In [8]:
# Boolean indexing: keep only the elements whose value is greater than 2.
print(s.loc[s.gt(2)])

1    3
0    4
dtype: int64


In [9]:
# Eight consecutive calendar days starting on 2017-11-24, used as a row index
# for the DataFrame built in the next cell.
dates = pd.date_range(start='20171124', periods=8, freq='D')
print(dates)

DatetimeIndex(['2017-11-24', '2017-11-25', '2017-11-26', '2017-11-27',
               '2017-11-28', '2017-11-29', '2017-11-30', '2017-12-01'],
              dtype='datetime64[ns]', freq='D')


In [10]:
# 8x4 frame of standard-normal samples, one row per day in `dates`.
# The samples come from a private, seeded RandomState so that a fresh
# "Restart & Run All" reproduces the same frame (and the same where() results
# in the cells below) without touching NumPy's global random state.
# Outputs recorded before the seed was introduced will differ until re-run.
df = pd.DataFrame(
    np.random.RandomState(0).randn(8, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
print(df)

                   A         B         C         D
2017-11-24  1.480257  0.824786 -0.011374 -0.240725
2017-11-25  1.449743  0.077168 -0.986485  1.012821
2017-11-26 -0.584738 -1.610924 -0.183453 -0.771603
2017-11-27  0.074118  0.311289 -0.044556 -1.216278
2017-11-28 -0.718916 -0.378906  0.035694  0.037987
2017-11-29  0.083994 -1.529310  0.417731  0.548021
2017-11-30  0.480498 -0.609897  1.403652 -1.168940
2017-12-01 -0.220154  0.127383  0.200535  0.963977


In [15]:
# Disabled example: choose columns by applying a predicate to each column label.
# NOTE(review): DataFrame.select was deprecated in pandas 0.21 and removed in
# 1.0 - confirm against the pandas version in use before re-enabling.
# print(df.select(lambda x:x=='A',axis='columns'))

In [11]:
# where(cond) keeps each value for which cond is True (here: value < 0) and
# fills every other position with NaN by default.
print(df.where(cond=df.lt(0)))

                   A         B         C         D
2017-11-24       NaN       NaN -0.011374 -0.240725
2017-11-25       NaN       NaN -0.986485       NaN
2017-11-26 -0.584738 -1.610924 -0.183453 -0.771603
2017-11-27       NaN       NaN -0.044556 -1.216278
2017-11-28 -0.718916 -0.378906       NaN       NaN
2017-11-29       NaN -1.529310       NaN       NaN
2017-11-30       NaN -0.609897       NaN -1.168940
2017-12-01 -0.220154       NaN       NaN       NaN


In [13]:
# where(cond, other) substitutes `other` for the default NaN wherever cond is
# False; here every value that is not negative is replaced by the string '-a'.
print(df.where(cond=df.lt(0), other='-a'))

                   A         B          C         D
2017-11-24        -a        -a -0.0113741 -0.240725
2017-11-25        -a        -a  -0.986485        -a
2017-11-26 -0.584738  -1.61092  -0.183453 -0.771603
2017-11-27        -a        -a -0.0445564  -1.21628
2017-11-28 -0.718916 -0.378906         -a        -a
2017-11-29        -a  -1.52931         -a        -a
2017-11-30        -a -0.609897         -a  -1.16894
2017-12-01 -0.220154        -a         -a        -a


In [17]:
# 10x3 frame of standard-normal samples with columns a, b, c, used by the
# query() examples below. Note that this rebinds `df`, replacing the
# date-indexed frame created earlier in the notebook.
# The samples come from a private, seeded RandomState so that re-running the
# notebook reproduces the same frame and the same query() results without
# touching NumPy's global random state. Outputs recorded before the seed was
# introduced will differ until re-run.
df = pd.DataFrame(
    np.random.RandomState(1).randn(10, 3),
    columns=list('abc'),
)
print(df)

          a         b         c
0  1.058602  1.565601  0.300345
1  0.986514  0.246536 -0.994049
2 -0.203724 -0.519371  1.220880
3  0.912827  0.936304 -0.940842
4 -1.128773  0.574446  1.764795
5  0.073877 -1.185987  0.900702
6 -0.426032  0.586070  1.028971
7  0.441028  0.798077  0.757395
8 -1.421758 -0.726521 -0.347595
9 -0.927766 -1.094000  0.303804


In [18]:
# query() evaluates the expression string against the column names and keeps
# the rows where it is True: here, rows whose `a` value is below their `b` value.
print(df.query(expr='a<b'))

          a         b         c
0  1.058602  1.565601  0.300345
3  0.912827  0.936304 -0.940842
4 -1.128773  0.574446  1.764795
6 -0.426032  0.586070  1.028971
7  0.441028  0.798077  0.757395
8 -1.421758 -0.726521 -0.347595


In [19]:
# Combine two comparisons inside one query expression: keep only the rows
# where a < b and b < c both hold.
print(df.query(expr='a<b & b<c'))

          a         b         c
4 -1.128773  0.574446  1.764795
6 -0.426032  0.586070  1.028971
8 -1.421758 -0.726521 -0.347595
