In [14]:
import datetime

import numpy as np
import pandas as pd

# 4.1.4 索引类型

In [15]:
# Load the sample spreadsheet into a DataFrame; `df` is reused by the
# index-object and index-attribute cells further down.
df = pd.read_excel('team.xlsx')
# Preview the first rows to confirm the columns that were read.
df.head()

Unnamed: 0,name,team,Q1,Q2,Q3,Q4
0,Liver,E,89,21,24,64
1,Arry,C,36,37,37,57
2,Ack,A,57,60,18,84
3,Eorge,C,93,96,71,78
4,Oah,D,65,49,61,86


In [16]:
# Range index: a lazily-described arithmetic sequence of integers.
# Here: the odd numbers 1, 3, ..., 99 (stop is exclusive).
pd.RangeIndex(start=1, stop=100, step=2)

RangeIndex(start=1, stop=100, step=2)

In [17]:
# Categorical index
# Holds labels drawn from a limited set of possible values (an enumeration),
# e.g. a column with only a handful of distinct labels. The benefit only
# becomes noticeable on large datasets.
pd.CategoricalIndex(['a','b','c'])

CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category')

In [18]:
# Interval index
# Each element represents a numeric or time interval; typically used for
# binned data. The recorded output shows right-closed unit intervals
# (0, 1] ... (4, 5].
pd.interval_range(start=0, end=5)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]')

In [19]:
# Multi-level (hierarchical) index
# Several index levels where each inner label belongs to an outer label.
# One inner list per level: the first holds the 'number' labels, the second
# the 'color' labels; entries at the same position form one index tuple.
arr = [
    [1, 1, 2, 2],
    ['red', 'blue', 'red', 'blue'],
]
pd.MultiIndex.from_arrays(arr, names=['number', 'color'])

MultiIndex([(1,  'red'),
            (1, 'blue'),
            (2,  'red'),
            (2, 'blue')],
           names=['number', 'color'])

In [20]:
# Datetime index
# 1. Timestamps of time-series data: every calendar day between the start
#    and end dates, both included (daily frequency, per the recorded output).
pd.date_range(start='1/1/2020',end='12/12/2020')

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10',
               ...
               '2020-12-03', '2020-12-04', '2020-12-05', '2020-12-06',
               '2020-12-07', '2020-12-08', '2020-12-09', '2020-12-10',
               '2020-12-11', '2020-12-12'],
              dtype='datetime64[ns]', length=347, freq='D')

In [21]:
# 2. Give a start date and the number of periods instead of an end date;
#    the recorded output shows four consecutive days.
pd.date_range(start='1/1/2020',periods=4)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04'], dtype='datetime64[ns]', freq='D')

In [22]:
# 3. Monthly periods: one period per calendar month from January 2020 to
#    April 2020. Dates are written in ISO 8601 (YYYY-MM-DD) so the end date
#    cannot be misread as 4 December.
pd.period_range(start='2020-01-01', end='2020-04-12', freq='M')

PeriodIndex(['2020-01', '2020-02', '2020-03', '2020-04'], dtype='period[M]')

In [23]:
# Nested periods
# start/end are quarterly Periods while freq='M' asks for monthly periods.
# Per the recorded output the result runs from 2017-03 to 2017-06, i.e. from
# the last month of 2017Q1 to the last month of 2017Q2.
pd.period_range(start=pd.Period('2017Q1', freq='Q'),end=pd.Period('2017Q2', freq='Q'), freq = 'M')

PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]')

In [24]:
# Timedelta index
# Each element is a duration rather than a point in time. The constructor
# accepts mixed inputs: strings, numpy timedelta64 values and
# datetime.timedelta objects. `datetime` is imported in the notebook's
# import cell, so all dependencies are declared in one place.
pd.TimedeltaIndex(
    data=[
        '1 days',
        '1 days, 00:00:05',
        np.timedelta64(2, 'D'),
        datetime.timedelta(days=2, seconds=2),
    ]
)

TimedeltaIndex(['1 days 00:00:00', '1 days 00:00:05', '2 days 00:00:00',
                '2 days 00:00:02'],
               dtype='timedelta64[ns]', freq=None)

In [25]:
# Period index
# Time spans at a fixed frequency: eight consecutive one-second periods
# starting at 2020-05-01 10:00:57.
# NOTE(review): newer pandas releases are understood to deprecate the 'S'
# alias in favour of 's' -- confirm against the installed version.
t = pd.period_range('2020-5-1 10:00:57', periods=8, freq='S')
# `t` is passed to the PeriodIndex constructor with the same frequency; the
# recorded output lists the same eight periods.
pd.PeriodIndex(t, freq='S')

PeriodIndex(['2020-05-01 10:00:57', '2020-05-01 10:00:58',
             '2020-05-01 10:00:59', '2020-05-01 10:01:00',
             '2020-05-01 10:01:01', '2020-05-01 10:01:02',
             '2020-05-01 10:01:03', '2020-05-01 10:01:04'],
            dtype='period[S]')

# 4.1.5 索引对象

In [27]:
# Build an Index object directly from a list of integers.
pd.Index([1,2,3])

Index([1, 2, 3], dtype='int64')

In [28]:
# Build an Index of string labels.
pd.Index(['a', 'b', 'c'])

Index(['a', 'b', 'c'], dtype='object')

In [29]:
# An Index can carry a name, shown as name='something' in the recorded output.
pd.Index(['x','y','z'], name = "something")

Index(['x', 'y', 'z'], dtype='object', name='something')

In [30]:
# Inspect the row index (axis 0): one label per row of the DataFrame.
# The recorded output is a RangeIndex covering 0..99.
df.index

RangeIndex(start=0, stop=100, step=1)

In [31]:
# Inspect the column index (axis 1): one label per column of the DataFrame.
df.columns

Index(['name', 'team', 'Q1', 'Q2', 'Q3', 'Q4'], dtype='object')

# 4.1.6 索引属性

In [32]:
# Name of the index. Nothing is displayed because the index is unnamed
# (the later `names` cell shows [None]).
df.index.name

In [33]:
# The index labels as a pandas array object (a PandasArray in the recorded output).
df.index.array

<PandasArray>
[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
 95, 96, 97, 98, 99]
Length: 100, dtype: int64

In [34]:
# Data type of the index labels.
df.index.dtype

dtype('int64')

In [35]:
# Shape of the index as a one-element tuple.
df.index.shape

(100,)

In [36]:
# Number of labels in the index.
df.index.size

100

In [37]:
# The index labels as a NumPy array (see the `array([...])` output).
df.index.values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [38]:
# Whether the index is empty (contains no labels).
df.index.empty

False

In [39]:
# Whether every label in the index is unique (no repeats).
df.index.is_unique

True

In [40]:
# List of index level names; a single unnamed level shows as [None].
df.index.names

FrozenList([None])

In [41]:
# Whether the index contains duplicate labels (the opposite of is_unique).
df.index.has_duplicates

False