# Pandas
## Pandas的数据结构

### 三大结构
- **Series(一维数据)**
- **DataFrame(二维数据)**
- **Panel(三维结构数据/面板数据)**
> 注释：Panel 很少使用（并且已在 pandas 0.25 中被移除），通常使用 MultiIndex 这种结构来解决三维数据的表示问题

In [1]:
import numpy as np
# Simulated daily price changes drawn from a standard normal distribution:
# one row per stock (500 stocks), one column per trading day (504 days).
stock_day_rise_np = np.random.normal(loc=0, scale=1, size=(500, 504))
stock_day_rise_np

array([[ 0.24863578, -1.21297638,  0.45243339, ...,  0.64790064,
         0.37851542,  0.96266331],
       [-0.05013476,  0.93304574,  1.4670602 , ...,  0.2675775 ,
        -0.77781738, -0.08361023],
       [-1.09098886,  1.6061668 , -0.65006337, ...,  0.95190029,
        -0.60554559, -0.8979445 ],
       ...,
       [ 0.4785737 , -1.36352564,  1.03889849, ..., -0.65220661,
         1.10218065,  0.14613888],
       [-0.28994837, -0.53794123, -0.55399037, ..., -0.57083329,
         0.0051516 ,  0.20566739],
       [ 1.42888549, -1.16999404,  0.36480588, ...,  1.25873205,
        -0.85744485, -0.54795688]])

In [2]:
import pandas as pd
# Wrap the raw NumPy array in a pandas DataFrame. No labels are supplied,
# so rows and columns get default integer indexes.
stock_day_rise = pd.DataFrame(data=stock_day_rise_np)

In [3]:
# Display the frame; rows and columns are labelled with default integers.
stock_day_rise

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,494,495,496,497,498,499,500,501,502,503
0,0.248636,-1.212976,0.452433,0.603450,0.912295,-1.048524,0.317964,-0.321733,-0.139335,-1.412826,...,0.783374,1.002562,-0.278472,0.873235,-0.318341,-0.260852,-1.098349,0.647901,0.378515,0.962663
1,-0.050135,0.933046,1.467060,0.581106,0.218312,-0.048434,-0.676546,-0.113399,1.162123,1.121898,...,-1.333668,-0.693868,1.809783,0.439752,-0.118978,1.919913,0.469173,0.267577,-0.777817,-0.083610
2,-1.090989,1.606167,-0.650063,-0.724404,-1.165641,0.663112,0.835546,-0.529276,-0.524226,0.420000,...,0.535531,-0.666180,-1.435093,-0.316295,0.326462,-1.268363,-0.451408,0.951900,-0.605546,-0.897945
3,-1.330916,0.015030,-0.200099,1.066415,-0.195370,-0.745270,-1.780425,-0.407058,0.595408,-1.570257,...,-1.696549,1.384452,0.022715,-1.903010,-0.698312,0.306215,-1.382075,0.289926,0.891708,0.294133
4,-0.937603,2.329901,0.311763,-1.254862,0.774169,-2.327742,-1.524340,0.069368,-1.129643,1.428070,...,0.671176,-1.486104,0.284705,0.129145,-0.164304,-0.423064,-0.389140,-0.156411,0.363409,0.469134
5,1.781887,0.151121,-0.138595,2.716899,0.226893,-0.708622,-0.774401,-0.085147,0.409184,-0.977193,...,-0.081113,-0.943548,1.628140,0.586177,0.378039,-0.607618,0.063293,-1.419314,0.138460,0.955441
6,0.201147,0.772722,1.358849,2.273656,-0.531711,-0.139551,-0.992424,-0.502545,1.291582,0.393029,...,-0.307547,-1.377955,0.572451,-0.345125,-0.079749,1.387242,-0.515746,0.242502,0.550178,-0.652536
7,0.133195,0.391140,0.881296,-0.752443,1.355425,0.616993,0.797608,0.012218,1.126323,-1.201661,...,0.211375,1.275536,-0.320471,0.420688,2.029628,-0.494981,1.414881,0.713688,-0.319300,-0.429547
8,-0.427445,-0.962785,0.965084,-1.011832,0.952474,0.226155,0.845588,-2.444375,-0.895512,1.651950,...,0.501665,-0.167887,-0.684520,0.222067,0.155328,-2.658459,-0.723382,-0.163184,-0.132626,-0.106372
9,-0.322723,-0.423650,0.605198,0.339110,-1.158203,-0.632055,0.700976,0.064644,-1.349730,-0.978336,...,0.673681,0.131195,1.686748,-1.041313,-0.199637,0.353975,0.295299,0.853829,1.905387,0.073701


### DataFrame
**DataFrame对象既有行索引，又有列索引**
- 行索引，表明不同行，横向索引，叫index，0轴，axis=0
- 列索引，表明不同列，纵向索引，叫columns，1轴，axis=1

***给DataFrame数据增加行列索引&名称***

- 增加行索引

In [4]:
# Build one row label per stock: '股票0', '股票1', ...
stock_code = ['股票{}'.format(row) for row in range(len(stock_day_rise))]

# Rebuild the frame from the raw array, attaching the labels as the row index.
data = pd.DataFrame(data=stock_day_rise_np, index=stock_code)

In [5]:
# Display the frame: rows now carry the '股票N' labels, columns are still default integers.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,494,495,496,497,498,499,500,501,502,503
股票0,0.248636,-1.212976,0.452433,0.603450,0.912295,-1.048524,0.317964,-0.321733,-0.139335,-1.412826,...,0.783374,1.002562,-0.278472,0.873235,-0.318341,-0.260852,-1.098349,0.647901,0.378515,0.962663
股票1,-0.050135,0.933046,1.467060,0.581106,0.218312,-0.048434,-0.676546,-0.113399,1.162123,1.121898,...,-1.333668,-0.693868,1.809783,0.439752,-0.118978,1.919913,0.469173,0.267577,-0.777817,-0.083610
股票2,-1.090989,1.606167,-0.650063,-0.724404,-1.165641,0.663112,0.835546,-0.529276,-0.524226,0.420000,...,0.535531,-0.666180,-1.435093,-0.316295,0.326462,-1.268363,-0.451408,0.951900,-0.605546,-0.897945
股票3,-1.330916,0.015030,-0.200099,1.066415,-0.195370,-0.745270,-1.780425,-0.407058,0.595408,-1.570257,...,-1.696549,1.384452,0.022715,-1.903010,-0.698312,0.306215,-1.382075,0.289926,0.891708,0.294133
股票4,-0.937603,2.329901,0.311763,-1.254862,0.774169,-2.327742,-1.524340,0.069368,-1.129643,1.428070,...,0.671176,-1.486104,0.284705,0.129145,-0.164304,-0.423064,-0.389140,-0.156411,0.363409,0.469134
股票5,1.781887,0.151121,-0.138595,2.716899,0.226893,-0.708622,-0.774401,-0.085147,0.409184,-0.977193,...,-0.081113,-0.943548,1.628140,0.586177,0.378039,-0.607618,0.063293,-1.419314,0.138460,0.955441
股票6,0.201147,0.772722,1.358849,2.273656,-0.531711,-0.139551,-0.992424,-0.502545,1.291582,0.393029,...,-0.307547,-1.377955,0.572451,-0.345125,-0.079749,1.387242,-0.515746,0.242502,0.550178,-0.652536
股票7,0.133195,0.391140,0.881296,-0.752443,1.355425,0.616993,0.797608,0.012218,1.126323,-1.201661,...,0.211375,1.275536,-0.320471,0.420688,2.029628,-0.494981,1.414881,0.713688,-0.319300,-0.429547
股票8,-0.427445,-0.962785,0.965084,-1.011832,0.952474,0.226155,0.845588,-2.444375,-0.895512,1.651950,...,0.501665,-0.167887,-0.684520,0.222067,0.155328,-2.658459,-0.723382,-0.163184,-0.132626,-0.106372
股票9,-0.322723,-0.423650,0.605198,0.339110,-1.158203,-0.632055,0.700976,0.064644,-1.349730,-0.978336,...,0.673681,0.131195,1.686748,-1.041313,-0.199637,0.353975,0.295299,0.853829,1.905387,0.073701


- 增加列索引
    - 股票的日期是一个时间的序列，我们要实现从前往后的时间还要考虑每月的总天数等，不方便。
暂时使用pd.date_range()：用于生成一组连续的时间序列(暂时了解)

In [6]:
"""
date_range(start=None, end=None, periods=None, freq=None)
    start:开始时间
    end:结束时间
    periods:生成的时间点个数
    freq:递进单位，不指定时默认为1天('D')；'B'表示工作日，略过周末
"""
# One business-day timestamp per column of the frame; freq='B' skips weekends,
# which are not trading days.
date = pd.date_range(start='2017-01-01', periods=len(stock_day_rise.columns), freq='B')

# `index` supplies the row labels, `columns` supplies the column labels.
data = pd.DataFrame(data=stock_day_rise_np, index=stock_code, columns=date)

In [7]:
# Display the frame: rows are labelled '股票N', columns are business-day timestamps.
data

Unnamed: 0,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,2017-01-11 00:00:00,2017-01-12 00:00:00,2017-01-13 00:00:00,...,2018-11-23 00:00:00,2018-11-26 00:00:00,2018-11-27 00:00:00,2018-11-28 00:00:00,2018-11-29 00:00:00,2018-11-30 00:00:00,2018-12-03 00:00:00,2018-12-04 00:00:00,2018-12-05 00:00:00,2018-12-06 00:00:00
股票0,0.248636,-1.212976,0.452433,0.603450,0.912295,-1.048524,0.317964,-0.321733,-0.139335,-1.412826,...,0.783374,1.002562,-0.278472,0.873235,-0.318341,-0.260852,-1.098349,0.647901,0.378515,0.962663
股票1,-0.050135,0.933046,1.467060,0.581106,0.218312,-0.048434,-0.676546,-0.113399,1.162123,1.121898,...,-1.333668,-0.693868,1.809783,0.439752,-0.118978,1.919913,0.469173,0.267577,-0.777817,-0.083610
股票2,-1.090989,1.606167,-0.650063,-0.724404,-1.165641,0.663112,0.835546,-0.529276,-0.524226,0.420000,...,0.535531,-0.666180,-1.435093,-0.316295,0.326462,-1.268363,-0.451408,0.951900,-0.605546,-0.897945
股票3,-1.330916,0.015030,-0.200099,1.066415,-0.195370,-0.745270,-1.780425,-0.407058,0.595408,-1.570257,...,-1.696549,1.384452,0.022715,-1.903010,-0.698312,0.306215,-1.382075,0.289926,0.891708,0.294133
股票4,-0.937603,2.329901,0.311763,-1.254862,0.774169,-2.327742,-1.524340,0.069368,-1.129643,1.428070,...,0.671176,-1.486104,0.284705,0.129145,-0.164304,-0.423064,-0.389140,-0.156411,0.363409,0.469134
股票5,1.781887,0.151121,-0.138595,2.716899,0.226893,-0.708622,-0.774401,-0.085147,0.409184,-0.977193,...,-0.081113,-0.943548,1.628140,0.586177,0.378039,-0.607618,0.063293,-1.419314,0.138460,0.955441
股票6,0.201147,0.772722,1.358849,2.273656,-0.531711,-0.139551,-0.992424,-0.502545,1.291582,0.393029,...,-0.307547,-1.377955,0.572451,-0.345125,-0.079749,1.387242,-0.515746,0.242502,0.550178,-0.652536
股票7,0.133195,0.391140,0.881296,-0.752443,1.355425,0.616993,0.797608,0.012218,1.126323,-1.201661,...,0.211375,1.275536,-0.320471,0.420688,2.029628,-0.494981,1.414881,0.713688,-0.319300,-0.429547
股票8,-0.427445,-0.962785,0.965084,-1.011832,0.952474,0.226155,0.845588,-2.444375,-0.895512,1.651950,...,0.501665,-0.167887,-0.684520,0.222067,0.155328,-2.658459,-0.723382,-0.163184,-0.132626,-0.106372
股票9,-0.322723,-0.423650,0.605198,0.339110,-1.158203,-0.632055,0.700976,0.064644,-1.349730,-0.978336,...,0.673681,0.131195,1.686748,-1.041313,-0.199637,0.353975,0.295299,0.853829,1.905387,0.073701


### DataFrame的属性
- shape
- dtypes
- ndim
- index
- columns
- values
- T
- 还有一些方便整体查看数据的方法（注意它们是方法，不是属性）
    - head(5)
    - tail(5)

### DataFrame索引的设置
- 修改行列索引值

In [9]:
# Changing the row labels.
# A pandas Index cannot be edited element by element, so this is rejected:
#     data.index[499] = "0000001.SH"
#
# Replace the whole index in a single assignment instead. The new labels are
# sized from the frame itself rather than a hard-coded 500, so the cell keeps
# working if the number of stocks generated earlier is changed.
data.index = list(range(len(data)))

In [10]:
# Display the frame again: the row labels are now the integers assigned above.
data

Unnamed: 0,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,2017-01-11 00:00:00,2017-01-12 00:00:00,2017-01-13 00:00:00,...,2018-11-23 00:00:00,2018-11-26 00:00:00,2018-11-27 00:00:00,2018-11-28 00:00:00,2018-11-29 00:00:00,2018-11-30 00:00:00,2018-12-03 00:00:00,2018-12-04 00:00:00,2018-12-05 00:00:00,2018-12-06 00:00:00
0,0.248636,-1.212976,0.452433,0.603450,0.912295,-1.048524,0.317964,-0.321733,-0.139335,-1.412826,...,0.783374,1.002562,-0.278472,0.873235,-0.318341,-0.260852,-1.098349,0.647901,0.378515,0.962663
1,-0.050135,0.933046,1.467060,0.581106,0.218312,-0.048434,-0.676546,-0.113399,1.162123,1.121898,...,-1.333668,-0.693868,1.809783,0.439752,-0.118978,1.919913,0.469173,0.267577,-0.777817,-0.083610
2,-1.090989,1.606167,-0.650063,-0.724404,-1.165641,0.663112,0.835546,-0.529276,-0.524226,0.420000,...,0.535531,-0.666180,-1.435093,-0.316295,0.326462,-1.268363,-0.451408,0.951900,-0.605546,-0.897945
3,-1.330916,0.015030,-0.200099,1.066415,-0.195370,-0.745270,-1.780425,-0.407058,0.595408,-1.570257,...,-1.696549,1.384452,0.022715,-1.903010,-0.698312,0.306215,-1.382075,0.289926,0.891708,0.294133
4,-0.937603,2.329901,0.311763,-1.254862,0.774169,-2.327742,-1.524340,0.069368,-1.129643,1.428070,...,0.671176,-1.486104,0.284705,0.129145,-0.164304,-0.423064,-0.389140,-0.156411,0.363409,0.469134
5,1.781887,0.151121,-0.138595,2.716899,0.226893,-0.708622,-0.774401,-0.085147,0.409184,-0.977193,...,-0.081113,-0.943548,1.628140,0.586177,0.378039,-0.607618,0.063293,-1.419314,0.138460,0.955441
6,0.201147,0.772722,1.358849,2.273656,-0.531711,-0.139551,-0.992424,-0.502545,1.291582,0.393029,...,-0.307547,-1.377955,0.572451,-0.345125,-0.079749,1.387242,-0.515746,0.242502,0.550178,-0.652536
7,0.133195,0.391140,0.881296,-0.752443,1.355425,0.616993,0.797608,0.012218,1.126323,-1.201661,...,0.211375,1.275536,-0.320471,0.420688,2.029628,-0.494981,1.414881,0.713688,-0.319300,-0.429547
8,-0.427445,-0.962785,0.965084,-1.011832,0.952474,0.226155,0.845588,-2.444375,-0.895512,1.651950,...,0.501665,-0.167887,-0.684520,0.222067,0.155328,-2.658459,-0.723382,-0.163184,-0.132626,-0.106372
9,-0.322723,-0.423650,0.605198,0.339110,-1.158203,-0.632055,0.700976,0.064644,-1.349730,-0.978336,...,0.673681,0.131195,1.686748,-1.041313,-0.199637,0.353975,0.295299,0.853829,1.905387,0.073701


- 重置索引

In [11]:
# Reset the row index to the default 0..n-1 integers. drop=True discards the
# old labels instead of inserting them as a column. The result is only
# displayed here: it is not assigned, so `data` itself is left unchanged.
data.reset_index(drop=True)

Unnamed: 0,2017-01-02 00:00:00,2017-01-03 00:00:00,2017-01-04 00:00:00,2017-01-05 00:00:00,2017-01-06 00:00:00,2017-01-09 00:00:00,2017-01-10 00:00:00,2017-01-11 00:00:00,2017-01-12 00:00:00,2017-01-13 00:00:00,...,2018-11-23 00:00:00,2018-11-26 00:00:00,2018-11-27 00:00:00,2018-11-28 00:00:00,2018-11-29 00:00:00,2018-11-30 00:00:00,2018-12-03 00:00:00,2018-12-04 00:00:00,2018-12-05 00:00:00,2018-12-06 00:00:00
0,0.248636,-1.212976,0.452433,0.603450,0.912295,-1.048524,0.317964,-0.321733,-0.139335,-1.412826,...,0.783374,1.002562,-0.278472,0.873235,-0.318341,-0.260852,-1.098349,0.647901,0.378515,0.962663
1,-0.050135,0.933046,1.467060,0.581106,0.218312,-0.048434,-0.676546,-0.113399,1.162123,1.121898,...,-1.333668,-0.693868,1.809783,0.439752,-0.118978,1.919913,0.469173,0.267577,-0.777817,-0.083610
2,-1.090989,1.606167,-0.650063,-0.724404,-1.165641,0.663112,0.835546,-0.529276,-0.524226,0.420000,...,0.535531,-0.666180,-1.435093,-0.316295,0.326462,-1.268363,-0.451408,0.951900,-0.605546,-0.897945
3,-1.330916,0.015030,-0.200099,1.066415,-0.195370,-0.745270,-1.780425,-0.407058,0.595408,-1.570257,...,-1.696549,1.384452,0.022715,-1.903010,-0.698312,0.306215,-1.382075,0.289926,0.891708,0.294133
4,-0.937603,2.329901,0.311763,-1.254862,0.774169,-2.327742,-1.524340,0.069368,-1.129643,1.428070,...,0.671176,-1.486104,0.284705,0.129145,-0.164304,-0.423064,-0.389140,-0.156411,0.363409,0.469134
5,1.781887,0.151121,-0.138595,2.716899,0.226893,-0.708622,-0.774401,-0.085147,0.409184,-0.977193,...,-0.081113,-0.943548,1.628140,0.586177,0.378039,-0.607618,0.063293,-1.419314,0.138460,0.955441
6,0.201147,0.772722,1.358849,2.273656,-0.531711,-0.139551,-0.992424,-0.502545,1.291582,0.393029,...,-0.307547,-1.377955,0.572451,-0.345125,-0.079749,1.387242,-0.515746,0.242502,0.550178,-0.652536
7,0.133195,0.391140,0.881296,-0.752443,1.355425,0.616993,0.797608,0.012218,1.126323,-1.201661,...,0.211375,1.275536,-0.320471,0.420688,2.029628,-0.494981,1.414881,0.713688,-0.319300,-0.429547
8,-0.427445,-0.962785,0.965084,-1.011832,0.952474,0.226155,0.845588,-2.444375,-0.895512,1.651950,...,0.501665,-0.167887,-0.684520,0.222067,0.155328,-2.658459,-0.723382,-0.163184,-0.132626,-0.106372
9,-0.322723,-0.423650,0.605198,0.339110,-1.158203,-0.632055,0.700976,0.064644,-1.349730,-0.978336,...,0.673681,0.131195,1.686748,-1.041313,-0.199637,0.353975,0.295299,0.853829,1.905387,0.073701


- 以某列值设置为新的索引

In [12]:
# Small sales table used to demonstrate set_index.
df = pd.DataFrame({'month': [1, 4, 7, 10], 'year': [1, 1, 2, 2], 'sale': [55, 40, 84, 31]})

# set_index returns a new DataFrame rather than modifying the caller, so the
# result has to be assigned for the new index to take effect.
df = df.set_index(['month'])

# Build a two-level MultiIndex (year, month) from the 'year' column plus the
# current index. The result must be captured or it is thrown away; it is kept
# under its own name so that `df` retains its single-level 'month' index.
df_multi = df.set_index(['year', df.index])

# Show the index of df (still the single-level 'month' index).
df.index

Int64Index([1, 4, 7, 10], dtype='int64', name='month')

In [15]:
# Print the built-in documentation of the DataFrame.shape property.
help(pd.DataFrame.shape)

Help on property:

    Return a tuple representing the dimensionality of the DataFrame.
    
    See Also
    --------
    ndarray.shape
    
    Examples
    --------
    >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
    >>> df.shape
    (2, 2)
    
    >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4],
    ...                    'col3': [5, 6]})
    >>> df.shape
    (2, 3)

