In [4]:
import pandas as pd
import numpy as np
import pandas._testing as tm

In [5]:
def unpivot(frame):
    """Reshape a wide DataFrame into long ("stacked") form.

    Every cell of ``frame`` becomes one row of the result, described by the
    index label it came from (``date``), the column label it came from
    (``variable``) and the cell content (``value``). Rows are emitted column
    by column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table to reshape.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``date``, ``variable`` and ``value`` (in that
        order) and ``frame.shape[0] * frame.shape[1]`` rows.
    """
    n_rows, n_cols = frame.shape
    # Column-major ("F") flattening walks down each column in turn.
    values = frame.to_numpy().ravel("F")
    # Repeat every column label once per row so it lines up with `values`.
    variables = np.asarray(frame.columns).repeat(n_rows)
    # Cycle through the whole index once per column.
    dates = np.tile(np.asarray(frame.index), n_cols)
    long_form = {"date": dates, "variable": variables, "value": values}
    return pd.DataFrame(long_form, columns=["date", "variable", "value"])
    
# tm.makeTimeDataFrame(3) lives in the private pandas._testing module and was
# removed in pandas 2.2. Build the equivalent sample frame with public API
# instead: 3 business days starting 2000-01-03, columns "A".."D", random
# standard-normal values.
df = unpivot(
    pd.DataFrame(
        np.random.randn(3, 4),
        index=pd.bdate_range("2000-01-03", periods=3),
        columns=list("ABCD"),
    )
)
df

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.051392
1,2000-01-04,A,0.289674
2,2000-01-05,A,-0.262376
3,2000-01-03,B,-1.589056
4,2000-01-04,B,0.68638
5,2000-01-05,B,1.303388
6,2000-01-03,C,-1.787768
7,2000-01-04,C,1.098697
8,2000-01-05,C,0.758155
9,2000-01-03,D,-0.780939


在上面的代码中，`tm.makeTimeDataFrame(3)`是pandas内部测试模块`pandas._testing`中的一个辅助函数（不属于公开API，在较新的pandas版本中可能不可用），用于生成一个具有时间索引（行）和多个随机数列（列）的DataFrame，参数`3`表示生成3行。`frame.to_numpy()`将这个DataFrame转换为一个NumPy数组，然后使用`ravel("F")`方法将数组展平为一维，并按列（"F"代表列优先，即Fortran顺序）排列。

`ravel()`方法是NumPy中的一个函数，用于将多维数组展平为一维数组。在这里，使用`ravel("F")`方法将二维数组按列优先展平为一维数组。具体来说，它将DataFrame中每一列的值排列在一起，形成一个一维的NumPy数组。

这种展平操作通常用于将多维数组转换为一维数组，以便更容易地进行操作和计算。在这个例子中，按列展平得到的一维数组正好作为长格式表中的`value`列，并与重复后的列名（`variable`）和平铺后的索引（`date`）逐行对应。此外，也可以方便地在展平后的数组上执行各种NumPy函数和操作，例如使用`np.mean()`函数计算数组的平均值，使用`np.max()`函数计算数组中的最大值等等。

In [5]:
# tm.makeTimeDataFrame(3) is a private pandas._testing helper that was removed
# in pandas 2.2. Build the same kind of frame with public API: 3 business days
# starting 2000-01-03, columns "A".."D", random standard-normal values.
frame = pd.DataFrame(
    np.random.randn(3, 4),
    index=pd.bdate_range("2000-01-03", periods=3),
    columns=list("ABCD"),
)
frame

Unnamed: 0,A,B,C,D
2000-01-03,-0.270213,-0.844347,-0.653107,1.333492
2000-01-04,0.725959,-0.059813,0.18685,-0.216286
2000-01-05,-0.583554,-1.007176,-1.533329,0.687112


In [10]:
# Flatten the `frame` displayed in the previous cell rather than generating a
# new random one, so the 1-D result can be compared value by value with that
# table. Order "F" (column-major) reads the values column by column.
frame.to_numpy().ravel("F")

array([-0.27021277,  0.72595873, -0.58355439, -0.84434736, -0.05981279,
       -1.007176  , -0.65310667,  0.18684997, -1.53332926,  1.33349191,
       -0.21628606,  0.68711153])

In [8]:
# The frame's values as a 2-D NumPy array (index and column labels are dropped).
frame.to_numpy()

array([[-0.27021277, -0.84434736, -0.65310667,  1.33349191],
       [ 0.72595873, -0.05981279,  0.18684997, -0.21628606],
       [-0.58355439, -1.007176  , -1.53332926,  0.68711153]])

In [11]:
# Show the long-format frame produced by unpivot().
df

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.397221
1,2000-01-04,A,0.796207
2,2000-01-05,A,0.451627
3,2000-01-03,B,1.022632
4,2000-01-04,B,1.095201
5,2000-01-05,B,-0.244636
6,2000-01-03,C,-0.616154
7,2000-01-04,C,1.704806
8,2000-01-05,C,1.417464
9,2000-01-03,D,-0.964345


In [13]:
# Long -> wide: one row per distinct "date", one column per distinct
# "variable", with the cells filled from "value".
pivoted = df.pivot(index='date', columns='variable', values='value')
pivoted

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.397221,1.022632,-0.616154,-0.964345
2000-01-04,0.796207,1.095201,1.704806,0.279534
2000-01-05,0.451627,-0.244636,1.417464,-0.014465


In [14]:
# Wide -> long again: reset_index() turns the "date" index back into a regular
# column, then melt() stacks the remaining columns into "type"/"value" pairs.
pivoted.reset_index().melt(id_vars='date', var_name='type', value_name='value')

Unnamed: 0,date,type,value
0,2000-01-03,A,0.397221
1,2000-01-04,A,0.796207
2,2000-01-05,A,0.451627
3,2000-01-03,B,1.022632
4,2000-01-04,B,1.095201
5,2000-01-05,B,-0.244636
6,2000-01-03,C,-0.616154
7,2000-01-04,C,1.704806
8,2000-01-05,C,1.417464
9,2000-01-03,D,-0.964345


# MultiIndex DataFrames
这一章包含如下内容
* 创建一个MultiIndex
* 从带有 MultiIndex 的 DataFrame 中选择行（rows）和列（columns）
* 从 MultiIndex DataFrame 中提取截面数据
* 切换 MultiIndex 级别

维数是我们从数据结构中提取值所需的参考点数。从Series中提取数据时只需要一个label，从DataFrame中提取值则需要两个label（行和列）。当我们需要更多维数时，就需要MultiIndex。

`MultiIndex`是一个包含多个级别的index的对象

多级索引MultiIndex的使用

In [8]:
# One tuple per row label, in the order (street, city, state, zip).
addresses = [
    ("8809 Flair Square", "Toddside", "IL", "37206"),
    ("9901 Austin Street", "Toddside", "IL", "37206"),
    ("905 Hogan Quarter", "Franklin", "IL", "37206"),
]
# Each position of the tuples becomes one named level of the MultiIndex.
row_index = pd.MultiIndex.from_tuples(
    addresses,
    names=["Street", "City", "State", "Zip"],
)
row_index

MultiIndex([( '8809 Flair Square', 'Toddside', 'IL', '37206'),
            ('9901 Austin Street', 'Toddside', 'IL', '37206'),
            ( '905 Hogan Quarter', 'Franklin', 'IL', '37206')],
           names=['Street', 'City', 'State', 'Zip'])

In [9]:
# Letter grades per address: each inner list is one row, ordered like `columns`.
columns = ["Schools", "Cost of Living"]
data = [["A", "B+"], ["C+", "C"], ["D-", "A"]]
# The MultiIndex built earlier labels the rows; the columns stay a flat Index.
area_grades = pd.DataFrame(data, index=row_index, columns=columns)
area_grades

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Schools,Cost of Living
Street,City,State,Zip,Unnamed: 4_level_1,Unnamed: 5_level_1
8809 Flair Square,Toddside,IL,37206,A,B+
9901 Austin Street,Toddside,IL,37206,C+,C
905 Hogan Quarter,Franklin,IL,37206,D-,A


上面的示例我们把MultiIndex放在行轴中。我们也可以放在列轴，Pandas也将DataFrame的列标题存储在索引对象中。我们可以通过columns属性访问该索引

In [16]:
# Column labels are stored in an Index object too (a flat one here).
area_grades.columns

Index(['Schools', 'Cost of Living'], dtype='object')

In [18]:
# Two-level column labels given as (category, subcategory) pairs; no level
# names are assigned here.
column_index = pd.MultiIndex.from_tuples([
    ("Culture", "Restaurants"),
    ("Culture", "Museums"),
    ("Services", "Police"),
    ("Services", "Schools"),
])
column_index

MultiIndex([( 'Culture', 'Restaurants'),
            ( 'Culture',     'Museums'),
            ('Services',      'Police'),
            ('Services',     'Schools')],
           )

In [19]:
# Grades: one inner list per address (row), one entry per
# (category, subcategory) column.
data = [
    ["C-", "B+", "B-", "A"],
    ["D+", "C", "A", "C+"],
    ["A-", "A", "D+", "F"],
]
# MultiIndex on both axes: rows from row_index, columns from column_index.
pd.DataFrame(data, index=row_index, columns=column_index)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Restaurants,Museums,Police,Schools
Street,City,State,Zip,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
8809 Flair Square,Toddside,IL,37206,C-,B+,B-,A
9901 Austin Street,Toddside,IL,37206,D+,C,A,C+
905 Hogan Quarter,Franklin,IL,37206,A-,A,D+,F


In [89]:
# Load the CSV with a MultiIndex on both axes: the first three columns
# (index_col=[0,1,2]) become the row levels and the first two rows
# (header=[0,1]) become the column levels.
neighborhoods = pd.read_csv("./input/neighbor/neighborhoods.csv", index_col=[0,1,2], header=[0,1])
neighborhoods

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO1,Fisherborough3,244 Tracy View,A+,A,A-,A+
MO1,Fisherborough3,245 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,246 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,247 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,248 Tracy View,C+,F,D-,A+
MO2,Fisherborough4,249 Tracy View,C+,F,D-,A+
MO2,Fisherborough4,250 Tracy View,C+,F,D-,A+
MO2,Fisherborough4,251 Tracy View,C+,F,D-,A+
MO2,Fisherborough4,252 Tracy View,C+,F,D-,A—
AR,Allisonland 124,Diaz Brooks,C-,A+,F,C+


In [64]:
# Summary of the frame: index type and range, columns, non-null counts, dtypes.
neighborhoods.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 12 entries, ('MO1', 'Fisherborough3', '244 Tracy View') to ('IA', 'Amyburgh 163', 'Heather')
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   (Culture, Restaurants)  12 non-null     object
 1   (Culture, Museums)      12 non-null     object
 2   (Services, Police)      12 non-null     object
 3   (Services, Schools)     12 non-null     object
dtypes: object(4)
memory usage: 1.4+ KB


In [49]:
# Names of the row-index levels, outermost first.
neighborhoods.index.names

FrozenList(['State', 'City', 'Street'])

In [50]:
# All labels of the level at position 2 of the row index, one per row
# (the level name can be passed instead of the position).
neighborhoods.index.get_level_values(2)

Index(['244 Tracy View', '245 Tracy View', '246 Tracy View', '247 Tracy View',
       '248 Tracy View', '249 Tracy View', '250 Tracy View', '251 Tracy View',
       '252 Tracy View'],
      dtype='object', name='Street')

In [51]:
# Names of the column-index levels (None for a level that has no name).
neighborhoods.columns.names

FrozenList([None, None])

In [52]:
# Name the two column levels. This assigns on the existing columns index, so
# every later cell sees the new names.
neighborhoods.columns.names = ["Category", "Subcategory"]
neighborhoods.columns.names

FrozenList(['Category', 'Subcategory'])

In [53]:
# Labels of the outermost column level, one entry per column.
neighborhoods.columns.get_level_values(0)

Index(['Culture', 'Culture', 'Services', 'Services'], dtype='object', name='Category')

In [54]:
# First five rows.
neighborhoods.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Category,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Subcategory,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO1,Fisherborough1,244 Tracy View,C+,F,D-,A+
MO1,Fisherborough2,245 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,246 Tracy View,C+,F,D-,A+
MO1,Fisherborough4,247 Tracy View,C+,F,D-,A+
MO1,Fisherborough5,248 Tracy View,C+,F,D-,A+


In [57]:
# Sort the rows by index with one direction per level, outermost first:
# ascending, descending, ascending. The result is not assigned, so
# `neighborhoods` itself keeps its original order.
neighborhoods.sort_index(ascending=[True, False, True]).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO1,Fisherborough3,244 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,245 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,246 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,247 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,248 Tracy View,C+,F,D-,A+


In [59]:
# Sort the rows starting from the outermost index level (level=0). The result
# is not assigned, so `neighborhoods` itself keeps its original order.
neighborhoods.sort_index(level=0).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO1,Fisherborough3,244 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,245 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,246 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,247 Tracy View,C+,F,D-,A+
MO1,Fisherborough3,248 Tracy View,C+,F,D-,A+


In [None]:
import pandas as pd

# Build a frame whose rows carry a two-level index (region, product)
data = {
    '销售额': [100, 200, 150, 250, 300, 350],
    '销售量': [10, 20, 15, 25, 30, 35],
}
index = pd.MultiIndex.from_tuples(
    # Every region paired with every product, regions varying slowest
    [(region, product)
     for region in ('东区', '西区')
     for product in ('苹果', '梨子', '香蕉')],
    names=['区域', '产品'],
)
df = pd.DataFrame(data, index=index)

# Show the result
print(df)

       销售额  销售量
区域 产品          
东区 苹果  100   10
   梨子  200   20
   香蕉  150   15
西区 苹果  250   25
   梨子  300   30
   香蕉  350   35


## Selecting with a MultiIndex
使用顶层标签（例如`'Services'`）可以选出该类别下的所有列；使用tuple（例如`('Services', 'Schools')`）来选择特定的列

In [69]:
# A single top-level column label selects every column underneath it.
neighborhoods['Services']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Police,Schools
State,City,Street,Unnamed: 3_level_1,Unnamed: 4_level_1
MO1,Fisherborough3,244 Tracy View,D-,A+
MO1,Fisherborough3,245 Tracy View,D-,A+
MO1,Fisherborough3,246 Tracy View,D-,A+
MO1,Fisherborough3,247 Tracy View,D-,A+
MO1,Fisherborough3,248 Tracy View,D-,A+
MO2,Fisherborough4,249 Tracy View,D-,A+
MO2,Fisherborough4,250 Tracy View,D-,A+
MO2,Fisherborough4,251 Tracy View,D-,A+
MO2,Fisherborough4,252 Tracy View,D-,A—
AR,Allisonland 124,Diaz Brooks,F,C+


In [68]:
# A tuple with one label per column level selects exactly one column.
neighborhoods[('Services', "Schools")]

State  City             Street        
MO1    Fisherborough3   244 Tracy View    A+
                        245 Tracy View    A+
                        246 Tracy View    A+
                        247 Tracy View    A+
                        248 Tracy View    A+
MO2    Fisherborough4   249 Tracy View    A+
                        250 Tracy View    A+
                        251 Tracy View    A+
                        252 Tracy View    A—
AR     Allisonland 124  Diaz Brooks       C+
GA     Amyburgh 941     Brian Ex          C+
IA     Amyburgh 163     Heather           A-
Name: (Services, Schools), dtype: object

In [71]:
# A list of tuples selects several columns, in the order listed.
neighborhoods[[('Services', "Schools"), ("Culture", "Museums")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Services,Culture
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Schools,Museums
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2
MO1,Fisherborough3,244 Tracy View,A+,F
MO1,Fisherborough3,245 Tracy View,A+,F
MO1,Fisherborough3,246 Tracy View,A+,F
MO1,Fisherborough3,247 Tracy View,A+,F
MO1,Fisherborough3,248 Tracy View,A+,F
MO2,Fisherborough4,249 Tracy View,A+,F
MO2,Fisherborough4,250 Tracy View,A+,F
MO2,Fisherborough4,251 Tracy View,A+,F
MO2,Fisherborough4,252 Tracy View,A—,F
AR,Allisonland 124,Diaz Brooks,C+,A+


In [74]:
# A single label is looked up in the outermost row level; all matching rows
# are returned.
neighborhoods.loc["MO1"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Restaurants,Museums,Police,Schools
City,Street,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Fisherborough3,244 Tracy View,C+,F,D-,A+
Fisherborough3,245 Tracy View,C+,F,D-,A+
Fisherborough3,246 Tracy View,C+,F,D-,A+
Fisherborough3,247 Tracy View,C+,F,D-,A+
Fisherborough3,248 Tracy View,C+,F,D-,A+


In [77]:
# Rows whose first two index levels are "MO2" and "Fisherborough4".
# A partial key on an unsorted MultiIndex makes pandas emit a
# PerformanceWarning ("indexing past lexsort depth"), so select from a sorted
# copy; `neighborhoods` itself is left untouched.
# NOTE: in the `loc[a, b]` shorthand the second argument can also be read as a
# column label; the tuple form `loc[(a, b)]` is unambiguous.
neighborhoods.sort_index().loc["MO2", "Fisherborough4"]

  neighborhoods.loc["MO2", "Fisherborough4"]


Unnamed: 0_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Restaurants,Museums,Police,Schools
Street,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
249 Tracy View,C+,F,D-,A+
250 Tracy View,C+,F,D-,A+
251 Tracy View,C+,F,D-,A+
252 Tracy View,C+,F,D-,A—


In [78]:
# Same selection with the unambiguous tuple form: the tuple is one row key
# covering the first two index levels.
# A partial key on an unsorted MultiIndex makes pandas emit a
# PerformanceWarning ("indexing past lexsort depth"), so select from a sorted
# copy; `neighborhoods` itself is left untouched.
neighborhoods.sort_index().loc[("MO2", "Fisherborough4")]

  neighborhoods.loc[("MO2", "Fisherborough4")]


Unnamed: 0_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Restaurants,Museums,Police,Schools
Street,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
249 Tracy View,C+,F,D-,A+
250 Tracy View,C+,F,D-,A+
251 Tracy View,C+,F,D-,A+
252 Tracy View,C+,F,D-,A—


In [73]:
# A tuple with one label per row level addresses a single row.
neighborhoods.loc[("MO1","Fisherborough3", "244 Tracy View")]

Culture   Restaurants    C+
          Museums         F
Services  Police         D-
          Schools        A+
Name: (MO1, Fisherborough3, 244 Tracy View), dtype: object

定位到具体的列

In [81]:
# Row key (first two index levels) plus a column key (top-level column label).
# ("Services") without a trailing comma is just the string "Services", so it
# is written as a plain string to avoid suggesting a tuple.
# The row key is partial, which triggers a PerformanceWarning ("indexing past
# lexsort depth") on an unsorted MultiIndex, so select from a sorted copy.
neighborhoods.sort_index().loc[("MO1", "Fisherborough3"), "Services"]

  neighborhoods.loc[("MO1","Fisherborough3"), ("Services")]


Unnamed: 0_level_0,Police,Schools
Street,Unnamed: 1_level_1,Unnamed: 2_level_1
244 Tracy View,D-,A+
245 Tracy View,D-,A+
246 Tracy View,D-,A+
247 Tracy View,D-,A+
248 Tracy View,D-,A+


In [99]:
# Position-based selection ignores the index labels: first two rows.
neighborhoods.iloc[0:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Culture,Culture,Services,Services
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Restaurants,Museums,Police,Schools
State,City,Street,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MO1,Fisherborough3,244 Tracy View,A+,A,A-,A+
MO1,Fisherborough3,245 Tracy View,C+,F,D-,A+
