In [1]:
import pandas as pd

In [4]:
# Dataset: the McDonald's Big Mac index.
# parse_dates converts the 'Date' column to datetime while loading.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
)
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [5]:
# Check the dtype of every column; 'Date' was parsed as datetime when loading.
# (The attribute is `dtypes`; the previous spelling `dtypeses` raised AttributeError.)
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [6]:
# Inspect the overall state of the dataset (row count, column dtypes, memory usage).
# The non-null counts equal the row count, so there are no missing values.
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
Date                   652 non-null datetime64[ns]
Country                652 non-null object
Price in US Dollars    652 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


## Create A MultiIndex with .set_index() Method

In [7]:
# Reload the raw data (default integer index) as the starting point for this section.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
)
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [8]:
# Passing a single column name to set_index makes that column the index.
# The result is only displayed here; bigmac itself is not reassigned.
bigmac.set_index('Date').head()

Unnamed: 0_level_0,Country,Price in US Dollars
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [12]:
# Passing a list of column names to set_index builds a MultiIndex.
# The index levels follow the order of the list: 'Date' is the outer level,
# 'Country' the inner one.
# The result is reassigned rather than mutated with inplace=True, so the step
# reads as "new frame from old frame".
# NOTE: 'Date' and 'Country' stop being columns after this cell, so re-running
# it without re-running the load cell raises KeyError.
bigmac = bigmac.set_index(['Date', 'Country'])
bigmac.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
2016-01-01,Chile,2.94
2016-01-01,China,2.68
2016-01-01,Colombia,2.43
2016-01-01,Costa Rica,4.02
2016-01-01,Czech Republic,2.98


In [14]:
# Sort the rows by the MultiIndex (default arguments: ascending on every level).
# Reassigned instead of inplace=True so the cell is an explicit transformation.
bigmac = bigmac.sort_index()
bigmac.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
2010-01-01,Chile,3.18
2010-01-01,China,1.83
2010-01-01,Colombia,3.91
2010-01-01,Costa Rica,3.52
2010-01-01,Czech Republic,3.71


In [16]:
# Check the names of the current index levels.
bigmac.index.names

FrozenList(['Date', 'Country'])

In [17]:
# With two index levels the index object is a MultiIndex.
type(bigmac.index)

pandas.core.indexes.multi.MultiIndex

In [18]:
# Each entry of the MultiIndex is a tuple holding one value per level.
bigmac.index[0]

(Timestamp('2010-01-01 00:00:00'), 'Argentina')

## .get_level_values() Method

In [4]:
# index_col accepts a list of columns, so the MultiIndex is built at load time.
# Sorting the index makes working with it more efficient.
# .sort_index() is chained instead of called with inplace=True, so the cell
# produces the final frame in a single expression.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [7]:
# get_level_values returns the index values of one level of the MultiIndex.
# Levels are numbered from the outside in: 0 is the first level, 1 the second, ...
# Here level 0 is Date and level 1 is Country.
bigmac.index.get_level_values(0)
# The level can also be selected by name:
#  bigmac.index.get_level_values('Date')

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

In [6]:
# Level 1 is the inner level, Country.
bigmac.index.get_level_values(1)
# The level can also be selected by name:
#  bigmac.index.get_level_values('Country')

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

## .set_names() Method

In [8]:
# Reload with a sorted (Date, Country) MultiIndex as the starting point for
# this section. .sort_index() is chained rather than applied with inplace=True.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [9]:
# set_names renames the index levels; the list supplies one name per level.
# The renamed index is assigned back instead of mutating it with inplace=True.
bigmac.index = bigmac.index.set_names(['Dat', 'Location'])
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Dat,Location,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


## .sort_index() Method on MultiIndex

In [10]:
# Reload with a (Date, Country) MultiIndex and sort it ascending on both levels.
# .sort_index() is chained rather than applied with inplace=True.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [14]:
# A list passed to `ascending` sets the sort direction per index level:
# here Date ascending and Country descending.
bigmac.sort_index(ascending=[True,False]).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Uruguay,3.32
2010-01-01,United States,3.58
2010-01-01,Ukraine,1.83
2010-01-01,UAE,2.99
2010-01-01,Turkey,3.83
2010-01-01,Thailand,2.11
2010-01-01,Taiwan,2.36
2010-01-01,Switzerland,6.3
2010-01-01,Sweden,5.51
2010-01-01,Sri Lanka,1.83


## Extract Rows from MultiIndex DataFrame

In [3]:
# Reload with a sorted (Date, Country) MultiIndex for the row-extraction examples.
# .sort_index() is chained rather than applied with inplace=True.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [10]:
# Use .loc to extract the rows for a given value of the first index level.
# NOTE: ('2010-01-01') is just a parenthesised string, not a one-element tuple
# (that would need a trailing comma), so .loc receives the plain string here.
bigmac.loc[('2010-01-01')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
2010-01-01,Chile,3.18
2010-01-01,China,1.83
2010-01-01,Colombia,3.91
2010-01-01,Costa Rica,3.52
2010-01-01,Czech Republic,3.71


In [11]:
# A tuple key addresses several index levels at once: (Date, Country).
bigmac.loc[('2010-01-01', 'Egypt')]

Price in US Dollars    2.38
Name: (2010-01-01 00:00:00, Egypt), dtype: float64

In [12]:
# The first argument of .loc selects rows (index), the second selects columns.
# The tuple is the row key; 'Price in US Dollars' is the column label.
bigmac.loc[('2010-01-01', 'Egypt'), 'Price in US Dollars']

Date        Country
2010-01-01  Egypt      2.38
Name: Price in US Dollars, dtype: float64

## .transpose() Method

In [17]:
# Reload with a sorted (Date, Country) MultiIndex before demonstrating transpose.
# .sort_index() is chained rather than applied with inplace=True.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [18]:
# transpose() swaps the row index and the columns.
# transpose has no inplace parameter, so the result is assigned back.
# NOTE: re-running this cell transposes the frame back again; run it once
# after the load cell.
bigmac = bigmac.transpose()
bigmac.head()

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [20]:
# After the transpose the former column label is the row label and the
# (Date, Country) MultiIndex sits on the columns, so the .loc selectors swap:
# the row key comes first, then the column key.
bigmac.loc['Price in US Dollars', '2010-01-01']

Date        Country       
2010-01-01  Argentina         1.84
            Australia         3.98
            Brazil            4.76
            Britain           3.67
            Canada            3.97
            Chile             3.18
            China             1.83
            Colombia          3.91
            Costa Rica        3.52
            Czech Republic    3.71
            Denmark           5.99
            Egypt             2.38
            Euro area         4.84
            Hong Kong         1.91
            Hungary           3.86
            Indonesia         2.24
            Israel            3.99
            Japan             3.50
            Latvia            3.09
            Lithuania         2.87
            Malaysia          2.08
            Mexico            2.50
            New Zealand       3.61
            Norway            7.02
            Pakistan          2.42
            Peru              2.81
            Philippines       2.21
            Poland          

## .swaplevel() Method

In [21]:
# Reload with a sorted (Date, Country) MultiIndex before demonstrating swaplevel.
# .sort_index() is chained rather than applied with inplace=True.
bigmac = pd.read_csv(
    'bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [24]:
# swaplevel exchanges two index levels; with two levels no arguments are needed.
# With more than two levels, pass the levels to swap explicitly.
bigmac.swaplevel().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76
Britain,2010-01-01,3.67
Canada,2010-01-01,3.97


## .stack() Method

In [27]:
# Dataset of population and GDP per country and year.
# country and year together form the MultiIndex.
world = pd.read_csv(
    'worldstats.csv',
    index_col=['country', 'year'],
)
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0


In [28]:
# stack moves the column labels (Population, GDP) into a new innermost index
# level, i.e. it unpivots the frame.
world.stack()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
                  GDP           2.846994e+12
            2012  Population    3.688026e+08
                  GDP           2.773270e+12
            2011  Population    3.610318e+08
                  GDP           2.497945e+12
            2010  Population    3.531122e+08
                  GDP           2.103825e+12
            2009  Population    3.450542e+08
                  GDP           1.798878e+12
            2008  Population    3.368865e+08
                  GDP           2.081343e+12
            2007  Population    3.287666e+08
                  GDP           1.641666e+12
            2006  Population    3.209067e+08
                  GDP           1.404190e+12
            2005  Population    3.134309e+08
                  GDP     

In [29]:
# The stacked result is a Series, not a DataFrame.
type(world.stack())

pandas.core.series.Series

In [30]:
# to_frame converts the Series into a single-column DataFrame.
world.stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,Population,3.920223e+08
Arab World,2015,GDP,2.530102e+12
Arab World,2014,Population,3.842226e+08
Arab World,2014,GDP,2.873600e+12
Arab World,2013,Population,3.765043e+08
Arab World,2013,GDP,2.846994e+12
Arab World,2012,Population,3.688026e+08
Arab World,2012,GDP,2.773270e+12
Arab World,2011,Population,3.610318e+08
Arab World,2011,GDP,2.497945e+12


## .unstack() Method

In [2]:
# Reload the world statistics with (country, year) as the MultiIndex.
world = pd.read_csv(
    'worldstats.csv',
    index_col=['country', 'year'],
)
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0


In [6]:
# First reshape with stack; the stacked Series is kept in `s` for the
# unstack examples that follow.
s = world.stack()
s.head()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
dtype: float64

In [8]:
# unstack reverses stack: the innermost index level goes back to the columns.
s.unstack().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1960,8994793.0,537777800.0
Afghanistan,1961,9164945.0,548888900.0
Afghanistan,1962,9343772.0,546666700.0
Afghanistan,1963,9531555.0,751111200.0
Afghanistan,1964,9728645.0,800000000.0


In [9]:
# year is still an index level at this point; chaining a second unstack moves
# it to the columns as well.
# The columns then have two levels: Population/GDP on top, year below.
s.unstack().unstack().head()

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,...,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP
year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Afghanistan,8994793.0,9164945.0,9343772.0,9531555.0,9728645.0,9935358.0,10148841.0,10368600.0,10599790.0,10849510.0,...,7057598000.0,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19199440000.0
Albania,,,,,,,,,,,...,8992642000.0,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12781030000.0,13277960000.0,11455600000.0
Algeria,11124892.0,11404859.0,11690152.0,11985130.0,12295973.0,12626953.0,12980269.0,13354197.0,13744383.0,14144437.0,...,117027300000.0,134977100000.0,171000700000.0,137211000000.0,161207300000.0,200013100000.0,209047400000.0,209703500000.0,213518500000.0,166838600000.0
Andorra,,,,,,,,,,,...,3536452000.0,4010785000.0,4001349000.0,3649863000.0,3346317000.0,3427236000.0,3146178000.0,3249101000.0,,
Angola,,,,,,,,,,,...,41789480000.0,60448920000.0,84178030000.0,75492380000.0,82470910000.0,104115900000.0,115398400000.0,124912100000.0,126775100000.0,102643100000.0


In [10]:
# Unstacking once more moves the last remaining index level too, so the whole
# structure is taken apart and the result is a Series again.
s.unstack().unstack().unstack()

            year  country               
Population  1960  Afghanistan               8.994793e+06
                  Albania                            NaN
                  Algeria                   1.112489e+07
                  Andorra                            NaN
                  Angola                             NaN
                  Antigua and Barbuda                NaN
                  Arab World                         NaN
                  Argentina                          NaN
                  Armenia                            NaN
                  Aruba                              NaN
                  Australia                 1.027648e+07
                  Austria                   7.047539e+06
                  Azerbaijan                         NaN
                  Bahamas, The              1.095260e+05
                  Bahrain                            NaN
                  Bangladesh                4.820070e+07
                  Barbados                     

## .unstack() Method

In [11]:
# Reload the world statistics with (country, year) as the MultiIndex.
world = pd.read_csv(
    'worldstats.csv',
    index_col=['country', 'year'],
)
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0


In [15]:
# Stack the columns into the index; `s` is the three-level Series used below.
s = world.stack()
s.head()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
dtype: float64

In [17]:
# By default unstack works on the innermost level, here the Population/GDP labels.
# The stacked Series has numbered index levels: country is 0, year is 1, and
# the Population/GDP labels are 2.
# Passing a level number chooses which level is moved to the columns.
s.unstack(2).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1960,8994793.0,537777800.0
Afghanistan,1961,9164945.0,548888900.0
Afghanistan,1962,9343772.0,546666700.0
Afghanistan,1963,9531555.0,751111200.0
Afghanistan,1964,9728645.0,800000000.0


In [19]:
# Move level 0 (country) to the columns instead.
s.unstack(0).head()

Unnamed: 0_level_0,country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Arab World,Argentina,Armenia,Aruba,...,Uzbekistan,Vanuatu,"Venezuela, RB",Vietnam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1960,Population,8994793.0,,11124890.0,,,,,,,,...,,,8146845.0,,32000.0,,3035056000.0,,3049586.0,3752390.0
1960,GDP,537777800.0,,2723638000.0,,,,,,,,...,,,8607600000.0,,24200000.0,,1364643000000.0,,698739700.0,1052990000.0
1961,Population,9164945.0,,11404860.0,,,,,,,,...,,,8461684.0,,34100.0,,3076121000.0,,3142848.0,3876638.0
1961,GDP,548888900.0,,2434767000.0,,,,,,,,...,,,8923367000.0,,25700000.0,,1420440000000.0,,682359700.0,1096647000.0
1962,Population,9343772.0,,11690150.0,,,,,21287682.0,,,...,,,8790590.0,,36300.0,,3129064000.0,,3240664.0,4006262.0


In [21]:
# Level numbers behave like list indices, so negative values are allowed:
# -2 is the second level from the inside, i.e. year.
s.unstack(-2).head()

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8994793.0,9164945.0,9343772.0,9531555.0,9728645.0,9935358.0,10148840.0,10368600.0,10599790.0,10849510.0,...,25183620.0,25877540.0,26528740.0,27207290.0,27962210.0,28809170.0,29726800.0,30682500.0,31627510.0,32526560.0
Afghanistan,GDP,537777800.0,548888900.0,546666700.0,751111200.0,800000000.0,1006667000.0,1400000000.0,1673333000.0,1373333000.0,1408889000.0,...,7057598000.0,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19199440000.0
Albania,Population,,,,,,,,,,,...,2992547.0,2970017.0,2947314.0,2927519.0,2913021.0,2904780.0,2900247.0,2896652.0,2893654.0,2889167.0
Albania,GDP,,,,,,,,,,,...,8992642000.0,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12781030000.0,13277960000.0,11455600000.0
Algeria,Population,11124890.0,11404860.0,11690150.0,11985130.0,12295970.0,12626950.0,12980270.0,13354200.0,13744380.0,14144440.0,...,33749330.0,34261970.0,34811060.0,35401790.0,36036160.0,36717130.0,37439430.0,38186140.0,38934330.0,39666520.0


In [23]:
# The level can also be given by name.
s.unstack('year').head()

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8994793.0,9164945.0,9343772.0,9531555.0,9728645.0,9935358.0,10148840.0,10368600.0,10599790.0,10849510.0,...,25183620.0,25877540.0,26528740.0,27207290.0,27962210.0,28809170.0,29726800.0,30682500.0,31627510.0,32526560.0
Afghanistan,GDP,537777800.0,548888900.0,546666700.0,751111200.0,800000000.0,1006667000.0,1400000000.0,1673333000.0,1373333000.0,1408889000.0,...,7057598000.0,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19199440000.0
Albania,Population,,,,,,,,,,,...,2992547.0,2970017.0,2947314.0,2927519.0,2913021.0,2904780.0,2900247.0,2896652.0,2893654.0,2889167.0
Albania,GDP,,,,,,,,,,,...,8992642000.0,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12781030000.0,13277960000.0,11455600000.0
Algeria,Population,11124890.0,11404860.0,11690150.0,11985130.0,12295970.0,12626950.0,12980270.0,13354200.0,13744380.0,14144440.0,...,33749330.0,34261970.0,34811060.0,35401790.0,36036160.0,36717130.0,37439430.0,38186140.0,38934330.0,39666520.0


## .unstack() Method

In [25]:
# Reload the world statistics and stack the columns into the index in one cell.
world = pd.read_csv(
    'worldstats.csv',
    index_col=['country', 'year'],
)
s = world.stack()
s.head()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
dtype: float64

In [27]:
# Besides a single level, unstack also accepts a list of levels.
# The resulting column levels follow the order of the list (here year, then country).
s.unstack(level=[1,0]).head()

year,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,...,1969,1968,1967,1966,1965,1964,1963,1962,1961,1960
country,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,...,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
Population,392022300.0,384222600.0,376504300.0,368802600.0,361031800.0,353112200.0,345054200.0,336886500.0,328766600.0,320906700.0,...,5036321.0,4874113.0,4718612.0,4568320.0,4422132.0,4279561.0,4140804.0,4006262.0,3876638.0,3752390.0
GDP,2530102000000.0,2873600000000.0,2846994000000.0,2773270000000.0,2497945000000.0,2103825000000.0,1798878000000.0,2081343000000.0,1641666000000.0,1404190000000.0,...,1747999000.0,1479600000.0,1397002000.0,1281750000.0,1311436000.0,1217138000.0,1159512000.0,1117602000.0,1096647000.0,1052990000.0


In [28]:
# Same levels in the opposite order: country on top, year below.
s.unstack(level=[0,1]).head()

country,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,...,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
year,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,...,1969,1968,1967,1966,1965,1964,1963,1962,1961,1960
Population,392022300.0,384222600.0,376504300.0,368802600.0,361031800.0,353112200.0,345054200.0,336886500.0,328766600.0,320906700.0,...,5036321.0,4874113.0,4718612.0,4568320.0,4422132.0,4279561.0,4140804.0,4006262.0,3876638.0,3752390.0
GDP,2530102000000.0,2873600000000.0,2846994000000.0,2773270000000.0,2497945000000.0,2103825000000.0,1798878000000.0,2081343000000.0,1641666000000.0,1404190000000.0,...,1747999000.0,1479600000.0,1397002000.0,1281750000.0,1311436000.0,1217138000.0,1159512000.0,1117602000.0,1096647000.0,1052990000.0


In [29]:
# fill_value replaces the missing values that would otherwise appear as NaN.
# This only affects the object returned here; `s` itself is not modified.
# To keep the filled result, assign it to a variable.
s.unstack('year', fill_value=0).head(100)

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,2.518362e+07,2.587754e+07,2.652874e+07,2.720729e+07,2.796221e+07,2.880917e+07,2.972680e+07,3.068250e+07,3.162751e+07,3.252656e+07
Afghanistan,GDP,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,Population,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,2.992547e+06,2.970017e+06,2.947314e+06,2.927519e+06,2.913021e+06,2.904780e+06,2.900247e+06,2.896652e+06,2.893654e+06,2.889167e+06
Albania,GDP,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,Population,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,3.374933e+07,3.426197e+07,3.481106e+07,3.540179e+07,3.603616e+07,3.671713e+07,3.743943e+07,3.818614e+07,3.893433e+07,3.966652e+07
Algeria,GDP,2.723638e+09,2.434767e+09,2.001461e+09,2.703004e+09,2.909340e+09,3.136284e+09,3.039859e+09,3.370870e+09,3.852147e+09,4.257253e+09,...,1.170273e+11,1.349771e+11,1.710007e+11,1.372110e+11,1.612073e+11,2.000131e+11,2.090474e+11,2.097035e+11,2.135185e+11,1.668386e+11
Andorra,Population,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,8.337300e+04,8.487800e+04,8.561600e+04,8.547400e+04,8.441900e+04,8.232600e+04,7.931600e+04,7.590200e+04,0.000000e+00,0.000000e+00
Andorra,GDP,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,3.536452e+09,4.010785e+09,4.001349e+09,3.649863e+09,3.346317e+09,3.427236e+09,3.146178e+09,3.249101e+09,0.000000e+00,0.000000e+00
Angola,Population,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,1.854147e+07,1.918391e+07,1.984225e+07,2.052010e+07,2.121995e+07,2.194230e+07,2.268563e+07,2.344820e+07,2.422752e+07,2.502197e+07
Angola,GDP,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,4.178948e+10,6.044892e+10,8.417803e+10,7.549238e+10,8.247091e+10,1.041159e+11,1.153984e+11,1.249121e+11,1.267751e+11,1.026431e+11


## pivot Method

In [5]:
# Fictional dataset of the revenue each salesman generated for a company.
# parse_dates makes sure the 'Date' column is loaded as datetime.
sales = pd.read_csv(
    'salesmen.csv',
    parse_dates=['Date'],
)
sales.head()

Unnamed: 0,Date,Salesman,Revenue
0,2016-01-01,Bob,7172
1,2016-01-02,Bob,6362
2,2016-01-03,Bob,5982
3,2016-01-04,Bob,7917
4,2016-01-05,Bob,7837


In [6]:
# Total number of rows.
len(sales)

1830

In [9]:
# Count the rows per value of the Salesman column.
# There are only five distinct salesmen, which makes the column a good
# candidate for the category dtype.
sales['Salesman'].value_counts()

Ronald    366
Oscar     366
Dave      366
Bob       366
Jeb       366
Name: Salesman, dtype: int64

In [11]:
# Convert the low-cardinality Salesman column to the category dtype.
sales['Salesman'] = sales['Salesman'].astype('category')

In [None]:
# From here on the dataset can be loaded with this single cell:
# read the CSV with parsed dates, then convert Salesman to category.
sales = pd.read_csv(
    'salesmen.csv',
    parse_dates=['Date'],
)
sales['Salesman'] = sales['Salesman'].astype('category')
sales.head()

In [13]:
# pivot reshapes the data from long to wide.
# The arguments name, in order, the column used for the row index, the column
# whose values become the new column labels, and the column supplying the values.
sales.pivot(index='Date', columns='Salesman', values='Revenue').head(100)

Salesman,Bob,Dave,Jeb,Oscar,Ronald
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,7172,1864,4430,5250,2639
2016-01-02,6362,8278,8026,8661,4951
2016-01-03,5982,4226,5188,7075,2703
2016-01-04,7917,3868,3144,2524,4258
2016-01-05,7837,2287,938,2793,7771
2016-01-06,1744,7859,8702,7794,5930
2016-01-07,918,8597,4250,9728,1933
2016-01-08,9863,3092,9719,5263,5709
2016-01-09,8337,1794,5614,7144,4707
2016-01-10,7543,7105,301,7663,8267


## pivot_table Method

In [17]:
# Fictional list of customer spending at a restaurant.
foods = pd.read_csv(
    'foods.csv',
)
foods.head()

Unnamed: 0,First Name,Gender,City,Frequency,Item,Spend
0,Wanda,Female,Stamford,Weekly,Burger,15.66
1,Eric,Male,Stamford,Daily,Chalupa,10.56
2,Charles,Male,New York,Never,Sushi,42.14
3,Anna,Female,Philadelphia,Once,Ice Cream,11.01
4,Deborah,Female,Philadelphia,Daily,Chalupa,23.49


In [19]:
#  aggfunc：統計方式，平均(mean)，加總(sum)、次數(count),最大值(max),最小值(min)
foods.pivot_table(values='Spend', index='Gender', aggfunc='mean')

Unnamed: 0_level_0,Spend
Gender,Unnamed: 1_level_1
Female,50.709629
Male,49.397623


In [21]:
# index also accepts a list, producing a MultiIndex (Gender, then Item).
foods.pivot_table(values='Spend', index=['Gender','Item'], aggfunc='mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,Spend
Gender,Item,Unnamed: 2_level_1
Female,Burger,49.930488
Female,Burrito,50.092
Female,Chalupa,54.635
Female,Donut,49.926316
Female,Ice Cream,49.788519
Female,Sushi,50.355699
Male,Burger,49.613919
Male,Burrito,48.344819
Male,Chalupa,49.186761
Male,Donut,43.649565


In [22]:
# Adding `columns` splits each aggregate further, here one column per City.
foods.pivot_table(values='Spend', index=['Gender','Item'], columns='City', aggfunc='mean')

Unnamed: 0_level_0,City,New York,Philadelphia,Stamford
Gender,Item,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,Burger,51.626667,52.87871,45.037778
Female,Burrito,42.563043,52.098571,53.532647
Female,Chalupa,46.135789,52.291563,64.094
Female,Donut,46.670323,54.642,48.734118
Female,Ice Cream,56.356296,46.225625,46.910455
Female,Sushi,47.75129,58.096,45.622187
Male,Burger,58.822273,44.675238,46.424516
Male,Burrito,55.976,43.764333,46.438929
Male,Chalupa,49.1108,48.444783,50.011304
Male,Donut,44.842333,37.859394,49.004483


In [23]:
# columns accepts a list as well, giving multi-level columns (City, then Frequency).
foods.pivot_table(values='Spend', index=['Gender','Item'], columns=['City','Frequency'], aggfunc='mean')

Unnamed: 0_level_0,City,New York,New York,New York,New York,New York,New York,New York,New York,Philadelphia,Philadelphia,Philadelphia,Philadelphia,Philadelphia,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford
Unnamed: 0_level_1,Frequency,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly,Daily,Monthly,...,Weekly,Yearly,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly
Gender,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,Burger,43.778333,57.286667,97.89,23.74,52.425,31.58,92.175,64.825,77.226667,53.7625,...,16.0,61.585,48.22,59.6225,45.485,43.635,31.683333,48.765,31.004,51.171667
Female,Burrito,44.89,40.913333,47.4325,34.533333,59.94,83.77,13.23,35.63,53.595,17.14,...,31.41,38.916667,39.126,67.94,52.334286,63.968333,56.003333,78.163333,46.182,43.245
Female,Chalupa,43.19,79.185,35.15,39.73,36.933333,40.0,42.88,52.606667,23.49,72.49,...,28.136667,56.048889,95.7,80.99,52.12,59.945714,40.59,58.416667,68.23,69.632
Female,Donut,39.841667,71.1325,56.07,32.6575,29.065,30.27,71.39,62.95,61.85,50.25,...,69.6,58.41,41.45,45.86,52.443333,36.423333,79.12,34.886667,55.0075,56.12
Female,Ice Cream,65.5475,46.265,68.716667,58.065,35.616667,80.783333,56.905,37.9175,59.23,37.255,...,47.546667,39.965,46.44,41.95,77.66,66.103333,55.866,58.865,25.006,15.24
Female,Sushi,40.535,46.58,69.33,49.134286,32.358,87.7,51.36125,46.482,58.088333,78.71,...,55.666667,48.616667,56.181429,54.195,19.56,19.01,38.95,27.82,52.56,45.3425
Male,Burger,63.892,62.43,90.32,27.735,59.314,75.226667,69.69,24.805,37.566667,71.046667,...,33.296667,49.34,49.43,13.58,,48.1525,36.293333,53.25,77.5525,45.014
Male,Burrito,78.736667,49.18,28.926667,47.48,59.11,67.466667,64.185,55.175,41.44,29.86,...,48.208333,59.255,69.0575,39.866667,70.368,88.14,15.075,9.84,40.4625,32.83
Male,Chalupa,27.045,66.752,39.818,62.88,23.34,11.69,54.4,55.913333,68.7025,45.35,...,33.92,34.405,48.16,57.293333,46.233333,70.29,,34.804,44.37,58.095
Male,Donut,46.0,45.9325,43.926,46.6,56.386667,16.25,37.22,35.775,47.6775,51.858,...,38.6,22.305,64.71,29.8825,54.91,80.3875,27.978,40.8275,62.254,16.52


In [24]:
#  pandas also exposes pivot_table as a top-level function; the only difference
#  from the DataFrame method is that the frame is handed over through `data`.
pd.pivot_table(
    data=foods,
    values='Spend',
    index=['Gender', 'Item'],
    columns=['City', 'Frequency'],
    aggfunc='mean',
)

Unnamed: 0_level_0,City,New York,New York,New York,New York,New York,New York,New York,New York,Philadelphia,Philadelphia,Philadelphia,Philadelphia,Philadelphia,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford,Stamford
Unnamed: 0_level_1,Frequency,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly,Daily,Monthly,...,Weekly,Yearly,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly
Gender,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,Burger,43.778333,57.286667,97.89,23.74,52.425,31.58,92.175,64.825,77.226667,53.7625,...,16.0,61.585,48.22,59.6225,45.485,43.635,31.683333,48.765,31.004,51.171667
Female,Burrito,44.89,40.913333,47.4325,34.533333,59.94,83.77,13.23,35.63,53.595,17.14,...,31.41,38.916667,39.126,67.94,52.334286,63.968333,56.003333,78.163333,46.182,43.245
Female,Chalupa,43.19,79.185,35.15,39.73,36.933333,40.0,42.88,52.606667,23.49,72.49,...,28.136667,56.048889,95.7,80.99,52.12,59.945714,40.59,58.416667,68.23,69.632
Female,Donut,39.841667,71.1325,56.07,32.6575,29.065,30.27,71.39,62.95,61.85,50.25,...,69.6,58.41,41.45,45.86,52.443333,36.423333,79.12,34.886667,55.0075,56.12
Female,Ice Cream,65.5475,46.265,68.716667,58.065,35.616667,80.783333,56.905,37.9175,59.23,37.255,...,47.546667,39.965,46.44,41.95,77.66,66.103333,55.866,58.865,25.006,15.24
Female,Sushi,40.535,46.58,69.33,49.134286,32.358,87.7,51.36125,46.482,58.088333,78.71,...,55.666667,48.616667,56.181429,54.195,19.56,19.01,38.95,27.82,52.56,45.3425
Male,Burger,63.892,62.43,90.32,27.735,59.314,75.226667,69.69,24.805,37.566667,71.046667,...,33.296667,49.34,49.43,13.58,,48.1525,36.293333,53.25,77.5525,45.014
Male,Burrito,78.736667,49.18,28.926667,47.48,59.11,67.466667,64.185,55.175,41.44,29.86,...,48.208333,59.255,69.0575,39.866667,70.368,88.14,15.075,9.84,40.4625,32.83
Male,Chalupa,27.045,66.752,39.818,62.88,23.34,11.69,54.4,55.913333,68.7025,45.35,...,33.92,34.405,48.16,57.293333,46.233333,70.29,,34.804,44.37,58.095
Male,Donut,46.0,45.9325,43.926,46.6,56.386667,16.25,37.22,35.775,47.6775,51.858,...,38.6,22.305,64.71,29.8825,54.91,80.3875,27.978,40.8275,62.254,16.52


## pandas.melt() Method

In [25]:
#  Salesman performance table: one row per salesman, with the quarterly
#  figures spread across the columns Q1-Q4 (wide format).
sales = pd.read_csv('quarters.csv')
sales

Unnamed: 0,Salesman,Q1,Q2,Q3,Q4
0,Boris,602908,233879,354479,32704
1,Bob,43790,514863,297151,544493
2,Tommy,392668,113579,430882,247231
3,Travis,834663,266785,749238,570524
4,Donald,580935,411379,110390,651572
5,Ted,656644,70803,375948,321388
6,Jeb,486141,600753,742716,404995
7,Stacy,479662,742806,770712,2501
8,Morgan,992673,879183,37945,293710


In [27]:
#  melt is an "unpivot": it turns columns into rows, producing a long
#  variable/value listing instead of a wide table. With no id_vars given,
#  every column is melted -- Salesman included.
pd.melt(frame=sales)

Unnamed: 0,variable,value
0,Salesman,Boris
1,Salesman,Bob
2,Salesman,Tommy
3,Salesman,Travis
4,Salesman,Donald
5,Salesman,Ted
6,Salesman,Jeb
7,Salesman,Stacy
8,Salesman,Morgan
9,Q1,602908


In [29]:
#  id_vars names the column(s) kept as row identifiers; only the remaining
#  columns (Q1-Q4) are unpivoted into `variable` / `value`.
pd.melt(frame=sales, id_vars=['Salesman']).head(50)

Unnamed: 0,Salesman,variable,value
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879


In [31]:
#  var_name sets the header of the `variable` column and value_name sets the
#  header of the `value` column.
pd.melt(
    frame=sales,
    id_vars=['Salesman'],
    var_name='Quarter',
    value_name='Revenue',
)

Unnamed: 0,Salesman,Quarter,Revenue
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879
