# Study Summary

1. 欄位轉索引 stack()：先建立具有多層欄位（MultiIndex columns）的範例 DataFrame

In [2]:
import pandas as pd

In [3]:
# Two-level column labels: outer level 'weight', inner level the unit ('kg' / 'pounds').
multicol1 = pd.MultiIndex.from_product([['weight'], ['kg', 'pounds']])

# Sample frame: one row per animal, one column per (measure, unit) pair.
df_multi_level_cols1 = pd.DataFrame(
    data=[[1, 2], [2, 4]],
    columns=multicol1,
    index=['cat', 'dog'],
)
df_multi_level_cols1

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,kg,pounds
cat,1,2
dog,2,4


1. 欄位轉索引 stack()

In [4]:
# Move the innermost column level (the unit) into the row index; level=-1 is stack()'s default.
df_multi_level_cols1.stack(level=-1)

Unnamed: 0,Unnamed: 1,weight
cat,kg,1
cat,pounds,2
dog,kg,2
dog,pounds,4


1-1. 觀察stack()前後差異

In [5]:
# Row index before and after stack(): the flat Index becomes a two-level MultiIndex.
tuple(frame.index for frame in (df_multi_level_cols1, df_multi_level_cols1.stack()))

(Index(['cat', 'dog'], dtype='object'),
 MultiIndex([('cat',     'kg'),
             ('cat', 'pounds'),
             ('dog',     'kg'),
             ('dog', 'pounds')],
            ))

1-2. 一直stack()的變化

In [6]:
# Second stack(): the remaining column level ('weight') also moves into the index,
# so the result is a Series rather than a DataFrame.
(
    df_multi_level_cols1
    .stack(level=-1)
    .stack(level=-1)
)

cat  kg      weight    1
     pounds  weight    2
dog  kg      weight    2
     pounds  weight    4
dtype: int64

In [7]:
# Third stack(): the second stack() already returned a Series, and a Series has
# no stack() method, so this call fails. The error is the point of the demo, so
# catch it and print it instead of letting it abort a "Restart & Run All".
try:
    df_multi_level_cols1.stack().stack().stack()
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'Series' object has no attribute 'stack'

2. 索引轉欄位 unstack()

In [8]:
# unstack() reverses stack(): the innermost index level goes back to the columns.
# level=-1 is the default for both calls.
df_multi_level_cols1.stack(level=-1).unstack(level=-1)

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,kg,pounds
cat,1,2
dog,2,4


2-1. 觀察unstack()前後差異

In [10]:
# Show the stacked frame next to the row index obtained after unstacking it again.
(
    df_multi_level_cols1.stack(level=-1),
    df_multi_level_cols1.stack(level=-1).unstack(level=-1).index,
)

(            weight
 cat kg           1
     pounds       2
 dog kg           2
     pounds       4,
 Index(['cat', 'dog'], dtype='object'))

2-2. 一直unstack()的變化

In [11]:
# Second unstack(): the row index has a single level here, so the result is a
# Series indexed by (column levels..., row label).
(
    df_multi_level_cols1
    .stack(level=-1)
    .unstack(level=-1)
    .unstack(level=-1)
)

weight  kg      cat    1
                dog    2
        pounds  cat    2
                dog    4
dtype: int64

In [12]:
# Third unstack(): the innermost index level of that Series (the animal) becomes
# the columns, giving a DataFrame again.
(
    df_multi_level_cols1
    .stack(level=-1)
    .unstack(level=-1)
    .unstack(level=-1)
    .unstack(level=-1)
)

Unnamed: 0,Unnamed: 1,cat,dog
weight,kg,1,2
weight,pounds,2,4


結論: unstack() 會把最內層的索引轉成欄位；當索引只剩單層（沒有多層索引可轉換）時，對 DataFrame 呼叫 unstack() 會把所有欄位層級連同原索引一起堆成多層索引並回傳 Series；再對該 Series 呼叫 unstack()，又會把最內層索引轉回欄位，如此不斷循環

3. 數據分析時經常需要把寬資料（wide）轉成長資料（long），可使用 .melt()：原本的欄位名稱會變成 variable 欄的值，原本的儲存格內容則放到 value 欄
   id_vars：不需要被轉換（要保留）的欄位名稱
   value_vars：需要轉換的欄位名稱，其餘欄位會被捨棄；若省略，則 id_vars 以外的所有欄位都會被轉換。

In [13]:
import pandas as pd 

# Small wide-format sample: one row per person, with integer row labels 0..2.
df = pd.DataFrame({
    'Name': dict(enumerate(['Jiao', 'John', 'Shiela'])),
    'course': dict(enumerate(['python', 'ML', 'DM'])),
    'Age': dict(enumerate([22, 15, 50])),
})

df

Unnamed: 0,Name,course,Age
0,Jiao,python,22
1,John,ML,15
2,Shiela,DM,50


In [14]:
# Function form of df.melt(): with no id_vars, every column is melted into
# (variable, value) pairs.
pd.melt(df)

Unnamed: 0,variable,value
0,Name,Jiao
1,Name,John
2,Name,Shiela
3,course,python
4,course,ML
5,course,DM
6,Age,22
7,Age,15
8,Age,50


In [15]:
# Keep 'Name' as an identifier column (first positional argument = id_vars);
# every other column is melted into variable/value.
df.melt(['Name'])

Unnamed: 0,Name,variable,value
0,Jiao,course,python
1,John,course,ML
2,Shiela,course,DM
3,Jiao,Age,22
4,John,Age,15
5,Shiela,Age,50


3-1. 參數應用
     id_vars：不需要被轉換（要保留）的欄位名稱
     value_vars：需要轉換的欄位名稱，其餘欄位會被捨棄；若省略，則 id_vars 以外的所有欄位都會被轉換。

In [21]:
# id_vars: keep the 'Name' column as-is; all remaining columns become
# rows of (variable, value).
df.melt(id_vars=['Name'])

Unnamed: 0,Name,variable,value
0,Jiao,course,python
1,John,course,ML
2,Shiela,course,DM
3,Jiao,Age,22
4,John,Age,15
5,Shiela,Age,50


In [22]:
# value_vars: melt only the 'Name' column; the other columns are dropped.
df.melt(value_vars=['Name'])

Unnamed: 0,variable,value
0,Name,Jiao
1,Name,John
2,Name,Shiela


4. 做資料分析時很常要重新組織資料，在裡面最靈活好用的就是 .pivot() 函數
   .pivot() 函數根據給定的索引/列值重新組織DataFrame
   
   參數: index：新資料的索引名稱
         columns：新資料的欄位名稱
         values：新資料的值名稱

In [23]:
import pandas as pd

# Long-format sample for the pivot() demos: six rows labelled 0..5.
piv = pd.DataFrame({
    'fff': dict(enumerate(['one', 'one', 'one', 'two', 'two', 'two'])),
    'bbb': dict(enumerate(['P', 'Q', 'R', 'P', 'Q', 'R'])),
    'baa': dict(enumerate([2, 3, 4, 5, 6, 7])),
    'zzz': dict(enumerate(['h', 'i', 'j', 'k', 'l', 'm'])),
})

piv

Unnamed: 0,fff,bbb,baa,zzz
0,one,P,2,h
1,one,Q,3,i
2,one,R,4,j
3,two,P,5,k
4,two,Q,6,l
5,two,R,7,m


以 fff 欄位的值作為新的索引 (index)
以 bbb 欄位的值作為新的欄位名稱 (columns)
以 baa 欄位的內容作為值 (values)

In [25]:
# Long -> wide: rows keyed by 'fff', one column per distinct 'bbb', cells taken from 'baa'.
piv.pivot(
    columns='bbb',
    index='fff',
    values='baa',
)

bbb,P,Q,R
fff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,2,3,4
two,5,6,7


以 fff 欄位的值作為新的索引 (index)
以 bbb 欄位的值作為新的欄位名稱 (columns)
以 baa、zzz 兩個欄位的內容作為值 (values)

In [26]:
# Same pivot with two value columns: the result has two-level columns,
# (value column, 'bbb' label).
piv.pivot(
    columns='bbb',
    index='fff',
    values=['baa', 'zzz'],
)

Unnamed: 0_level_0,baa,baa,baa,zzz,zzz,zzz
bbb,P,Q,R,P,Q,R
fff,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,2,3,4,h,i,j
two,5,6,7,k,l,m


# Homework

題目 : 運用下列分數資料重新建構資料，將索引(index)依序改為sex、class、student_id，
       欄位依序改成chinese_score、english_score、math_score

In [46]:
# Homework data: one row per student with three subject scores, sex and class.
score_df = pd.DataFrame(
    data=[
        (1, 50, 80, 70, 'boy', 1),
        (2, 60, 45, 50, 'boy', 2),
        (3, 98, 43, 55, 'boy', 1),
        (4, 70, 69, 89, 'boy', 2),
        (5, 56, 79, 60, 'girl', 1),
        (6, 60, 68, 55, 'girl', 2),
        (7, 45, 70, 77, 'girl', 1),
        (8, 55, 77, 76, 'girl', 2),
        (9, 25, 57, 60, 'girl', 1),
        (10, 88, 40, 43, 'girl', 3),
        (11, 25, 60, 45, 'boy', 3),
        (12, 80, 60, 23, 'boy', 3),
        (13, 20, 90, 66, 'girl', 3),
        (14, 50, 50, 50, 'girl', 3),
        (15, 89, 67, 77, 'girl', 3),
    ],
    columns=[
        'student_id',
        'math_score',
        'english_score',
        'chinese_score',
        'sex',
        'class',
    ],
)
score_df

Unnamed: 0,student_id,math_score,english_score,chinese_score,sex,class
0,1,50,80,70,boy,1
1,2,60,45,50,boy,2
2,3,98,43,55,boy,1
3,4,70,69,89,boy,2
4,5,56,79,60,girl,1
5,6,60,68,55,girl,2
6,7,45,70,77,girl,1
7,8,55,77,76,girl,2
8,9,25,57,60,girl,1
9,10,88,40,43,girl,3


In [50]:
# Goal: index ordered as sex, class, student_id;
# columns ordered as chinese_score, english_score, math_score.

# Step 1: go long. Keep the three future index columns as identifiers and melt
# the score columns into (variable, value) rows.
# NOTE: this rebinds `df`, which earlier cells used for the melt() demo frame.
df = score_df.melt(
    id_vars=['sex', 'class', 'student_id'],
    value_vars=['math_score', 'english_score', 'chinese_score'],
)
df

Unnamed: 0,sex,class,student_id,variable,value
0,boy,1,1,math_score,50
1,boy,2,2,math_score,60
2,boy,1,3,math_score,98
3,boy,2,4,math_score,70
4,girl,1,5,math_score,56
5,girl,2,6,math_score,60
6,girl,1,7,math_score,45
7,girl,2,8,math_score,55
8,girl,1,9,math_score,25
9,girl,3,10,math_score,88


In [54]:
# Goal: index ordered as sex, class, student_id;
# columns ordered as chinese_score, english_score, math_score.

# Step 2: pivot the long frame back to wide, using the three identifier columns
# as a multi-level index and one column per score name.
df.pivot(
    columns='variable',
    values='value',
    index=['sex', 'class', 'student_id'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,variable,chinese_score,english_score,math_score
sex,class,student_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
boy,1,1,70,80,50
boy,1,3,55,43,98
boy,2,2,50,45,60
boy,2,4,89,69,70
boy,3,11,45,60,25
boy,3,12,23,60,80
girl,1,5,60,79,56
girl,1,7,77,70,45
girl,1,9,60,57,25
girl,2,6,55,68,60


In [64]:
# Practice II: reshape `piv` so that 'baa' and 'zzz' form the index
# and 'fff' and 'bbb' become the columns. Start by displaying the source frame.

# Target columns: fff, bbb
# Target index: baa, zzz
piv

Unnamed: 0,fff,bbb,baa,zzz
0,one,P,2,h
1,one,Q,3,i
2,one,R,4,j
3,two,P,5,k
4,two,Q,6,l
5,two,R,7,m


In [67]:
# Step 1: go long. Keep 'baa' and 'zzz' as identifiers and melt the remaining
# columns ('fff', 'bbb') into (variable, value) rows.
a = piv.melt(
    id_vars=['baa', 'zzz'],
    value_vars=['fff', 'bbb'],
)
a

Unnamed: 0,baa,zzz,variable,value
0,2,h,fff,one
1,3,i,fff,one
2,4,j,fff,one
3,5,k,fff,two
4,6,l,fff,two
5,7,m,fff,two
6,2,h,bbb,P
7,3,i,bbb,Q
8,4,j,bbb,R
9,5,k,bbb,P


In [69]:
# Step 2: pivot back to wide with ('baa', 'zzz') as the index and one column
# per former column name.
a.pivot(
    columns='variable',
    values='value',
    index=['baa', 'zzz'],
)

Unnamed: 0_level_0,variable,bbb,fff
baa,zzz,Unnamed: 2_level_1,Unnamed: 3_level_1
2,h,P,one
3,i,Q,one
4,j,R,one
5,k,P,two
6,l,Q,two
7,m,R,two
