## 09. 가공

## 참고자료
* [Python 완전정복 시리즈] 2편 : Pandas DataFrame 완전정복 : https://wikidocs.net/book/7188

In [1]:
import pandas as pd
import numpy as np

## 열 삽입(insert)

In [12]:
data = [[1,2,3],[4,5,6],[7,8,9]]
col = ['col1','col2','col3']
row = ['row1','row2','row3']
df = pd.DataFrame(data=data,index=row,columns=col)
df

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,5,6
row3,7,8,9


In [13]:
df.insert(3,'col4',[10,11,12])
df

Unnamed: 0,col1,col2,col3,col4
row1,1,2,3,10
row2,4,5,6,11
row3,7,8,9,12


In [14]:
df.insert(3,'col3',[10,11,12],allow_duplicates=True)
df

Unnamed: 0,col1,col2,col3,col3.1,col4
row1,1,2,3,10,10
row2,4,5,6,11,11
row3,7,8,9,12,12


## 열 꺼내기(pop)

In [18]:
data = [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
col = ['col1','col2','col3','col4']
row = ['row1','row2','row3','row4']
df = pd.DataFrame(data=data,index=row,columns=col)
df

Unnamed: 0,col1,col2,col3,col4
row1,1,2,3,4
row2,5,6,7,8
row3,9,10,11,12
row4,13,14,15,16


In [19]:
df.pop('col3')
df

Unnamed: 0,col1,col2,col4
row1,1,2,4
row2,5,6,8
row3,9,10,12
row4,13,14,16


## 복사 (copy)

In [21]:
sr = pd.Series([1, 2], index=["col1", "col2"])
deep = sr.copy(deep=True)
shallow = sr.copy(deep=False)
sr

col1    1
col2    2
dtype: int64

In [22]:
sr[0] = 9
shallow[1] = 8
deep[1]=7

In [23]:
sr

col1    9
col2    8
dtype: int64

In [24]:
shallow

col1    9
col2    8
dtype: int64

In [25]:
deep

col1    1
col2    7
dtype: int64

In [26]:
col = ['col1','col2','col3']
row = ['row1','row2','row3']
df = pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],index=row,columns=col)
deep = df.copy(deep=True)
shallow = df.copy(deep=False)
df

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,5,6
row3,7,8,9


In [27]:
df['col1']['row1']=99
shallow['col2']['row2']=88
deep['col2']['row2']=77


In [28]:
df

Unnamed: 0,col1,col2,col3
row1,99,2,3
row2,4,5,6
row3,7,8,9


In [29]:
shallow

Unnamed: 0,col1,col2,col3
row1,99,2,3
row2,4,88,6
row3,7,8,9


In [30]:
deep

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,77,6
row3,7,8,9


## 행/열 삭제(drop)

In [32]:
# pop과 다르게 원본 변경 X

sr = pd.Series([1, 2], index=["col1", "col2"])
deep = sr.copy(deep=True)
shallow = sr.copy(deep=False)
sr

col1    1
col2    2
dtype: int64

In [33]:
row = ['row1','row2','row3']
col = ['col1','col2','col3']
data = [[1,2,3],[4,5,6],[7,8,9]]
df = pd.DataFrame(data=data, index=row, columns=col)
df

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,5,6
row3,7,8,9


In [34]:
df.drop(labels='row2',axis=0)

Unnamed: 0,col1,col2,col3
row1,1,2,3
row3,7,8,9


In [35]:
df.drop(labels='col2',axis=1)

Unnamed: 0,col1,col3
row1,1,3
row2,4,6
row3,7,9


In [36]:
df.drop(index='row3')

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,5,6


In [37]:
df.drop(columns='col3')

Unnamed: 0,col1,col2
row1,1,2
row2,4,5
row3,7,8


In [38]:
df.drop(labels=['row3','row4'],errors='raise')

KeyError: "['row4'] not found in axis"

In [39]:
df.drop(labels=['row3','row4'],errors='ignore')

Unnamed: 0,col1,col2,col3
row1,1,2,3
row2,4,5,6


In [40]:
df.drop(labels=['col1','col2'],axis=1,inplace=True)
df

Unnamed: 0,col3
row1,3
row2,6
row3,9


## 행 추가(append)

In [41]:
df = pd.DataFrame(data=[[1,2],[3,4]], index=['row1','row2'], columns=['col1','col3'])
df

Unnamed: 0,col1,col3
row1,1,2
row2,3,4


In [42]:
df2 = pd.DataFrame(data=[[5,6]],index=['row3'],columns=['col2','col4'])

In [43]:
df.append(df2)

  df.append(df2)


Unnamed: 0,col1,col3,col2,col4
row1,1.0,2.0,,
row2,3.0,4.0,,
row3,,,5.0,6.0


In [44]:
df.append(df2, sort=True)

  df.append(df2, sort=True)


Unnamed: 0,col1,col2,col3,col4
row1,1.0,,2.0,
row2,3.0,,4.0,
row3,,5.0,,6.0


In [45]:
df.append(df2,sort=True,ignore_index=True)

  df.append(df2,sort=True,ignore_index=True)


Unnamed: 0,col1,col2,col3,col4
0,1.0,,2.0,
1,3.0,,4.0,
2,,5.0,,6.0


In [46]:
## verifiy_intergrity 인수 사용
df3 = pd.DataFrame(data=[[7,8],[9,0]], index=['row2','row3'], columns=['col1','col3'])
df3

Unnamed: 0,col1,col3
row2,7,8
row3,9,0


In [47]:
df.append(df3,verify_integrity=False)

  df.append(df3,verify_integrity=False)


Unnamed: 0,col1,col3
row1,1,2
row2,3,4
row2,7,8
row3,9,0


In [49]:
df.append(df3,verify_integrity=True)

  df.append(df3,verify_integrity=True)


ValueError: Indexes have overlapping values: Index(['row2'], dtype='object')

## 자르기 (truncate)

In [50]:
row = ['row1','row2','row3','row4']
col = ['col1','col2','col3','col4']
data = [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
df = pd.DataFrame(data=data, index=row, columns=col)
df

Unnamed: 0,col1,col2,col3,col4
row1,1,2,3,4
row2,5,6,7,8
row3,9,10,11,12
row4,13,14,15,16


In [51]:
df.truncate(before='row2',after='row3',axis=0)

Unnamed: 0,col1,col2,col3,col4
row2,5,6,7,8
row3,9,10,11,12


In [52]:
df.truncate(before='col2',after='col3',axis=1)

Unnamed: 0,col2,col3
row1,2,3
row2,6,7
row3,10,11
row4,14,15


## 중복행 제거 (drop_duplicates)

In [53]:
col = ['col1','col2','col3']
data = [['A','x','-'],['A','x','-'],['B','x','앞'],['B','y','-'],['B','y','뒤']]
df = pd.DataFrame(data=data, columns=col)
df

Unnamed: 0,col1,col2,col3
0,A,x,-
1,A,x,-
2,B,x,앞
3,B,y,-
4,B,y,뒤


In [54]:
# subset 없을시 모든 열 기준
df.drop_duplicates()

Unnamed: 0,col1,col2,col3
0,A,x,-
2,B,x,앞
3,B,y,-
4,B,y,뒤


In [55]:
df.drop_duplicates(subset='col2')

Unnamed: 0,col1,col2,col3
0,A,x,-
3,B,y,-


In [56]:
df.drop_duplicates(subset=['col1','col2'])

Unnamed: 0,col1,col2,col3
0,A,x,-
2,B,x,앞
3,B,y,-


In [57]:
# keep 인수를 통해 중복값을 제거하고 남길 행 선택 가능
df.drop_duplicates(subset='col1',keep='first')

Unnamed: 0,col1,col2,col3
0,A,x,-
2,B,x,앞


In [58]:
df.drop_duplicates(subset='col1',keep='last')

Unnamed: 0,col1,col2,col3
1,A,x,-
4,B,y,뒤


In [59]:
df.drop_duplicates(subset='col1',keep='last',ignore_index=True)

Unnamed: 0,col1,col2,col3
0,A,x,-
1,B,y,뒤


In [60]:
df.drop_duplicates(subset='col3',inplace=True)
df

Unnamed: 0,col1,col2,col3
0,A,x,-
2,B,x,앞
4,B,y,뒤


## 차원축소, 스칼라 변환 (squeeze)

In [61]:
df =pd.DataFrame(data=[[1,2],[3,4]],index=['row1','row2'],columns=['col1','col2'])
df

Unnamed: 0,col1,col2
row1,1,2
row2,3,4


In [62]:
df_row=df.drop(index='row1')
df_row

Unnamed: 0,col1,col2
row2,3,4


In [65]:
df_row.squeeze()

col1    3
col2    4
Name: row2, dtype: int64

In [66]:
df_col=df.drop(columns='col1')

In [67]:
df_row.squeeze()

col1    3
col2    4
Name: row2, dtype: int64

In [68]:
df_col_row = df.drop(index='row1',columns='col1')
df_col_row

Unnamed: 0,col2
row2,4


In [70]:
df_col_row.squeeze()

4

In [71]:
df.squeeze()

Unnamed: 0,col1,col2
row1,1,2
row2,3,4


## 피벗변환 (pivot)

In [72]:
col = ['Machine','Country','Price','Brand']
data = [['TV','Korea',1000,'A'],
        ['TV','Japan',1300,'B'],
        ['TV','China',300,'C'],
        ['PC','Korea',2000,'A'],
        ['PC','Japan',3000,'E'],
        ['PC','China',450,'F']]
df = pd.DataFrame(data=data, columns=col)
df

Unnamed: 0,Machine,Country,Price,Brand
0,TV,Korea,1000,A
1,TV,Japan,1300,B
2,TV,China,300,C
3,PC,Korea,2000,A
4,PC,Japan,3000,E
5,PC,China,450,F


In [73]:
df.pivot(index='Machine',columns='Country',values='Price')

Country,China,Japan,Korea
Machine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PC,450,3000,2000
TV,300,1300,1000


In [74]:
df.pivot(index='Machine',columns='Country',values=['Price','Brand'])

Unnamed: 0_level_0,Price,Price,Price,Brand,Brand,Brand
Country,China,Japan,Korea,China,Japan,Korea
Machine,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
PC,450,3000,2000,F,E,A
TV,300,1300,1000,C,B,A


In [75]:
df.pivot(index='Machine',columns='Country')

Unnamed: 0_level_0,Price,Price,Price,Brand,Brand,Brand
Country,China,Japan,Korea,China,Japan,Korea
Machine,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
PC,450,3000,2000,F,E,A
TV,300,1300,1000,C,B,A


In [76]:
df.pivot(index='Machine',columns='Country')['Brand']

Country,China,Japan,Korea
Machine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PC,F,E,A
TV,C,B,A


In [77]:
df.pivot(index=['Country','Machine'],columns='Brand',values='Price')

Unnamed: 0_level_0,Brand,A,B,C,E,F
Country,Machine,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
China,PC,,,,,450.0
China,TV,,,300.0,,
Japan,PC,,,,3000.0,
Japan,TV,,1300.0,,,
Korea,PC,2000.0,,,,
Korea,TV,1000.0,,,,


In [78]:
df.pivot(index='Country',columns=['Machine','Brand'],values='Price')

Machine,TV,TV,TV,PC,PC,PC
Brand,A,B,C,A,E,F
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
China,,,300.0,,,450.0
Japan,,1300.0,,,3000.0,
Korea,1000.0,,,2000.0,,


In [79]:
df2 = pd.DataFrame(data=[['A','x',1],['A','x',2],['B','y',3],['B','z',4]],columns=['col1','col2','col3'])
df2

Unnamed: 0,col1,col2,col3
0,A,x,1
1,A,x,2
2,B,y,3
3,B,z,4


In [80]:
# 중복값으로 피벗테이블 생성 불가능할시 오류 발생
df2.pivot(index='col1',columns='col2',values='col3')

ValueError: Index contains duplicate entries, cannot reshape

## 피벗생성_스프레드시트 기반 (pivot_table)

In [81]:
col = ['Machine','Country','Grade','Price','Count']
data = [['TV','Korea','A',1000,3],
        ['TV','Korea','B', 800,8],
        ['TV','Korea','B', 800,2],
        ['TV','Japan','A',1300,5],
        ['TV','Japan','A',1300,1],
        ['PC','Korea','B',1500,6],
        ['PC','Korea','A',2000,9],
        ['PC','Japan','A',3000,3],
        ['PC','Japan','B',2500,3]]
df = pd.DataFrame(data=data, columns=col)
df

Unnamed: 0,Machine,Country,Grade,Price,Count
0,TV,Korea,A,1000,3
1,TV,Korea,B,800,8
2,TV,Korea,B,800,2
3,TV,Japan,A,1300,5
4,TV,Japan,A,1300,1
5,PC,Korea,B,1500,6
6,PC,Korea,A,2000,9
7,PC,Japan,A,3000,3
8,PC,Japan,B,2500,3


In [82]:
df.pivot_table(values='Count',index=['Machine','Country'],columns='Grade',aggfunc=np.sum)

Unnamed: 0_level_0,Grade,A,B
Machine,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
PC,Japan,3.0,3.0
PC,Korea,9.0,6.0
TV,Japan,6.0,
TV,Korea,3.0,10.0


In [83]:
df.pivot_table(values='Count',index=['Machine','Country'],columns='Grade',aggfunc=np.sum,sort=False)

Unnamed: 0_level_0,Grade,A,B
Machine,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
TV,Korea,3.0,10.0
TV,Japan,6.0,
PC,Korea,9.0,6.0
PC,Japan,3.0,3.0


In [86]:
df.pivot_table(values=['Count','Price'],index=['Machine','Country'],columns='Grade',aggfunc=[np.sum,np.mean])

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,sum,sum,sum,mean,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,Count,Count,Price,Price,Count,Count,Price,Price
Unnamed: 0_level_2,Grade,A,B,A,B,A,B,A,B
Machine,Country,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
PC,Japan,3.0,3.0,3000.0,2500.0,3.0,3.0,3000.0,2500.0
PC,Korea,9.0,6.0,2000.0,1500.0,9.0,6.0,2000.0,1500.0
TV,Japan,6.0,,2600.0,,3.0,,1300.0,
TV,Korea,3.0,10.0,1000.0,1600.0,3.0,5.0,1000.0,800.0


In [87]:
df.pivot_table(values=['Count','Price'],index=['Machine','Country'],columns='Grade',aggfunc={'Count':np.sum,'Price':np.mean})

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Count,Price,Price
Unnamed: 0_level_1,Grade,A,B,A,B
Machine,Country,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
PC,Japan,3.0,3.0,3000.0,2500.0
PC,Korea,9.0,6.0,2000.0,1500.0
TV,Japan,6.0,,1300.0,
TV,Korea,3.0,10.0,1000.0,800.0


In [88]:
df.pivot_table(values='Count',index=['Machine','Country'],columns='Grade',aggfunc=np.sum,fill_value='누락')

Unnamed: 0_level_0,Grade,A,B
Machine,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
PC,Japan,3.0,3.0
PC,Korea,9.0,6.0
TV,Japan,6.0,누락
TV,Korea,3.0,10.0


In [89]:
df.pivot_table(values='Count',index=['Machine','Country'],columns='Grade',aggfunc=np.sum,margins=True)

Unnamed: 0_level_0,Grade,A,B,All
Machine,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PC,Japan,3.0,3.0,6
PC,Korea,9.0,6.0,15
TV,Japan,6.0,,6
TV,Korea,3.0,10.0,13
All,,21.0,19.0,40


In [90]:
df.pivot_table(values='Count',index=['Machine','Country'],columns='Grade',aggfunc=np.sum,margins=True,margins_name='총계')

Unnamed: 0_level_0,Grade,A,B,총계
Machine,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PC,Japan,3.0,3.0,6
PC,Korea,9.0,6.0,15
TV,Japan,6.0,,6
TV,Korea,3.0,10.0,13
총계,,21.0,19.0,40


## 피벗해제 (melt)

In [91]:
col = ['Country','Machine','Price','Brand']
data = [['Korea','TV',1000,'A'],
        ['Japan','TV',1300,'B'],
        ['Korea','PC',2000,'A'],
        ['Japan','PC',3000,'E']]
df = pd.DataFrame(data=data, columns=col)
df

Unnamed: 0,Country,Machine,Price,Brand
0,Korea,TV,1000,A
1,Japan,TV,1300,B
2,Korea,PC,2000,A
3,Japan,PC,3000,E


In [92]:
df.melt(id_vars='Country',value_vars=['Machine','Price'])

Unnamed: 0,Country,variable,value
0,Korea,Machine,TV
1,Japan,Machine,TV
2,Korea,Machine,PC
3,Japan,Machine,PC
4,Korea,Price,1000
5,Japan,Price,1300
6,Korea,Price,2000
7,Japan,Price,3000


In [93]:
df.melt(id_vars='Country',value_vars=['Machine','Price'],ignore_index=False)

Unnamed: 0,Country,variable,value
0,Korea,Machine,TV
1,Japan,Machine,TV
2,Korea,Machine,PC
3,Japan,Machine,PC
0,Korea,Price,1000
1,Japan,Price,1300
2,Korea,Price,2000
3,Japan,Price,3000


In [94]:
df.melt(id_vars='Country',value_vars=['Machine','Price'],var_name='Category',value_name='val')

Unnamed: 0,Country,Category,val
0,Korea,Machine,TV
1,Japan,Machine,TV
2,Korea,Machine,PC
3,Japan,Machine,PC
4,Korea,Price,1000
5,Japan,Price,1300
6,Korea,Price,2000
7,Japan,Price,3000


In [95]:
col2 = [['Area','Area','Value','Value','Value'],['Country','City','Machine','Price','Brand']]
data2 =[['Korea','Seoul','TV',1000,'A'],
        ['Japan','Tokyo','TV',1300,'B'],
        ['Korea','Jeju','PC',2000,'A'],
        ['Japan','Kyoto','PC',3000,'E']]
df2=pd.DataFrame(data=data2, columns=col2)
df2

Unnamed: 0_level_0,Area,Area,Value,Value,Value
Unnamed: 0_level_1,Country,City,Machine,Price,Brand
0,Korea,Seoul,TV,1000,A
1,Japan,Tokyo,TV,1300,B
2,Korea,Jeju,PC,2000,A
3,Japan,Kyoto,PC,3000,E


In [96]:
df2.melt(id_vars=[('Area','City')],value_vars=[('Value','Price')])

Unnamed: 0,"(Area, City)",variable_0,variable_1,value
0,Seoul,Value,Price,1000
1,Tokyo,Value,Price,1300
2,Jeju,Value,Price,2000
3,Kyoto,Value,Price,3000


In [98]:
df2.melt(id_vars='City',value_vars='Price',col_level=1)

Unnamed: 0,City,variable,value
0,Seoul,Price,1000
1,Tokyo,Price,1300
2,Jeju,Price,2000
3,Kyoto,Price,3000


## 새 열 할당 (assign)

In [99]:
df = pd.DataFrame(index=['row1','row2','row3'],data={'col1':[1,2,3]})
df

Unnamed: 0,col1
row1,1
row2,2
row3,3


In [100]:
df.assign(col2=lambda x : x.col1+2)

Unnamed: 0,col1,col2
row1,1,3
row2,2,4
row3,3,5


In [101]:
df.assign(col3=df['col1']*(-2))

Unnamed: 0,col1,col3
row1,1,-2
row2,2,-4
row3,3,-6


In [102]:
df.assign(col2=lambda x : x.col1+2,col3=df['col1']*(-2))

Unnamed: 0,col1,col2,col3
row1,1,3,-2
row2,2,4,-4
row3,3,5,-6


In [103]:
# 추가할 새 열의 이름이 기존열과 중복된다면 새 값으로 덮어씌워짐
df.assign(col1=[0,0,0])

Unnamed: 0,col1
row1,0
row2,0
row3,0


## 값 변경 (replace)

In [104]:
col = ['col1','col2','col3','col4']
row = ['row1','row2','row3','row4']
data = [['A','w',1,'alpha'],['B','x',2,'beta'],['C','y',3,'gamma'],['D','z',4,'delta']]
df = pd.DataFrame(data=data, index=row, columns=col)
df

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,x,2,beta
row3,C,y,3,gamma
row4,D,z,4,delta


In [105]:
df.replace(to_replace=1,value=99)

Unnamed: 0,col1,col2,col3,col4
row1,A,w,99,alpha
row2,B,x,2,beta
row3,C,y,3,gamma
row4,D,z,4,delta


In [106]:
df.replace(to_replace=['A','B','y','z'],value='-')

Unnamed: 0,col1,col2,col3,col4
row1,-,w,1,alpha
row2,-,x,2,beta
row3,C,-,3,gamma
row4,D,-,4,delta


In [107]:
df.replace(to_replace=['A','B','y','z'],value=['a','b','Y','Z'])

Unnamed: 0,col1,col2,col3,col4
row1,a,w,1,alpha
row2,b,x,2,beta
row3,C,Y,3,gamma
row4,D,Z,4,delta


In [109]:
df.replace(to_replace=['x','y'],method='ffill')

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,w,2,beta
row3,C,w,3,gamma
row4,D,z,4,delta


In [111]:
df.replace(to_replace=['x','y'],method='bfill')

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,z,2,beta
row3,C,z,3,gamma
row4,D,z,4,delta


In [112]:
df.replace(to_replace=['x','y'],method='bfill',limit=1)

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,x,2,beta
row3,C,z,3,gamma
row4,D,z,4,delta


In [113]:
df.replace(to_replace={'A':'a','z':'Z'})

Unnamed: 0,col1,col2,col3,col4
row1,a,w,1,alpha
row2,B,x,2,beta
row3,C,y,3,gamma
row4,D,Z,4,delta


In [114]:
df.replace(to_replace={'col3':{1:'-',4:'+'}})

Unnamed: 0,col1,col2,col3,col4
row1,A,w,-,alpha
row2,B,x,2,beta
row3,C,y,3,gamma
row4,D,z,+,delta


In [115]:
df.replace(to_replace={'col1':'B','col2':'w'},value=100)

Unnamed: 0,col1,col2,col3,col4
row1,A,100,1,alpha
row2,100,x,2,beta
row3,C,y,3,gamma
row4,D,z,4,delta


In [116]:
df.replace(to_replace=r'[e]',value='-',regex=True)

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,x,2,b-ta
row3,C,y,3,gamma
row4,D,z,4,d-lta


In [117]:
df.replace(regex=r'[e]', value='-')

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,x,2,b-ta
row3,C,y,3,gamma
row4,D,z,4,d-lta


In [118]:
df.replace(regex=r'[e]', value='-',inplace=True)
df

Unnamed: 0,col1,col2,col3,col4
row1,A,w,1,alpha
row2,B,x,2,b-ta
row3,C,y,3,gamma
row4,D,z,4,d-lta


## 리스트 형태의 값 전개 (explode)

In [119]:
data= [[[1,2,3],0,['a','b','c']],[4,[],3],[5,2,['x','y','z']]]
idx = ['row1','row2','row3']
col = ['col1','col2','col3']
df = pd.DataFrame(data = data, index = idx, columns = col)
df

Unnamed: 0,col1,col2,col3
row1,"[1, 2, 3]",0,"[a, b, c]"
row2,4,[],3
row3,5,2,"[x, y, z]"


In [120]:
df.explode('col1')

Unnamed: 0,col1,col2,col3
row1,1,0,"[a, b, c]"
row1,2,0,"[a, b, c]"
row1,3,0,"[a, b, c]"
row2,4,[],3
row3,5,2,"[x, y, z]"


In [121]:
df.explode('col2')

Unnamed: 0,col1,col2,col3
row1,"[1, 2, 3]",0.0,"[a, b, c]"
row2,4,,3
row3,5,2.0,"[x, y, z]"


In [122]:
df.explode('col3')

Unnamed: 0,col1,col2,col3
row1,"[1, 2, 3]",0,a
row1,"[1, 2, 3]",0,b
row1,"[1, 2, 3]",0,c
row2,4,[],3
row3,5,2,x
row3,5,2,y
row3,5,2,z


In [123]:
data = [[[1,2],['a','b']],[3,'c']]
idx = ['row1','row2']
col = ['col1','col2']
df = pd.DataFrame(data =data, index = idx, columns = col)
df

Unnamed: 0,col1,col2
row1,"[1, 2]","[a, b]"
row2,3,c


In [124]:
df.explode(column=['col1','col2'])

Unnamed: 0,col1,col2
row1,1,a
row1,2,b
row2,3,c


In [125]:
df.explode(column=['col1','col2'],ignore_index=True)

Unnamed: 0,col1,col2
0,1,a
1,2,b
2,3,c
