<h1>3장 Pandas로 데이터 가공하기</h1>

<h2>데이터세트 결합: Concat과 Append</h2>

In [1]:
import pandas as pd
import numpy as np

In [4]:
def make_df(cols, ind):
    """Quickly build a small DataFrame with self-describing cell values.

    Parameters
    ----------
    cols : iterable
        Column labels. Iterating a string such as ``'ABC'`` yields one
        column per character.
    ind : iterable
        Row labels, used as the index of the result.

    Returns
    -------
    pandas.DataFrame
        Frame whose cell at row ``i`` and column ``c`` holds
        ``str(c) + str(i)`` (for example ``'A0'``).
    """
    columns = {}
    for col in cols:
        # One list per column, with one entry per row label.
        columns[col] = [str(col) + str(row) for row in ind]
    return pd.DataFrame(columns, index=ind)

# Example DataFrame
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


<b>복습: NumPy 배열 연결</b>

In [5]:
# Three equal-length Python lists to be joined end to end.
x, y, z = [1, 2, 3], [4, 5, 6], [7, 8, 9]

# np.concatenate accepts a sequence of array-likes and returns one array.
np.concatenate((x, y, z))

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
# A 2x2 nested list; axis=1 joins the two copies side by side (2x4 result).
x = [[1, 2],
     [3, 4]]
np.concatenate((x, x), axis=1)

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

<b>pd.concat을 이용한 간단한 연결</b>

\# Pandas 0.19 버전에서 pd.concat() 함수 시그니처 <br>
```python
pd.concat(objs, axis=0, join='outer', join_axes=None,
          ignore_index=False, keys=None, levels=None,
          names=None, verify_integrity=False, copy=True)
```

In [13]:
# Two Series with disjoint integer labels (1-3 and 4-6).
ser1 = pd.Series(list('ABC'), index=[1, 2, 3])
ser2 = pd.Series(list('DEF'), index=[4, 5, 6])

# pd.concat stacks them end to end and keeps each Series' own labels.
pd.concat([ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [17]:
df1 = make_df('AB', [1,2])
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [19]:
df2 = make_df('AB', [3,4])
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [22]:
pd.concat([df1, df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [24]:
df3 = make_df('AB', [0,1])
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [25]:
df4 = make_df('CD', [0,1])
df4

Unnamed: 0,C,D
0,C0,D0
1,C1,D1


In [26]:
pd.concat([df3, df4], axis = 1)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [27]:
pd.concat([df3, df4])

Unnamed: 0,A,B,C,D
0,A0,B0,,
1,A1,B1,,
0,,,C0,D0
1,,,C1,D1


<b>인덱스 복제</b>

In [30]:
x = make_df('AB', [0,1])
x

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [31]:
y = make_df('AB', [2,3])
y

Unnamed: 0,A,B
2,A2,B2
3,A3,B3


In [33]:
# Overwrite y's index with x's so both frames carry the labels 0 and 1.
# NOTE: this mutates y in place; the later cells that display or concatenate y
# rely on the overwritten index.
y.index = x.index # create duplicate index labels!
# pd.concat preserves the repeated labels rather than renumbering the rows.
pd.concat([x,y])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


In [35]:
# With verify_integrity=True, pd.concat raises ValueError when the inputs'
# indexes overlap; catch it so the message is shown without a traceback.
try:
    pd.concat([x, y], verify_integrity=True)
except ValueError as err:
    print("ValueError: ", err)

ValueError:  Indexes have overlapping values: [0, 1]


In [36]:
x

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [37]:
y

Unnamed: 0,A,B
0,A2,B2
1,A3,B3


In [41]:
pd.concat([x,y], ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [43]:
pd.concat([x,y], keys=['x','y'])

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


<b>조인을 이용한 연결</b>

In [44]:
df5 = make_df('ABC', [1,2])
df5

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2


In [45]:
df6 = make_df('BCD', [3,4])
df6

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4


In [46]:
pd.concat([df5, df6])

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [47]:
pd.concat([df5,df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [49]:
# Keep only df5's columns (A, B, C) in the concatenated result.
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0, so reindex the
# default outer-join result instead; this gives the same frame on old and new
# pandas versions.
pd.concat([df5, df6]).reindex(columns=df5.columns)

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4


In [50]:
df5.columns

Index(['A', 'B', 'C'], dtype='object')

In [51]:
# Keep only df6's columns (B, C, D) in the concatenated result.
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0, so reindex the
# default outer-join result instead; this gives the same frame on old and new
# pandas versions.
pd.concat([df5, df6]).reindex(columns=df6.columns)

Unnamed: 0,B,C,D
1,B1,C1,
2,B2,C2,
3,B3,C3,D3
4,B4,C4,D4


<b>append() 메서드</b>

In [52]:
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [53]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [54]:
# Produces the rows of df1 followed by the rows of df2, the same result as
# the earlier pd.concat([df1, df2]) cell.
# NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed in
# 2.0; on current pandas use pd.concat([df1, df2]) instead.
df1.append(df2)

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
