# 주요 함수 

In [3]:
import pandas as pd
import numpy as np

### concat  통합하기 



In [4]:
# Base 4x4 frame used by the concat examples: values 0..15 laid out row by
# row, default integer index, columns labelled a-d.
df = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    columns=list("abcd"),
)
print(df)

    a   b   c   d
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15


In [6]:
# Print the built-in documentation of pd.concat (signature and parameters).
help(pd.concat)

Help on function concat in module pandas.core.reshape.concat:

concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, copy=True)
    Concatenate pandas objects along a particular axis with optional set logic
    along the other axes.
    
    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.
    
    Parameters
    ----------
    objs : a sequence or mapping of Series, DataFrame, or Panel objects
        If a dict is passed, the sorted keys will be used as the `keys`
        argument, unless it is passed, in which case the values will be
        selected (see below). Any None objects will be dropped silently unless
        they are all None in which case a ValueError will be raised
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along
    join : {'inner', 'oute

### 동일한 데이터프레임을 연결할 때 기본은 세로로 병합되어 처리됨

In [5]:
# No axis given: concat falls back to its default (axis=0) and stacks the
# second copy underneath the first, keeping the original index labels.
pd.concat(objs=[df, df])

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


### 가로로 병합을 하려면 축을 인자로 넣어야 한다.

In [13]:
# axis="columns" is the named spelling of axis=1: the frames are placed
# side by side, aligned on their index.
pd.concat([df, df], axis="columns")

Unnamed: 0,a,b,c,d,a.1,b.1,c.1,d.1
0,0,1,2,3,0,1,2,3
1,4,5,6,7,4,5,6,7
2,8,9,10,11,8,9,10,11
3,12,13,14,15,12,13,14,15


###  인덱스(행 레이블)가 다른 경우에는 횡으로 붙이면 원소가 불일치하는 곳은 NaN으로 처리


In [15]:
# Second 4x4 frame: same column labels as df, but values 16..31 and index
# labels 11..14, so its index does not overlap with df's 0..3.
df1 = pd.DataFrame(
    np.arange(16, 32).reshape((4, 4)),
    index=[11, 12, 13, 14],
    columns=list("abcd"),
)
print(df1)

     a   b   c   d
11  16  17  18  19
12  20  21  22  23
13  24  25  26  27
14  28  29  30  31


In [16]:
# Side-by-side concat of frames whose index labels do not overlap: every
# row exists in only one input, so the other input's cells become NaN.
pd.concat([df, df1], axis="columns")

Unnamed: 0,a,b,c,d,a.1,b.1,c.1,d.1
0,0.0,1.0,2.0,3.0,,,,
1,4.0,5.0,6.0,7.0,,,,
2,8.0,9.0,10.0,11.0,,,,
3,12.0,13.0,14.0,15.0,,,,
11,,,,,16.0,17.0,18.0,19.0
12,,,,,20.0,21.0,22.0,23.0
13,,,,,24.0,25.0,26.0,27.0
14,,,,,28.0,29.0,30.0,31.0


In [17]:
# axis="index" is the named spelling of axis=0: df1's rows are appended
# below df's rows; the columns match, so no NaN is introduced.
pd.concat([df, df1], axis="index")

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
11,16,17,18,19
12,20,21,22,23
13,24,25,26,27
14,28,29,30,31


### Merge 통합하기

    데이터프레임을 join해서 합치는 역할을 한다. 열을 기준으로 병합하면 기존 index는 무시되고 새 정수 index가 부여된다.
    기본값(how='inner')은 키 열의 값이 양쪽에 모두 있는 행만 남기는 inner join이고, 한쪽에만 있는 값까지 포함하려면 how='outer'를 지정해 outer join으로 처리한다.
    

In [22]:
# Frames for the merge examples. All three share index labels a-d and
# column labels f-i.

# df11 holds the values 0..15.
df11 = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=list("abcd"),
    columns=list("fghi"),
)
print(df11)

# df12 holds 16..31, so none of its values match df11.
df12 = pd.DataFrame(
    np.arange(16, 32).reshape((4, 4)),
    index=list("abcd"),
    columns=list("fghi"),
)
print(df12)

# df22 is built exactly like df11, giving a frame whose keys all match.
df22 = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=list("abcd"),
    columns=list("fghi"),
)
print(df22)

    f   g   h   i
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15
    f   g   h   i
a  16  17  18  19
b  20  21  22  23
c  24  25  26  27
d  28  29  30  31
    f   g   h   i
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [25]:
# Print the built-in documentation of pd.merge (signature and parameters).
help(pd.merge)

Help on function merge in module pandas.tools.merge:

merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False)
    Merge DataFrame objects by performing a database-style join operation by
    columns or indexes.
    
    If joining columns on columns, the DataFrame indexes *will be
    ignored*. Otherwise if joining indexes on indexes or indexes on a column or
    columns, the index will be passed on.
    
    Parameters
    ----------
    left : DataFrame
    right : DataFrame
    how : {'left', 'right', 'outer', 'inner'}, default 'inner'
        * left: use only keys from left frame (SQL: left outer join)
        * right: use only keys from right frame (SQL: right outer join)
        * outer: use union of keys from both frames (SQL: full outer join)
        * inner: use intersection of keys from both frames (SQL: inner join)
    on : label or list
        Field names to jo

In [None]:
# Reference notes copied from help(pd.merge): values accepted by `how`.
# Kept as comments so this cell is valid Python and does not raise a
# SyntaxError when the notebook is run top to bottom.
#
# how : {'left', 'right', 'outer', 'inner'}, default 'inner'
#     * left: use only keys from left frame (SQL: left outer join)
#     * right: use only keys from right frame (SQL: right outer join)
#     * outer: use union of keys from both frames (SQL: full outer join)
#     * inner: use intersection of keys from both frames (SQL: inner join)

In [17]:
# Display df11 as a reference for the merge examples that use it.
df11

Unnamed: 0,f,g,h,i
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [15]:
# Merge df11 with itself using the defaults (no `on`, how='inner').
# The result carries a fresh integer index instead of the a-d labels.
df11.merge(df11)

Unnamed: 0,f,g,h,i
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [18]:
# Display df12 as a reference for the merge examples that use it.
df12

Unnamed: 0,f,g,h,i
a,16,17,18,19
b,20,21,22,23
c,24,25,26,27
d,28,29,30,31


In [11]:
# Default merge of df11 and df12: the result has the f-i columns but no
# rows, because the two frames hold different values.
df11.merge(df12)

Unnamed: 0,f,g,h,i


### 통합하는 필드를 지정해서 아우터 조인(outer join) 하기

In [12]:
# Outer join keyed on column f, naming the key separately for each side.
# The remaining columns get the _x / _y suffixes; unmatched cells are NaN.
print(df11.merge(df12, how='outer', left_on="f", right_on="f"))

    f   g_x   h_x   i_x   g_y   h_y   i_y
0   0   1.0   2.0   3.0   NaN   NaN   NaN
1   4   5.0   6.0   7.0   NaN   NaN   NaN
2   8   9.0  10.0  11.0   NaN   NaN   NaN
3  12  13.0  14.0  15.0   NaN   NaN   NaN
4  16   NaN   NaN   NaN  17.0  18.0  19.0
5  20   NaN   NaN   NaN  21.0  22.0  23.0
6  24   NaN   NaN   NaN  25.0  26.0  27.0
7  28   NaN   NaN   NaN  29.0  30.0  31.0


In [13]:
# Same outer join on f, written with the shorter `on` argument because
# the key column has the same name in both frames.
print(df11.merge(df12, how='outer', on="f"))

    f   g_x   h_x   i_x   g_y   h_y   i_y
0   0   1.0   2.0   3.0   NaN   NaN   NaN
1   4   5.0   6.0   7.0   NaN   NaN   NaN
2   8   9.0  10.0  11.0   NaN   NaN   NaN
3  12  13.0  14.0  15.0   NaN   NaN   NaN
4  16   NaN   NaN   NaN  17.0  18.0  19.0
5  20   NaN   NaN   NaN  21.0  22.0  23.0
6  24   NaN   NaN   NaN  25.0  26.0  27.0
7  28   NaN   NaN   NaN  29.0  30.0  31.0


In [43]:
# Inner join on f between df11 and df22: every f value is present in both
# frames, so all four rows are kept and no NaN appears.
print(df11.merge(df22, how='inner', on='f'))

    f  g_x  h_x  i_x  g_y  h_y  i_y
0   0    1    2    3    1    2    3
1   4    5    6    7    5    6    7
2   8    9   10   11    9   10   11
3  12   13   14   15   13   14   15


###  두 개 중에 하나의 기준 칼럼을 정의하고 연결하기 

In [14]:
# Left join on f: only the keys of the left frame (df11) are used, so the
# columns coming from df12 are NaN wherever the key has no match.
print(df11.merge(df12, how='left', on="f"))

    f  g_x  h_x  i_x  g_y  h_y  i_y
0   0    1    2    3  NaN  NaN  NaN
1   4    5    6    7  NaN  NaN  NaN
2   8    9   10   11  NaN  NaN  NaN
3  12   13   14   15  NaN  NaN  NaN


In [100]:
# Right join on f: only the keys of the right frame (df12) are used, so
# the columns coming from df11 are NaN wherever the key has no match.
print(df11.merge(df12, how='right', on="f"))

    f  g_x  h_x  i_x  g_y  h_y  i_y
0  16  NaN  NaN  NaN   17   18   19
1  20  NaN  NaN  NaN   21   22   23
2  24  NaN  NaN  NaN   25   26   27
3  28  NaN  NaN  NaN   29   30   31


### 필드 여러 개를 처리

In [20]:
# Inner join on two key columns at once. The keys are passed as a list,
# which is the documented form of `on` ("label or list"); a tuple could be
# mistaken for a single tuple-valued column label.
print(pd.merge(df11, df22, on=['f', 'g'], how='inner'))

    f   g  h_x  i_x  h_y  i_y
0   0   1    2    3    2    3
1   4   5    6    7    6    7
2   8   9   10   11   10   11
3  12  13   14   15   14   15


In [21]:
# Outer join on two key columns at once. The keys are passed as a list,
# which is the documented form of `on` ("label or list"); a tuple could be
# mistaken for a single tuple-valued column label.
print(pd.merge(df11, df22, on=['f', 'g'], how='outer'))

    f   g  h_x  i_x  h_y  i_y
0   0   1    2    3    2    3
1   4   5    6    7    6    7
2   8   9   10   11   10   11
3  12  13   14   15   14   15
