# 객체 합치기

### 1. concat

In [1]:
import pandas as pd

In [2]:
# Two small demo frames whose row labels overlap on 2 and 3.
# df1 holds columns a-c for rows 0-3; df2 holds columns a-d for rows 2-5.
# Each cell value is "<column><row label>", e.g. 'b2'.
df1 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(4)] for col in 'abc'},
    index=list(range(4)),
)

df2 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(2, 6)] for col in 'abcd'},
    index=list(range(2, 6)),
)

print(df1, '\n')
print(df2)

    a   b   c
0  a0  b0  c0
1  a1  b1  c1
2  a2  b2  c2
3  a3  b3  c3 

    a   b   c   d
2  a2  b2  c2  d2
3  a3  b3  c3  d3
4  a4  b4  c4  d4
5  a5  b5  c5  d5


In [3]:
result1 = pd.concat([df1,df2])
result1
# axis=0 is the default, so the frames are stacked row-wise (one on top of the other).
# df1 has no 'd' column, so its rows are filled with NaN in that column.

Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
3,a3,b3,c3,
2,a2,b2,c2,d2
3,a3,b3,c3,d3
4,a4,b4,c4,d4
5,a5,b5,c5,d5


In [None]:
# 그냥 이어붙이면 기존 행 인덱스 번호를 그대로 가져오기 때문에,
# ignore_index=True를 줘서 인덱스를 0부터 다시 매길 수 있다.

In [4]:
# Same row-wise concat, but the original row labels are dropped and the
# result is renumbered from 0.
result2 = pd.concat([df1,df2], ignore_index=True)
result2

Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
3,a3,b3,c3,
4,a2,b2,c2,d2
5,a3,b3,c3,d3
6,a4,b4,c4,d4
7,a5,b5,c5,d5


In [5]:
# This time concatenate column-wise (axis=1, side by side).
# Rows are aligned on their index labels; labels present in only one frame
# get NaN in the other frame's columns.
result3 = pd.concat([df1,df2],axis=1)
result3

Unnamed: 0,a,b,c,a.1,b.1,c.1,d
0,a0,b0,c0,,,,
1,a1,b1,c1,,,,
2,a2,b2,c2,a2,b2,c2,d2
3,a3,b3,c3,a3,b3,c3,d3
4,,,,a4,b4,c4,d4
5,,,,a5,b5,c5,d5


In [6]:
result3_in = pd.concat([df1,df2], axis=1, join='inner')   # column-wise (axis=1), keep only row labels found in both frames (inner)
result3_in

Unnamed: 0,a,b,c,a.1,b.1,c.1,d
2,a2,b2,c2,a2,b2,c2,d2
3,a3,b3,c3,a3,b3,c3,d3


In [7]:
# Series that will be attached to the DataFrames.
# sr1 and sr3 use the default 0..3 labels; sr2 is labelled 3, 4, 5.
sr1 = pd.Series([f'e{i}' for i in range(4)], name='e')
sr2 = pd.Series([f'f{i}' for i in range(3)], index=[3, 4, 5], name='f')
sr3 = pd.Series([f'g{i}' for i in range(4)], name='g')

In [8]:
result4 = pd.concat([df1,sr1], axis=1)
result4

Unnamed: 0,a,b,c,e
0,a0,b0,c0,e0
1,a1,b1,c1,e1
2,a2,b2,c2,e2
3,a3,b3,c3,e3


In [9]:
result5 = pd.concat([df2,sr2], axis=1)
result5

Unnamed: 0,a,b,c,d,f
2,a2,b2,c2,d2,
3,a3,b3,c3,d3,f0
4,a4,b4,c4,d4,f1
5,a5,b5,c5,d5,f2


In [10]:
# Concatenating Series with each other
result6 = pd.concat([sr1, sr3], axis = 1)  # column-wise concat; the result is a DataFrame
result6

Unnamed: 0,e,g
0,e0,g0
1,e1,g1
2,e2,g2
3,e3,g3


In [11]:
print(type(result6), '\n')

<class 'pandas.core.frame.DataFrame'> 



In [14]:
result7 = pd.concat([sr1, sr3], axis = 0, ignore_index=True)  # row-wise concat; the result is a Series
result7

0    e0
1    e1
2    e2
3    e3
4    g0
5    g1
6    g2
7    g3
dtype: object

In [13]:
print(type(result7), '\n')

<class 'pandas.core.series.Series'> 



### 1.1. Multiple Data Loading

In [15]:
df1 = pd.read_csv('data/stocks_2016.csv')
df1

Unnamed: 0,Symbol,Shares,Low,High
0,AAPL,80,95,110
1,TSLA,50,80,130
2,WMT,40,55,70


In [16]:
df2 = pd.read_csv('data/stocks_2017.csv')
df2

Unnamed: 0,Symbol,Shares,Low,High
0,AAPL,50,120,140
1,GE,100,30,40
2,IBM,87,75,95
3,SLB,20,55,85
4,TXN,500,15,23
5,TSLA,100,100,300


In [17]:
df3 = pd.read_csv('data/stocks_2018.csv')
df3

Unnamed: 0,Symbol,Shares,Low,High
0,AAPL,40,135,170
1,AMZN,8,900,1125
2,TSLA,50,220,400


In [18]:
# Years whose stock CSVs are loaded below; kept as a tuple.
years = (2016, 2017, 2018)
type(years)

tuple

In [19]:
# Read one CSV per year, using the 'Symbol' column as the row index.
stock_tables = [
    pd.read_csv('data/stocks_{}.csv'.format(year), index_col='Symbol')
    for year in years
]
stock_tables

[        Shares  Low  High
 Symbol                   
 AAPL        80   95   110
 TSLA        50   80   130
 WMT         40   55    70,
         Shares  Low  High
 Symbol                   
 AAPL        50  120   140
 GE         100   30    40
 IBM         87   75    95
 SLB         20   55    85
 TXN        500   15    23
 TSLA       100  100   300,
         Shares  Low  High
 Symbol                   
 AAPL        40  135   170
 AMZN         8  900  1125
 TSLA        50  220   400]

In [20]:
# Unpack the list into one name per table (same order as `years`).
s_2016, s_2017, s_2018 = stock_tables
s_2017

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


In [21]:
s_2018

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,40,135,170
AMZN,8,900,1125
TSLA,50,220,400


### 1.2. 수직 병합

In [22]:
stock_tables

[        Shares  Low  High
 Symbol                   
 AAPL        80   95   110
 TSLA        50   80   130
 WMT         40   55    70,
         Shares  Low  High
 Symbol                   
 AAPL        50  120   140
 GE         100   30    40
 IBM         87   75    95
 SLB         20   55    85
 TXN        500   15    23
 TSLA       100  100   300,
         Shares  Low  High
 Symbol                   
 AAPL        40  135   170
 AMZN         8  900  1125
 TSLA        50  220   400]

In [23]:
pd.concat(stock_tables)

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300
AAPL,40,135,170


In [24]:
# keys= tags the rows of each input table with its year as an outer index level.
pd.concat(stock_tables, keys=[2016, 2017, 2018])

Unnamed: 0_level_0,Unnamed: 1_level_0,Shares,Low,High
Unnamed: 0_level_1,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,AAPL,80,95,110
2016,TSLA,50,80,130
2016,WMT,40,55,70
2017,AAPL,50,120,140
2017,GE,100,30,40
2017,IBM,87,75,95
2017,SLB,20,55,85
2017,TXN,500,15,23
2017,TSLA,100,100,300
2018,AAPL,40,135,170


### 1.3. 수평 병합

In [27]:
pd.concat(stock_tables, axis=1) # same as axis='columns'

Unnamed: 0,Shares,Low,High,Shares.1,Low.1,High.1,Shares.2,Low.2,High.2
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
WMT,40.0,55.0,70.0,,,,,,
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TXN,,,,500.0,15.0,23.0,,,
AMZN,,,,,,,8.0,900.0,1125.0


In [28]:
# zip pairs up the elements of several sequences position by position
# (iteration stops when the shortest input is exhausted).
a = [1, 2, 3, 4, 5]
b = list('abcde')

for x, y in zip(a, b):
    print(x, y)

1 a
2 b
3 c
4 d
5 e


In [29]:
years

(2016, 2017, 2018)

In [30]:
stock_tables

[        Shares  Low  High
 Symbol                   
 AAPL        80   95   110
 TSLA        50   80   130
 WMT         40   55    70,
         Shares  Low  High
 Symbol                   
 AAPL        50  120   140
 GE         100   30    40
 IBM         87   75    95
 SLB         20   55    85
 TXN        500   15    23
 TSLA       100  100   300,
         Shares  Low  High
 Symbol                   
 AAPL        40  135   170
 AMZN         8  900  1125
 TSLA        50  220   400]

In [32]:
# Pair each year with its table; concat uses the dict keys as the outer
# column level, so each year's Shares/Low/High sit under that year.
pd.concat(dict(zip(years,stock_tables)), axis=1)

Unnamed: 0_level_0,2016,2016,2016,2017,2017,2017,2018,2018,2018
Unnamed: 0_level_1,Shares,Low,High,Shares,Low,High,Shares,Low,High
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
WMT,40.0,55.0,70.0,,,,,,
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TXN,,,,500.0,15.0,23.0,,,
AMZN,,,,,,,8.0,900.0,1125.0


### 2. Merge

In [None]:
# merge()함수는 두 데이터프레임을 각 데이터에 존재하는 고유값(key)을 기준으로 병합할때 사용한다.
# pd.merge(df_left, df_right, how='inner', on=None)이 default이다.

In [33]:
s_2016

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70


In [34]:
s_2017

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


In [35]:
s_2016.merge(s_2017, left_index=True, right_index=True)

Unnamed: 0_level_0,Shares_x,Low_x,High_x,Shares_y,Low_y,High_y
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80,95,110,50,120,140
TSLA,50,80,130,100,100,300


In [36]:
pd.merge(s_2016, s_2017, how='inner', on='Symbol')

Unnamed: 0_level_0,Shares_x,Low_x,High_x,Shares_y,Low_y,High_y
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80,95,110,50,120,140
TSLA,50,80,130,100,100,300


In [37]:
# Outer join on the index keeps every symbol that appears in either year;
# suffixes= disambiguates the column names shared by both tables.
step1 = s_2016.merge(s_2017, left_index=True, right_index=True, how='outer', suffixes=('_2016','_2017'))
step1

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80.0,95.0,110.0,50.0,120.0,140.0
GE,,,,100.0,30.0,40.0
IBM,,,,87.0,75.0,95.0
SLB,,,,20.0,55.0,85.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0
TXN,,,,500.0,15.0,23.0
WMT,40.0,55.0,70.0,,,


In [38]:
# Add the 2018 table to the 2016/2017 result with another outer join on the index.
# add_suffix labels the 2018 columns explicitly before merging.
s_merge = step1.merge(s_2018.add_suffix('_2018'), left_index=True, right_index=True, how='outer')
s_merge

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017,Shares_2018,Low_2018,High_2018
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
AMZN,,,,,,,8.0,900.0,1125.0
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
TXN,,,,500.0,15.0,23.0,,,
WMT,40.0,55.0,70.0,,,,,,


### 3. Join

In [None]:
# join함수는 merge()함수를 기반으로 만들어졌다.
# 행 인덱스를 기준으로 결합한다.
# on=keys 옵션이 존재한다.
# Dataframe1.join(Dataframe2, how='left')이 default값이다.

In [39]:
# lsuffix: suffix appended to overlapping column names from the left frame.
# rsuffix: suffix appended to overlapping column names from the right frame.
s_2016.join(s_2017, lsuffix='_2016', rsuffix='_2017') # default: how='left'

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80,95,110,50.0,120.0,140.0
TSLA,50,80,130,100.0,100.0,300.0
WMT,40,55,70,,,


In [40]:
s_2016.join(s_2017, lsuffix='_2016', rsuffix='_2017', how='outer')

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80.0,95.0,110.0,50.0,120.0,140.0
GE,,,,100.0,30.0,40.0
IBM,,,,87.0,75.0,95.0
SLB,,,,20.0,55.0,85.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0
TXN,,,,500.0,15.0,23.0
WMT,40.0,55.0,70.0,,,


In [41]:
s_2017.add_suffix('_2017')

Unnamed: 0_level_0,Shares_2017,Low_2017,High_2017
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


In [42]:
other = [s_2017.add_suffix('_2017'), s_2018.add_suffix('_2018')]

In [43]:
s_2016.add_suffix('_2016').join(other, how='outer')

Unnamed: 0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017,Shares_2018,Low_2018,High_2018
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
WMT,40.0,55.0,70.0,,,,,,
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TXN,,,,500.0,15.0,23.0,,,
AMZN,,,,,,,8.0,900.0,1125.0


### 4. 병합의 활용

In [44]:
# Load the price list and the transaction log from data/food_<name>.csv.
names = ['prices', 'transactions']
food_tables = [
    pd.read_csv('data/food_{}.csv'.format(name))
    for name in names
]
# First table is the prices, second is the transactions (order of `names`).
food_prices, food_tr = food_tables

In [45]:
food_prices

Unnamed: 0,item,store,price,Date
0,pear,A,0.99,2017
1,pear,B,1.99,2017
2,peach,A,2.99,2017
3,peach,B,3.49,2017
4,banana,A,0.39,2017
5,banana,B,0.49,2017
6,steak,A,5.99,2017
7,steak,B,6.99,2017
8,steak,B,4.99,2015


In [46]:
food_tr

Unnamed: 0,custid,item,store,quantity
0,1,pear,A,5
1,1,banana,A,10
2,2,steak,B,3
3,2,pear,B,1
4,2,peach,B,2
5,2,steak,B,1
6,2,coconut,B,4


In [48]:
# With no on= given, merge joins on every column the two frames share
# (here 'item' and 'store'); the default join type is inner.
food_merge_inner1 = pd.merge(food_prices, food_tr)
food_merge_inner1

Unnamed: 0,item,store,price,Date,custid,quantity
0,pear,A,0.99,2017,1,5
1,pear,B,1.99,2017,2,1
2,peach,B,3.49,2017,2,2
3,banana,A,0.39,2017,1,10
4,steak,B,6.99,2017,2,3
5,steak,B,6.99,2017,2,1
6,steak,B,4.99,2015,2,3
7,steak,B,4.99,2015,2,1


In [49]:
# Joining on 'item' only: the other shared column, 'store', is kept from
# both sides as store_x / store_y, and rows match regardless of store.
food_merge_inner2 = pd.merge(food_prices, food_tr, how='inner',on='item')
food_merge_inner2

Unnamed: 0,item,store_x,price,Date,custid,store_y,quantity
0,pear,A,0.99,2017,1,A,5
1,pear,A,0.99,2017,2,B,1
2,pear,B,1.99,2017,1,A,5
3,pear,B,1.99,2017,2,B,1
4,peach,A,2.99,2017,2,B,2
5,peach,B,3.49,2017,2,B,2
6,banana,A,0.39,2017,1,A,10
7,banana,B,0.49,2017,1,A,10
8,steak,A,5.99,2017,2,B,3
9,steak,A,5.99,2017,2,B,1


In [50]:
food_merge_inner3 = food_tr.merge(food_prices, on=['item','store'])
food_merge_inner3

Unnamed: 0,custid,item,store,quantity,price,Date
0,1,pear,A,5,0.99,2017
1,1,banana,A,10,0.39,2017
2,2,steak,B,3,6.99,2017
3,2,steak,B,3,4.99,2015
4,2,steak,B,1,6.99,2017
5,2,steak,B,1,4.99,2015
6,2,pear,B,1,1.99,2017
7,2,peach,B,2,3.49,2017


In [51]:
merge_outer1 = pd.merge(food_prices,food_tr, how='outer')
merge_outer1

Unnamed: 0,item,store,price,Date,custid,quantity
0,pear,A,0.99,2017.0,1.0,5.0
1,pear,B,1.99,2017.0,2.0,1.0
2,peach,A,2.99,2017.0,,
3,peach,B,3.49,2017.0,2.0,2.0
4,banana,A,0.39,2017.0,1.0,10.0
5,banana,B,0.49,2017.0,,
6,steak,A,5.99,2017.0,,
7,steak,B,6.99,2017.0,2.0,3.0
8,steak,B,6.99,2017.0,2.0,1.0
9,steak,B,4.99,2015.0,2.0,3.0


In [52]:
merge_outer2 = pd.merge(food_prices,food_tr, how='outer',on='item')
merge_outer2

Unnamed: 0,item,store_x,price,Date,custid,store_y,quantity
0,pear,A,0.99,2017.0,1,A,5
1,pear,A,0.99,2017.0,2,B,1
2,pear,B,1.99,2017.0,1,A,5
3,pear,B,1.99,2017.0,2,B,1
4,peach,A,2.99,2017.0,2,B,2
5,peach,B,3.49,2017.0,2,B,2
6,banana,A,0.39,2017.0,1,A,10
7,banana,B,0.49,2017.0,1,A,10
8,steak,A,5.99,2017.0,2,B,3
9,steak,A,5.99,2017.0,2,B,1


In [53]:
food_prices

Unnamed: 0,item,store,price,Date
0,pear,A,0.99,2017
1,pear,B,1.99,2017
2,peach,A,2.99,2017
3,peach,B,3.49,2017
4,banana,A,0.39,2017
5,banana,B,0.49,2017
6,steak,A,5.99,2017
7,steak,B,6.99,2017
8,steak,B,4.99,2015


In [54]:
food_tr

Unnamed: 0,custid,item,store,quantity
0,1,pear,A,5
1,1,banana,A,10
2,2,steak,B,3
3,2,pear,B,1
4,2,peach,B,2
5,2,steak,B,1
6,2,coconut,B,4


In [55]:
food_merge_left1 = pd.merge(food_prices, food_tr, how='left')
food_merge_left1

Unnamed: 0,item,store,price,Date,custid,quantity
0,pear,A,0.99,2017,1.0,5.0
1,pear,B,1.99,2017,2.0,1.0
2,peach,A,2.99,2017,,
3,peach,B,3.49,2017,2.0,2.0
4,banana,A,0.39,2017,1.0,10.0
5,banana,B,0.49,2017,,
6,steak,A,5.99,2017,,
7,steak,B,6.99,2017,2.0,3.0
8,steak,B,6.99,2017,2.0,1.0
9,steak,B,4.99,2015,2.0,3.0


In [56]:
# The keys compared here never match (item names on the left vs. store codes
# on the right), so a left join keeps every price row with NaN on the right.
food_merge_left2 = pd.merge(food_prices, food_tr, how='left' ,left_on='item', right_on='store')
food_merge_left2

Unnamed: 0,item_x,store_x,price,Date,custid,item_y,store_y,quantity
0,pear,A,0.99,2017,,,,
1,pear,B,1.99,2017,,,,
2,peach,A,2.99,2017,,,,
3,peach,B,3.49,2017,,,,
4,banana,A,0.39,2017,,,,
5,banana,B,0.49,2017,,,,
6,steak,A,5.99,2017,,,,
7,steak,B,6.99,2017,,,,
8,steak,B,4.99,2015,,,,


In [57]:
set(food_prices['item'].unique())

{'banana', 'peach', 'pear', 'steak'}

In [60]:
set(food_tr['item'].unique())

{'banana', 'coconut', 'peach', 'pear', 'steak'}

In [62]:
# Items that appear in the transactions but have no entry in the price table.
set(food_tr['item'].unique()) - set(food_prices['item'].unique())

{'coconut'}

In [63]:
food_merge_right1 = pd.merge(food_prices, food_tr, how='right')
food_merge_right1

Unnamed: 0,item,store,price,Date,custid,quantity
0,pear,A,0.99,2017.0,1,5
1,banana,A,0.39,2017.0,1,10
2,steak,B,6.99,2017.0,2,3
3,steak,B,4.99,2015.0,2,3
4,steak,B,6.99,2017.0,2,1
5,steak,B,4.99,2015.0,2,1
6,pear,B,1.99,2017.0,2,1
7,peach,B,3.49,2017.0,2,2
8,coconut,B,,,2,4


In [65]:
# Left join repeated here for comparison with the right join computed earlier.
# It is stored under the left-join name so that food_merge_right1 keeps the
# how='right' result instead of being overwritten by a left-join frame.
food_merge_left1 = pd.merge(food_prices, food_tr, how='left')
food_merge_left1

Unnamed: 0,item,store,price,Date,custid,quantity
0,pear,A,0.99,2017,1.0,5.0
1,pear,B,1.99,2017,2.0,1.0
2,peach,A,2.99,2017,,
3,peach,B,3.49,2017,2.0,2.0
4,banana,A,0.39,2017,1.0,10.0
5,banana,B,0.49,2017,,
6,steak,A,5.99,2017,,
7,steak,B,6.99,2017,2.0,3.0
8,steak,B,6.99,2017,2.0,1.0
9,steak,B,4.99,2015,2.0,3.0


In [64]:
food_merge_right2 = pd.merge(food_prices, food_tr, how='right' ,left_on='item', right_on='store')
food_merge_right2

Unnamed: 0,item_x,store_x,price,Date,custid,item_y,store_y,quantity
0,,,,,1,pear,A,5
1,,,,,1,banana,A,10
2,,,,,2,steak,B,3
3,,,,,2,pear,B,1
4,,,,,2,peach,B,2
5,,,,,2,steak,B,1
6,,,,,2,coconut,B,4


In [66]:
# Restrict the prices to 2017 first, so the extra 2015 steak/B price does not
# produce a second match per steak transaction. how='left' keeps every
# transaction, including coconut, which has no price row.
food_tr.merge(food_prices.query('Date==2017'), how='left')

Unnamed: 0,custid,item,store,quantity,price,Date
0,1,pear,A,5,0.99,2017.0
1,1,banana,A,10,0.39,2017.0
2,2,steak,B,3,6.99,2017.0
3,2,pear,B,1,1.99,2017.0
4,2,peach,B,2,3.49,2017.0
5,2,steak,B,1,6.99,2017.0
6,2,coconut,B,4,,


In [69]:
# 2017 prices indexed by the (item, store) pair.
f_join = food_prices.query('Date==2017').set_index(['item','store'])
f_join

Unnamed: 0_level_0,Unnamed: 1_level_0,price,Date
item,store,Unnamed: 2_level_1,Unnamed: 3_level_1
pear,A,0.99,2017
pear,B,1.99,2017
peach,A,2.99,2017
peach,B,3.49,2017
banana,A,0.39,2017
banana,B,0.49,2017
steak,A,5.99,2017
steak,B,6.99,2017


# Magic Commands

### 1. %run : 외부 코드 실행

In [70]:
# 외부 코드를 실행하면 그 안에 정의된 함수를 세션에서 사용 가능
%run data/myscript.py

1 의 제곱은 1
2 의 제곱은 4
3 의 제곱은 9


In [71]:
square(5)

25

### 2. 코드 실행 시간 측정

In [72]:
# %timeit : 단일 코드 시간 측정

%timeit L = [n ** 2 for n in range(1000)]

217 µs ± 6.28 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [73]:
# %%timeit를 사용하면 여러 코드 실행 시간 측정 가능

%%timeit
L = []
for n in range(1000):
    L.append(n**2)

256 µs ± 2.39 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### 3. 매직 커맨더 도움말 : ?, %magic, %lsmagic

In [None]:
# ? : 명령어 뒤에 ? 붙이면 도움말
# %magic : 매직 커맨더에 대한 일반적인 설명과 예제
# %lsmagic : 매직 커맨더 리스트

In [74]:
%xmode?

In [75]:
%run?

### 4. %history : 이전에 실행된 명령

In [76]:
%history -n 1-4

   1: import pandas as pd
   2:
df1 = pd.DataFrame({'a':['a0','a1','a2','a3'],
                   'b':['b0','b1','b2','b3'],
                   'c':['c0','c1','c2','c3']},
                  index = [0,1,2,3])

df2 = pd.DataFrame({'a':['a2','a3','a4','a5'],
                   'b':['b2','b3','b4','b5'],
                   'c':['c2','c3','c4','c5'],
                   'd':['d2','d3','d4','d5']},
                   index = [2,3,4,5])

print(df1, '\n')
print(df2)
   3:
result1 = pd.concat([df1,df2])
result1
# defalut값으로 axis=0이 적용되기 때문에 행방향(위아래)으로 데이터프레임을 이어붙인다.
# 그런데 df1에는 d열이 없으므로 NaN값이 채워진 것을 알 수 있다.
   4:
result2 = pd.concat([df1,df2], ignore_index=True)
result2


### 5. automagic 함수 : %없이도 사용가능

In [None]:
"""
%cd : 폴더 이동
%ls : 폴더 내에 모든 파일 리스트
%mkdir : 폴더 생성
%mv : 이동
%cp : 복사
%pwd : 현재 작업 폴더
%rm : 삭제
%rmdir : 폴더 삭제
"""

In [77]:
pwd

'D:\\빅데이터분석전문가\\강의자료\\데이터전처리'

In [78]:
ls

 D 드라이브의 볼륨에는 이름이 없습니다.
 볼륨 일련 번호: C64D-C287

 D:\빅데이터분석전문가\강의자료\데이터전처리 디렉터리

2021-01-19  오전 11:29    <DIR>          .
2021-01-19  오전 11:29    <DIR>          ..
2021-01-18  오후 12:05    <DIR>          .ipynb_checkpoints
2021-01-18  오전 11:45            26,931 Aggregation_practice.ipynb
2021-01-08  오후 12:51    <DIR>          bank
2021-01-08  오후 12:51           579,143 bank.zip
2020-12-31  오전 11:52    <DIR>          data
2020-12-31  오전 11:52        16,069,456 data.zip
2021-01-08  오후 12:50    <DIR>          DATA_01
2021-01-08  오후 12:49           531,678 dataProcessing2.html
2021-01-11  오후 04:29           348,720 dataProcessing2.ipynb
2021-01-18  오전 10:38            88,756 hotel.zip
2021-01-18  오전 10:39    <DIR>          hotel_data
2021-01-18  오전 11:46           292,978 join과merge.html
2021-01-18  오후 12:22            18,710 join과merge.ipynb
2021-01-08  오후 12:49           543,753 과제.zip
2021-01-11  오후 03:03             8,226 금수저.xlsx
2020-12-31  오전 11:49           393,979 데이터전처리 실습1.html
2021

In [None]:
# https://ipython.readthedocs.io/en/stable/interactive/magics.html