# 1. Appending new rows to DataFrames

## 1.1. Append without using `append()` (using `loc`)

In [1]:
import numpy as np
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.float_format', lambda x : '%.3f' % x)
pd.set_option('max_columns', None)

In [8]:
df = pd.DataFrame(columns=['a','b'])
df.head()

Unnamed: 0,a,b


### 1.1.1. Add data as 'list'

In [9]:
df.loc[0] = [1,2]
df.head()

Unnamed: 0,a,b
0,1,2


### 1.1.2 Add data as 'dict'

In [10]:
df.loc[len(df)] = {'b' : 'ㅎ', 'a': 'ㅋ'}
df.head()

Unnamed: 0,a,b
0,1,2
1,ㅋ,ㅎ


### 1.1.3. Add data as 'Series'

In [11]:
df.loc["yay"] = pd.Series({'a':'ㅋ','b':'ㅎ'})
df.tail()

Unnamed: 0,a,b
0,1,2
1,ㅋ,ㅎ
yay,ㅋ,ㅎ


In [12]:
# 이미 존재하는 index에 넣기
df.loc["yay"] = pd.Series({'a':'1111','b':'2222'})
df.tail()

Unnamed: 0,a,b
0,1,2
1,ㅋ,ㅎ
yay,1111,2222


## 1.2. Append using `append()`

- 위의 `loc`와는 다르게 not in-place(returns a new copy of the DataFrame)
- `append()` : it only accept
  - DataFrame
  - Series
  - Dictionary
  - list of thes(Not list itself)

In [13]:
names_df = pd.DataFrame({
    'Name':['철수','영희','영수','영미'],
    'Age': [12,13,14,15]
}, index=['Canada','Canada','USA','USA'])
names_df

Unnamed: 0,Name,Age
Canada,철수,12
Canada,영희,13
USA,영수,14
USA,영미,15


In [15]:
# index를 뭐로 설정해야도리지 모르기때문에 에러가 남
names_df.append(
    {'Name':'영수','Age':1}
)

TypeError: Can only append a dict if ignore_index=True

### 1.2.1. ignore_index=True
- 이전 index를 다 reset한다

In [16]:
names_df.append(
    {'Name':'영수','Age':1},
    ignore_index=True
)

Unnamed: 0,Name,Age
0,철수,12
1,영희,13
2,영수,14
3,영미,15
4,영수,1


### 1.2.2. Old index 유지하기 => `append()` 할 때, dict 대신에 Series를 전달하면 됨

- `Series`를 `append`를 할 때는, `Series`의 index가 column이 되고, name이 index가 됨

In [17]:
s = pd.Series({'Name':'Zach','Age':3}, name=len(names_df))
s

Name    Zach
Age        3
Name: 4, dtype: object

In [18]:
names_df.append(s)

Unnamed: 0,Name,Age
Canada,철수,12
Canada,영희,13
USA,영수,14
USA,영미,15
4,Zach,3


In [19]:
s1 = pd.Series({'Name':'Zach','Age':3}, name=len(names_df))
s2 = pd.Series({'Name':'Zach','Age':13}, name='USA')

names_df.append([s1,s2])

Unnamed: 0,Name,Age
Canada,철수,12
Canada,영희,13
USA,영수,14
USA,영미,15
4,Zach,3
USA,Zach,13


# 2.concat, join, and merge

## 2.1. `concat()`

- DataFrame of Series object를 vertically or horizontally '연결'
- index(or columns)에 대해 align (not values)
- Default to outer join
  - operation axis에 따라 concat되는 object의 column or indexrk union됨

In [21]:
import FinanceDataReader as fdr

In [22]:
samsung_df = fdr.DataReader('005390','2009-01-01','2017-12-31')
kodex_df = fdr.DataReader('069500','2016-01-01','2017-12-31')

In [23]:
samsung_df.head(2)
kodex_df.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-01-02,200,234,200,234,2122,0.083
2009-01-05,229,234,213,223,6890,-0.047


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,21441,21449,21074,21086,7836785,-0.02
2016-01-05,21033,21283,21033,21218,14048444,0.006


In [25]:
pd.concat([samsung_df, kodex_df]) # axis=0 (수직)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-01-02,200,234,200,234,2122,0.083
2009-01-05,229,234,213,223,6890,-0.047
2009-01-06,210,234,210,212,2144,-0.049
2009-01-07,214,220,208,216,2434,0.019
2009-01-08,216,220,200,215,2280,-0.005
...,...,...,...,...,...,...
2017-12-21,29897,29952,29387,29394,9315017,-0.019
2017-12-22,29469,29597,29395,29535,9227429,0.005
2017-12-26,29594,29772,29409,29416,8271046,-0.004
2017-12-27,29510,29754,29450,29761,13766103,0.012


In [27]:
pd.concat([samsung_df,kodex_df[['Open','High']]]).tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-12-21,29897,29952,,,,
2017-12-22,29469,29597,,,,
2017-12-26,29594,29772,,,,
2017-12-27,29510,29754,,,,
2017-12-28,29768,30160,,,,


In [29]:
pd.concat([samsung_df, kodex_df],keys=['삼성','KODEX200'],names=['종목명'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Change
종목명,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
삼성,2009-01-02,200,234,200,234,2122,0.083
삼성,2009-01-05,229,234,213,223,6890,-0.047
삼성,2009-01-06,210,234,210,212,2144,-0.049
삼성,2009-01-07,214,220,208,216,2434,0.019
삼성,2009-01-08,216,220,200,215,2280,-0.005
...,...,...,...,...,...,...,...
KODEX200,2017-12-21,29897,29952,29387,29394,9315017,-0.019
KODEX200,2017-12-22,29469,29597,29395,29535,9227429,0.005
KODEX200,2017-12-26,29594,29772,29409,29416,8271046,-0.004
KODEX200,2017-12-27,29510,29754,29450,29761,13766103,0.012


- axis = 1

In [30]:
pd.concat([samsung_df,kodex_df], axis=1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-02,200,234,200,234,2122,0.083,,,,,,
2009-01-05,229,234,213,223,6890,-0.047,,,,,,
2009-01-06,210,234,210,212,2144,-0.049,,,,,,
2009-01-07,214,220,208,216,2434,0.019,,,,,,
2009-01-08,216,220,200,215,2280,-0.005,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-21,1080,1095,1050,1050,526785,-0.032,29897.000,29952.000,29387.000,29394.000,9315017.000,-0.019
2017-12-22,1050,1065,1040,1050,575426,0.000,29469.000,29597.000,29395.000,29535.000,9227429.000,0.005
2017-12-26,1050,1070,1045,1060,441204,0.010,29594.000,29772.000,29409.000,29416.000,8271046.000,-0.004
2017-12-27,1080,1095,1060,1095,428161,0.033,29510.000,29754.000,29450.000,29761.000,13766103.000,0.012


In [31]:
pd.concat([samsung_df,kodex_df], axis=1, keys=['삼성','KODEX200'])

Unnamed: 0_level_0,삼성,삼성,삼성,삼성,삼성,삼성,KODEX200,KODEX200,KODEX200,KODEX200,KODEX200,KODEX200
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Change,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2009-01-02,200,234,200,234,2122,0.083,,,,,,
2009-01-05,229,234,213,223,6890,-0.047,,,,,,
2009-01-06,210,234,210,212,2144,-0.049,,,,,,
2009-01-07,214,220,208,216,2434,0.019,,,,,,
2009-01-08,216,220,200,215,2280,-0.005,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-21,1080,1095,1050,1050,526785,-0.032,29897.000,29952.000,29387.000,29394.000,9315017.000,-0.019
2017-12-22,1050,1065,1040,1050,575426,0.000,29469.000,29597.000,29395.000,29535.000,9227429.000,0.005
2017-12-26,1050,1070,1045,1060,441204,0.010,29594.000,29772.000,29409.000,29416.000,8271046.000,-0.004
2017-12-27,1080,1095,1060,1095,428161,0.033,29510.000,29754.000,29450.000,29761.000,13766103.000,0.012


- join argument

In [33]:
pd.concat([samsung_df,kodex_df],axis=1,keys=['삼성','KODEX200'],join='inner')

Unnamed: 0_level_0,삼성,삼성,삼성,삼성,삼성,삼성,KODEX200,KODEX200,KODEX200,KODEX200,KODEX200,KODEX200
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Change,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2016-01-04,1470,1485,1430,1430,862249,-0.017,21441,21449,21074,21086,7836785,-0.020
2016-01-05,1405,1440,1400,1400,844949,-0.021,21033,21283,21033,21218,14048444,0.006
2016-01-06,1415,1435,1390,1400,1061873,0.000,21207,21230,20946,21064,12301408,-0.007
2016-01-07,1405,1415,1350,1350,1184923,-0.036,21008,21087,20802,20862,12684117,-0.010
2016-01-08,1315,1365,1290,1360,886329,0.007,20732,21019,20687,21010,13864041,0.007
...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-21,1080,1095,1050,1050,526785,-0.032,29897,29952,29387,29394,9315017,-0.019
2017-12-22,1050,1065,1040,1050,575426,0.000,29469,29597,29395,29535,9227429,0.005
2017-12-26,1050,1070,1045,1060,441204,0.010,29594,29772,29409,29416,8271046,-0.004
2017-12-27,1080,1095,1060,1095,428161,0.033,29510,29754,29450,29761,13766103,0.012


In [36]:
pd.concat([samsung_df,kodex_df[['Close']]],axis=0,keys=['삼성','KODEX200'],join='inner')

Unnamed: 0_level_0,Unnamed: 1_level_0,Close
Unnamed: 0_level_1,Date,Unnamed: 2_level_1
삼성,2009-01-02,234
삼성,2009-01-05,223
삼성,2009-01-06,212
삼성,2009-01-07,216
삼성,2009-01-08,215
...,...,...
KODEX200,2017-12-21,29394
KODEX200,2017-12-22,29535
KODEX200,2017-12-26,29416
KODEX200,2017-12-27,29761
