# Pandas Cookbook

## 9장. Pandas 객체 합치기 

https://github.com/PacktPublishing/Pandas-Cookbook/tree/master/data

### 소개

### DataFrames에 새로운 행 추가

In [3]:
import numpy as np
import pandas as pd

In [14]:
pwd = %pwd
names = pd.read_csv(pwd + '/names.csv')
names.head()

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2


In [15]:
# Add a row in place by assigning a list of values (one per column) to an
# index label that does not exist yet; here the new label is the integer 4.
new_data_list = ['Aria', 1]
names.loc[4] = new_data_list
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [16]:
# The new label does not have to be an integer: this adds a row whose
# index label is the string 'five'.
names.loc['five'] = ['Zach', 3]
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3


In [17]:
# Use the current number of rows as the label of the new row, and supply
# the row values as a dict keyed by column name.
next_label = len(names)
names.loc[next_label] = {'Name': 'Zayd', 'Age': 2}
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3
6,Zayd,2


In [18]:
# A Series can be assigned as well: its index labels are matched against
# the column names, so the keys may be given in any order.
dean_row = pd.Series({'Age': 32, 'Name': 'Dean'})
names.loc[len(names)] = dean_row
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3
6,Zayd,2
7,Dean,32


In [20]:
pwd = %pwd
names = pd.read_csv(pwd + '/names.csv')
names.head()

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2


In [21]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# new row is wrapped in a one-row DataFrame and attached with pd.concat.
# ignore_index=True renumbers the result from 0; the expression returns a
# new frame and leaves `names` itself untouched.
aria_row = pd.DataFrame([{'Name': 'Aria', 'Age': 1}])
pd.concat([names, aria_row], ignore_index=True)

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [22]:
# Replace the default integer index with country labels (which repeat).
country_labels = ['Canada', 'Canada', 'USA', 'USA']
names.index = country_labels
names

Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2


In [23]:
# Same one-row addition as before, now on a frame with a labelled index.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
# With ignore_index=True the existing index labels are discarded and the
# result is renumbered from 0.
aria_row = pd.DataFrame([{'Name': 'Aria', 'Age': 1}])
pd.concat([names, aria_row], ignore_index=True)

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [25]:
# Build the new row as a Series. Its name is set to the current row count;
# that name is what labels the row once it is added to the frame.
row_label = len(names)
s = pd.Series({'Name': 'Zach', 'Age': 3}, name=row_label)
s

Name    Zach
Age        3
Name: 4, dtype: object

In [26]:
# DataFrame.append was removed in pandas 2.0. s.to_frame().T turns the
# Series into a one-row frame whose index label is s.name, and pd.concat
# attaches it while keeping the existing index labels of `names`.
pd.concat([names, s.to_frame().T])

Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2
4,Zach,3


In [27]:
# Add several rows at once. Each Series' name becomes the index label of
# its row, so the second row reuses the existing 'USA' label.
s1 = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
s2 = pd.Series({'Name': 'Zayd', 'Age': 2}, name='USA')
# DataFrame.append was removed in pandas 2.0: build a frame from the list
# of Series (one row per Series) and attach it with pd.concat.
pd.concat([names, pd.DataFrame([s1, s2])])

Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2
4,Zach,3
USA,Zayd,2


In [30]:
pwd = %pwd
bball_16 = pd.read_csv(pwd + '/baseball16.csv')
bball_16.head()

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,...,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
0,altuvjo01,2016,1,HOU,AL,161,640,108,216,42,...,96.0,30.0,10.0,60,70.0,11.0,7.0,3.0,7.0,15.0
1,bregmal01,2016,1,HOU,AL,49,201,31,53,13,...,34.0,2.0,0.0,15,52.0,0.0,0.0,0.0,1.0,1.0
2,castrja01,2016,1,HOU,AL,113,329,41,69,16,...,32.0,2.0,1.0,45,123.0,0.0,1.0,1.0,0.0,9.0
3,correca01,2016,1,HOU,AL,153,577,76,158,36,...,96.0,13.0,3.0,75,139.0,5.0,5.0,0.0,3.0,12.0
4,gattiev01,2016,1,HOU,AL,128,447,58,112,19,...,72.0,2.0,1.0,43,127.0,6.0,4.0,0.0,5.0,12.0


In [31]:
# Turn the first row of bball_16 into a plain dict mapping each column
# name to that row's value, then print it.
data_dict = bball_16.iloc[0].to_dict()
print(data_dict)

{'playerID': 'altuvjo01', 'yearID': 2016, 'stint': 1, 'teamID': 'HOU', 'lgID': 'AL', 'G': 161, 'AB': 640, 'R': 108, 'H': 216, '2B': 42, '3B': 5, 'HR': 24, 'RBI': 96.0, 'SB': 30.0, 'CS': 10.0, 'BB': 60, 'SO': 70.0, 'IBB': 11.0, 'HBP': 7.0, 'SH': 3.0, 'SF': 7.0, 'GIDP': 15.0}


In [33]:
# Build a blank record with the same keys as data_dict: an empty string
# where the sample value is a str, NaN for every other field.
new_data_list = {
    key: ('' if isinstance(value, str) else np.nan)
    for key, value in data_dict.items()
}
print(new_data_list)

{'playerID': '', 'yearID': nan, 'stint': nan, 'teamID': '', 'lgID': '', 'G': nan, 'AB': nan, 'R': nan, 'H': nan, '2B': nan, '3B': nan, 'HR': nan, 'RBI': nan, 'SB': nan, 'CS': nan, 'BB': nan, 'SO': nan, 'IBB': nan, 'HBP': nan, 'SH': nan, 'SF': nan, 'GIDP': nan}


In [42]:
# Generate 1000 synthetic rows with the same fields as data_dict.
letters = list('abcde')
first_label = len(bball_16)
random_data = []
for i in range(1000):
    d = {}
    for k, v in data_dict.items():
        # Fields whose sample value is a str get a random letter;
        # every other field gets a random integer below 10.
        if isinstance(v, str):
            d[k] = np.random.choice(letters)
        else:
            d[k] = np.random.randint(10)
    # Row labels continue after the last row of bball_16.
    # NOTE: each entry is a one-element list wrapping the Series.
    random_data.append([pd.Series(d, name=i + first_label)])

In [46]:
# Inspect the first generated entry.
random_data[0]

[playerID    d
 yearID      6
 stint       7
 teamID      d
 lgID        e
 G           1
 AB          3
 R           8
 H           2
 2B          6
 3B          0
 HR          2
 RBI         8
 SB          8
 CS          3
 BB          7
 SO          0
 IBB         3
 HBP         0
 SH          1
 SF          8
 GIDP        7
 Name: 16, dtype: object]

In [49]:
%%timeit
bball_16_copy = bball_16.copy()
for row in random_data:
    bball_16_copy = bball_16_copy.append(row)

4.21 s ± 66.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [51]:
%%timeit
bball_16_copy = bball_16.copy()
bball_16_copy = bball_16_copy.append(random_data)

  result = result.union(other)


2.89 ms ± 54.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### 복수 DataFrames 연결

### 트럼프와 오바마 대통령 국정 수행 능력 평가 비교

### concat, join, merge 사이의 차이점 이해하기

### SQL 데이터베이스에 연결