# 데이터프레임

In [1]:
import pandas as pd

In [5]:
# Column-oriented input: each key is a column name, each value is that column's list of entries.
dict_data = {
    'c0': [1, 2, 3],
    'c1': [4, 5, 6],
    'c2': [7, 8, 9],
    'c3': [10, 11, 12],
    'c4': [13, 14, 15],
}

# Turn the dict into a DataFrame and keep it in df.
df = pd.DataFrame(data=dict_data)

# Print the object's type, a blank gap, and then the frame itself
# (one print call; the '\n' separator reproduces the spacing of three separate prints).
print(type(df), '\n', df, sep='\n')

<class 'pandas.core.frame.DataFrame'>


   c0  c1  c2  c3  c4
0   1   4   7  10  13
1   2   5   8  11  14
2   3   6   9  12  15


In [8]:
# Setting row index labels and column names
#   df.index   : row index labels
#   df.columns : column names

df = pd.DataFrame(
    data=[[15, '남', '덕영중'], [17, '여', '수리중']],
    index=['준서', '예은'],
    columns=['나이', '성별', '학교'],
)

# Show the frame, its row labels and its column names, each followed by a blank gap.
for shown in (df, df.index, df.columns):
    print(shown)
    print('\n')

# Assigning a complete list replaces all row labels / all column names at once.
df.index = ['학생1', '학생2']
df.columns = ['연령', '남녀', '소속']

# Same three views again, now reflecting the new labels.
for shown in (df, df.index, df.columns):
    print(shown)
    print('\n')

    나이 성별   학교
준서  15  남  덕영중
예은  17  여  수리중


Index(['준서', '예은'], dtype='object')


Index(['나이', '성별', '학교'], dtype='object')


     연령 남녀   소속
학생1  15  남  덕영중
학생2  17  여  수리중


Index(['학생1', '학생2'], dtype='object')


Index(['연령', '남녀', '소속'], dtype='object')




특정 행의 인덱스나 열 이름을 수정할 때

In [12]:
# Rename only specific row labels or column names with rename()
# Here only the row label '학생1' is changed

df.rename(index = {'학생1' : '하은'})

Unnamed: 0,연령,남녀,소속
하은,15,남,덕영중
학생2,17,여,수리중


In [13]:
df.rename(columns = {'연령' : 'age'})

# As the displayed result of df shows later, this call does not modify the original df.
# To change df itself, pass inplace=True (the default is inplace=False).

Unnamed: 0,age,남녀,소속
학생1,15,남,덕영중
학생2,17,여,수리중


In [15]:
df.rename(columns = {'연령' : 'age'}, inplace = True)
df

Unnamed: 0,age,남녀,소속
학생1,15,남,덕영중
학생2,17,여,수리중


In [16]:
# Alternative to inplace=True: assign the renamed result back to df
df = df.rename(index = {'학생1' : '하은'})
df

Unnamed: 0,age,남녀,소속
하은,15,남,덕영중
학생2,17,여,수리중


데이터프레임 행/열 삭제 (drop)

In [20]:
# Rows:    axis = 0 (default) : the label is looked up in the index
# Columns: axis = 1           : the label is looked up in the columns

# df.drop('age', axis=0)  # raises an error: 'age' is a column name but axis=0 targets rows
df.drop('age', axis=1)  # drop the column

Unnamed: 0,남녀,소속
하은,남,덕영중
학생2,여,수리중


In [24]:
df = pd.DataFrame([[15, '남', '덕영중'], [17, '여', '수리중']], 
                 index = ['준서', '예은'], 
                 columns = ['나이', '성별', '학교'])
df

Unnamed: 0,나이,성별,학교
준서,15,남,덕영중
예은,17,여,수리중


In [29]:
df['나이']

준서    15
예은    17
Name: 나이, dtype: int64

열은 `df['열 이름']`처럼 대괄호에 열 이름을 넣어 바로 선택할 수 있지만, \
행(레코드)은 대괄호에 행 이름을 넣어서는 선택할 수 없으므로 loc 또는 iloc를 사용한다. \
df.loc[행 이름] : 행 이름(라벨)으로 선택 \
df.iloc[정수 위치] : 0부터 시작하는 정수 위치로 선택

In [28]:
label1 = df.loc['준서']
label1

나이     15
성별      남
학교    덕영중
Name: 준서, dtype: object

In [41]:
# Exam scores per student; every key becomes one column of the frame.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}
df = pd.DataFrame(data=exam_data)
df

Unnamed: 0,이름,수학,영어,음악,체육
0,서준,90,98,85,100
1,우현,80,89,95,90
2,인아,70,95,100,90


In [42]:
# Use one column ('이름') as the DataFrame's index.
# NOTE: inplace=True modifies df itself, so '이름' stops being a column after this
# cell runs; running the cell a second time on the same df will fail for that reason.
df.set_index('이름', inplace = True)
df

Unnamed: 0_level_0,수학,영어,음악,체육
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
서준,90,98,85,100
우현,80,89,95,90
인아,70,95,100,90


In [43]:
# Total score per student: row-wise sum over the four subject columns.
# skipna=False keeps the behaviour of chained '+' (a missing score makes the total missing).
df['총점'] = df[['수학', '영어', '음악', '체육']].sum(axis=1, skipna=False)
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
서준,90,98,85,100,373
우현,80,89,95,90,354
인아,70,95,100,90,355


In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 서준 to 인아
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   수학      3 non-null      int64
 1   영어      3 non-null      int64
 2   음악      3 non-null      int64
 3   체육      3 non-null      int64
 4   총점      3 non-null      int64
dtypes: int64(5)
memory usage: 144.0+ bytes


In [45]:
# Assigning a single scalar to a column fills that column with the value in every row
df['평균'] = 0
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서준,90,98,85,100,373,0
우현,80,89,95,90,354,0
인아,70,95,100,90,355,0


In [46]:
# Rows work the same way: a scalar assigned through loc fills every column of that row.
# The label '길동' is not in the index yet, so this adds a new row (see the output).
df.loc['길동'] = 0
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서준,90,98,85,100,373,0
우현,80,89,95,90,354,0
인아,70,95,100,90,355,0
길동,0,0,0,0,0,0


In [51]:
# Swap rows and columns (transpose)
# Either the T attribute or the transpose() method can be used.
# Only the last expression of a cell is displayed, so the df.T result is computed and discarded.

df.T
df.transpose()

이름,서준,우현,인아,길동
수학,90,80,70,0
영어,98,89,95,0
음악,85,95,100,0
체육,100,90,90,0
총점,373,354,355,0
평균,0,0,0,0


In [54]:
# Average = total score divided by the number of subjects (4).
df['평균'] = df['총점'].div(4)
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서준,90,98,85,80,373,93.25
우현,80,89,95,90,354,88.5
인아,70,95,100,90,355,88.75
길동,0,0,0,0,0,0.0


행 삭제

In [55]:
df.drop('길동', inplace = True)
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서준,90,98,85,80,373,93.25
우현,80,89,95,90,354,88.5
인아,70,95,100,90,355,88.75


평균 점수 출력

In [56]:
# Print each student's average on its own line (the index label is not needed here).
for _label, average in df['평균'].items():
    print(average)

93.25
88.5
88.75


In [66]:
# Label every student by average: 90 or above -> '합격', anything lower -> '불합격'.
grades = ['합격' if average >= 90 else '불합격' for average in df['평균']]
df['grades'] = grades
df

Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균,결과,grades
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
서준,90,98,85,80,373,93.25,불합격,합격
우현,80,89,95,90,354,88.5,불합격,불합격
인아,70,95,100,90,355,88.75,불합격,불합격


In [71]:
def scholarship(row):
    """Map a grade label to a scholarship label.

    The incoming value is echoed to stdout first. Returns '장학생' when the
    value equals '합격' and '비장학생' for any other value.
    """
    print(row)
    return '장학생' if row == '합격' else '비장학생'

In [74]:
# Replace each grade label in the 'grades' column with its scholarship label.
df['grades'] = df['grades'].apply(scholarship)
df

합격
불합격
불합격


Unnamed: 0_level_0,수학,영어,음악,체육,총점,평균,결과,grades
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
서준,90,98,85,80,373,93.25,불합격,장학생
우현,80,89,95,90,354,88.5,불합격,비장학생
인아,70,95,100,90,355,88.75,불합격,비장학생


In [75]:
df = pd.read_csv('csv_sample.csv')
df

Unnamed: 0,c0,c1,c2,c3
0,0,1,4,7
1,1,2,5,8
2,2,3,6,9


In [76]:
# Write df as CSV to a file named 'test_csv' (no extension), relative to the working directory
df.to_csv('test_csv')

# 실습

In [88]:
df = pd.read_csv('data/auto-mpg.csv', header = None)

In [89]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.00,2790.0,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52.00,2130.0,24.6,82,2,vw pickup
395,32.0,4,135.0,84.00,2295.0,11.6,82,1,dodge rampage
396,28.0,4,120.0,79.00,2625.0,18.6,82,1,ford ranger


In [90]:
# Replace the current column labels with descriptive names for the nine columns
df.columns = ['mpg','cylinders','displacement', 
              'horsepower','weight', 'acceleration', 
              'model year','origin','name']

In [91]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [92]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    object 
 4   weight        398 non-null    float64
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   name          398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [93]:
df.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'name'],
      dtype='object')

In [94]:
df.describe()

Unnamed: 0,mpg,cylinders,displacement,weight,acceleration,model year,origin
count,398.0,398.0,398.0,398.0,398.0,398.0,398.0
mean,23.514573,5.454774,193.425879,2970.424623,15.56809,76.01005,1.572864
std,7.815984,1.701004,104.269838,846.841774,2.757689,3.697627,0.802055
min,9.0,3.0,68.0,1613.0,8.0,70.0,1.0
25%,17.5,4.0,104.25,2223.75,13.825,73.0,1.0
50%,23.0,4.0,148.5,2803.5,15.5,76.0,1.0
75%,29.0,8.0,262.0,3608.0,17.175,79.0,2.0
max,46.6,8.0,455.0,5140.0,24.8,82.0,3.0


In [95]:
df.count()

mpg             398
cylinders       398
displacement    398
horsepower      398
weight          398
acceleration    398
model year      398
origin          398
name            398
dtype: int64

In [97]:
df.shape

(398, 9)

In [98]:
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower       object
weight          float64
acceleration    float64
model year        int64
origin            int64
name             object
dtype: object

In [102]:
# How many rows hold each distinct value of the column
df['origin'].value_counts()

1    249
3     79
2     70
Name: origin, dtype: int64

In [100]:
# The distinct values that occur in the column (duplicates removed)
df['origin'].unique()

array([1, 3, 2], dtype=int64)

In [101]:
# Number of entries in each column
df.count()

mpg             398
cylinders       398
displacement    398
horsepower      398
weight          398
acceleration    398
model year      398
origin          398
name            398
dtype: int64

상관관계

In [103]:
# Correlation coefficients between every pair of numeric columns
# (important: worth remembering)
# horsepower and name are object (text) columns, so keep only numeric columns
# before calling corr(): pandas 2.0+ raises on non-numeric columns instead of
# silently leaving them out, and this form gives the same table on every version.

df.select_dtypes(include='number').corr()

Unnamed: 0,mpg,cylinders,displacement,weight,acceleration,model year,origin
mpg,1.0,-0.775396,-0.804203,-0.831741,0.420289,0.579267,0.56345
cylinders,-0.775396,1.0,0.950721,0.896017,-0.505419,-0.348746,-0.562543
displacement,-0.804203,0.950721,1.0,0.932824,-0.543684,-0.370164,-0.609409
weight,-0.831741,0.896017,0.932824,1.0,-0.417457,-0.306564,-0.581024
acceleration,0.420289,-0.505419,-0.543684,-0.417457,1.0,0.288137,0.205873
model year,0.579267,-0.348746,-0.370164,-0.306564,0.288137,1.0,0.180662
origin,0.56345,-0.562543,-0.609409,-0.581024,0.205873,0.180662,1.0


In [104]:
# Close to  1 : strong positive correlation
# Close to -1 : strong negative correlation
# Close to  0 : little or no correlation

df[['mpg', 'weight']].corr()

Unnamed: 0,mpg,weight
mpg,1.0,-0.831741
weight,-0.831741,1.0


# 필터링

In [105]:
# Keep only the rows whose mpg is greater than 40 (boolean-mask selection).
df.loc[df['mpg'] > 40]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name
244,43.1,4,90.0,48.00,1985.0,21.5,78,2,volkswagen rabbit custom diesel
309,41.5,4,98.0,76.00,2144.0,14.7,80,2,vw rabbit
322,46.6,4,86.0,65.00,2110.0,17.9,80,3,mazda glc
324,40.8,4,85.0,65.00,2110.0,19.2,80,3,datsun 210
325,44.3,4,90.0,48.00,2085.0,21.7,80,2,vw rabbit c (diesel)
326,43.4,4,90.0,48.00,2335.0,23.7,80,2,vw dasher (diesel)
329,44.6,4,91.0,67.00,1850.0,13.8,80,3,honda civic 1500 gl
330,40.9,4,85.0,?,1835.0,17.3,80,2,renault lecar deluxe
394,44.0,4,97.0,52.00,2130.0,24.6,82,2,vw pickup


In [106]:
# Same rows as a boolean-mask filter on mpg > 40
df.query('mpg>40')     # using the query() method

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name
244,43.1,4,90.0,48.00,1985.0,21.5,78,2,volkswagen rabbit custom diesel
309,41.5,4,98.0,76.00,2144.0,14.7,80,2,vw rabbit
322,46.6,4,86.0,65.00,2110.0,17.9,80,3,mazda glc
324,40.8,4,85.0,65.00,2110.0,19.2,80,3,datsun 210
325,44.3,4,90.0,48.00,2085.0,21.7,80,2,vw rabbit c (diesel)
326,43.4,4,90.0,48.00,2335.0,23.7,80,2,vw dasher (diesel)
329,44.6,4,91.0,67.00,1850.0,13.8,80,3,honda civic 1500 gl
330,40.9,4,85.0,?,1835.0,17.3,80,2,renault lecar deluxe
394,44.0,4,97.0,52.00,2130.0,24.6,82,2,vw pickup


# filter() 이용 필터링

In [107]:
# like
# Keep the columns whose name contains 'm'
df.filter(like = 'm', axis = 1)

Unnamed: 0,mpg,displacement,model year,name
0,18.0,307.0,70,chevrolet chevelle malibu
1,15.0,350.0,70,buick skylark 320
2,18.0,318.0,70,plymouth satellite
3,16.0,304.0,70,amc rebel sst
4,17.0,302.0,70,ford torino
...,...,...,...,...
393,27.0,140.0,82,ford mustang gl
394,44.0,97.0,82,vw pickup
395,32.0,135.0,82,dodge rampage
396,28.0,120.0,82,ford ranger


In [108]:
# regex = '<text>$'
# Keep the columns whose name ends with <text> (here: 'n')
df.filter(regex='n$', axis=1)

Unnamed: 0,acceleration,origin
0,12.0,1
1,11.5,1
2,11.0,1
3,12.0,1
4,10.5,1
...,...,...
393,15.6,1
394,24.6,2
395,11.6,1
396,18.6,1


In [111]:
# regex = '^<text>'
# Keep the columns whose name starts with <text> (here: 'm')
df.filter(regex='^m', axis=1)

Unnamed: 0,mpg,model year
0,18.0,70
1,15.0,70
2,18.0,70
3,16.0,70
4,17.0,70
...,...,...
393,27.0,82
394,44.0,82
395,32.0,82
396,28.0,82


In [120]:
# Group the rows by model year.
# A scalar key is used instead of a one-element list: with a list, newer pandas
# versions warn about / return 1-tuples such as (70,) as group keys when the
# groups are iterated, whereas a scalar key always yields plain keys such as 70.
df_groupby = df.groupby(by='model year')
df_groupby.groups

{70: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], 71: [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], 72: [57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84], 73: [85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124], 74: [125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151], 75: [152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181], 76: [182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213

In [121]:
# Iterate over the groups: each step yields the group key and that group's rows
for year, group in df_groupby:
    print(year, "년: ", len(group))  # key and number of rows in this group
    print(group)
    print()

70 년:  29
     mpg  cylinders  displacement horsepower  weight  acceleration  \
0   18.0          8         307.0      130.0  3504.0          12.0   
1   15.0          8         350.0      165.0  3693.0          11.5   
2   18.0          8         318.0      150.0  3436.0          11.0   
3   16.0          8         304.0      150.0  3433.0          12.0   
4   17.0          8         302.0      140.0  3449.0          10.5   
5   15.0          8         429.0      198.0  4341.0          10.0   
6   14.0          8         454.0      220.0  4354.0           9.0   
7   14.0          8         440.0      215.0  4312.0           8.5   
8   14.0          8         455.0      225.0  4425.0          10.0   
9   15.0          8         390.0      190.0  3850.0           8.5   
10  15.0          8         383.0      170.0  3563.0          10.0   
11  14.0          8         340.0      160.0  3609.0           8.0   
12  15.0          8         400.0      150.0  3761.0           9.5   
13  14.0  

In [122]:
df.groupby(by=['model year']).count()

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,origin,name
model year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
70,29,29,29,29,29,29,29,29
71,28,28,28,28,28,28,28,28
72,28,28,28,28,28,28,28,28
73,40,40,40,40,40,40,40,40
74,27,27,27,27,27,27,27,27
75,30,30,30,30,30,30,30,30
76,34,34,34,34,34,34,34,34
77,28,28,28,28,28,28,28,28
78,36,36,36,36,36,36,36,36
79,29,29,29,29,29,29,29,29


In [123]:
# Per-year mean of the numeric columns. numeric_only=True is explicit because
# horsepower and name are object columns: pandas 2.0+ raises on them otherwise,
# while older versions dropped them silently (same result as shown below).
df.groupby(by=['model year']).mean(numeric_only=True)

Unnamed: 0_level_0,mpg,cylinders,displacement,weight,acceleration,origin
model year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,17.689655,6.758621,281.413793,3372.793103,12.948276,1.310345
71,21.25,5.571429,209.75,2995.428571,15.142857,1.428571
72,18.714286,5.821429,218.375,3237.714286,15.125,1.535714
73,17.1,6.375,256.875,3419.025,14.3125,1.375
74,22.703704,5.259259,171.740741,2877.925926,16.203704,1.666667
75,20.266667,5.6,205.533333,3176.8,16.05,1.466667
76,21.573529,5.647059,197.794118,3078.735294,15.941176,1.470588
77,23.375,5.464286,191.392857,2997.357143,15.435714,1.571429
78,24.061111,5.361111,177.805556,2861.805556,15.805556,1.611111
79,25.093103,5.827586,206.689655,3055.344828,15.813793,1.275862


# 데이터프레임 합치기

In [125]:
# Two small record lists (one dict per row) that share the same keys.
l1 = [
    dict(name='John', job='teacher'),
    dict(name='Nate', job='student'),
    dict(name='Fred', job='developer'),
]

l2 = [
    dict(name='Ed', job='dentist'),
    dict(name='Jack', job='farmer'),
    dict(name='Ted', job='designer'),
]

# Build one frame per list, fixing the column order explicitly.
df1 = pd.DataFrame(data=l1, columns=['name', 'job'])
df2 = pd.DataFrame(data=l2, columns=['name', 'job'])


In [126]:
df1

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer


In [127]:
df2

Unnamed: 0,name,job
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


# append() 사용

In [128]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat() is the replacement and gives the same result here:
# df2's rows placed below df1's rows, each keeping its original index label.
result = pd.concat([df1, df2])
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [129]:
# Discard the original index labels and number the combined rows from 0.
# pd.concat() is used because DataFrame.append() was removed in pandas 2.0;
# ignore_index=True has the same meaning in both.
result = pd.concat([df1, df2], ignore_index = True)
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


# concat() 사용

In [132]:
result = pd.concat([df1, df2]) # axis = 0 (default): stack the frames as additional rows
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [133]:
# ignore_index = False : default, the original index labels are kept
# ignore_index = True  : the original index labels are discarded and renumbered
result = pd.concat([df1, df2], ignore_index = True) # axis = 0 (default): stack the frames as additional rows
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


In [130]:
result = pd.concat([df1, df2], axis=1) # axis = 1: place the frames side by side as additional columns
result

Unnamed: 0,name,job,name.1,job.1
0,John,teacher,Ed,dentist
1,Nate,student,Jack,farmer
2,Fred,developer,Ted,designer


In [131]:
result = pd.concat([df1, df2], axis=1, ignore_index = True) # axis = 1: side by side; the column labels are renumbered 0..3 (see output)
result

Unnamed: 0,0,1,2,3
0,John,teacher,Ed,dentist
1,Nate,student,Jack,farmer
2,Fred,developer,Ted,designer


# merge() 사용

In [None]:
# how = 'inner' : 어떻게 합칠 건지에 대한 옵션