### pandas
* pandas는 내부적으로 numpy를 사용 : pandas를 사용하려면 numpy가 설치되어 있어야 함
* 표형식의 데이터를 다루기 편함(대부분의 데이터들은 표형식)
* 표 -> 2차원 배열 -> 행렬(행,열) -> DataFrame
* DataFrame은 표형식의 데이터를 담는 자료형
* 행 : 로우(row), 레코드, 인덱스
* 열 : 컬럼(column), 속성, 특징, 피쳐, 변수

* numpy array와 dataframe의 차이점
    - numpy array : 숫자 인덱스
    - dataframe : 문자(레이블)도 인덱스로 사용 가능, 하나의 열 안에서는 자료형이 같음(열마다 자료형은 서로 다를 수 있음)

In [1]:
import pandas as pd

In [2]:
# Each inner list is one record (row) of the table.
plist = [['test1', 1, 2], ['test2', 3, 4], ['test3', 5, 6]]
# No labels are supplied, so both the rows and the columns get default
# integer labels starting at 0.
df = pd.DataFrame(data=plist)
df

Unnamed: 0,0,1,2
0,test1,1,2
1,test2,3,4
2,test3,5,6


In [3]:
# Same rows, but with explicit column names; the row index stays the
# default 0..n-1.
df = pd.DataFrame(
    data=plist,
    columns=['name', 'age', 'grade'],
)
df

Unnamed: 0,name,age,grade
0,test1,1,2
1,test2,3,4
2,test3,5,6


In [4]:
# Explicit column names plus string row labels instead of the default
# integer index.
df = pd.DataFrame(
    data=plist,
    columns=['name', 'age', 'grade'],
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,name,age,grade
a,test1,1,2
b,test2,3,4
c,test3,5,6


In [5]:
df.dtypes # check the data type of each column

name     object
age       int64
grade     int64
dtype: object

In [3]:
# Read a CSV file.
# If the file has no header row, pass header=None.
# To use the first column as the index, pass index_col=0.
# * Even though the index and column names are strings, rows and columns
#   can still be addressed by their integer position as well.
df = pd.read_csv('iphone.csv', index_col=0)
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [4]:
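# DataFrame indexing and slicing
# loc  : select by label (index name / column name)
# iloc : select by integer position (0-based)
# e.g. df.loc['index label', 'column label']   -> [row, column]
# e.g. df.iloc[row_position, column_position]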

In [5]:
# Label-based lookup of one cell: row 'iPhone 7 Plus', column '메모리'.
df.loc['iPhone 7 Plus', '메모리']

'3GB'

In [6]:
# Position-based lookup of the same cell: second row, third column (0-based).
df.iloc[1,2]

'3GB'

In [7]:
# Series
# A one-dimensional data type.
# A DataFrame is a collection of Series.
# Rows at positions 0-6 of the third column come back as a Series.
df.iloc[0:7,2]

iPhone 7         2GB
iPhone 7 Plus    3GB
iPhone 8         2GB
iPhone 8 Plus    3GB
iPhone X         3GB
iPhone XS        4GB
iPhone XS Max    4GB
Name: 메모리, dtype: object

In [8]:
# A single column selected from the DataFrame is a Series.
type(df.iloc[0:7,2])

pandas.core.series.Series

In [9]:
# A single row selected from the DataFrame is a Series as well.
type(df.iloc[0,:])

pandas.core.series.Series

In [10]:
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [11]:
# Use loc to show rows 'iPhone 7' through 'iPhone 8 Plus' and columns
# '메모리' through 'Face ID'. Label slices include the end label.
df.loc['iPhone 7':'iPhone 8 Plus','메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 7 Plus,3GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No
iPhone 8 Plus,3GB,iOS 11.0,No


In [12]:
# Show columns '메모리' through 'Face ID' for 'iPhone 7' and 'iPhone 8' only;
# a list of labels picks rows that are not adjacent.
df.loc[['iPhone 7', 'iPhone 8'],'메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No


In [13]:
# Boolean operation
df['디스플레이'] > 5 # broadcasting: the scalar 5 is compared against every value in the column

iPhone 7         False
iPhone 7 Plus     True
iPhone 8         False
iPhone 8 Plus     True
iPhone X          True
iPhone XS         True
iPhone XS Max     True
Name: 디스플레이, dtype: bool

In [14]:
# Use the boolean Series as a mask: only rows where it is True are kept.
df.loc[df['디스플레이'] > 5]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [15]:
# When combining two or more conditions, wrap each one in parentheses.
# Use & for AND and | for OR.
# Rows whose display is larger than 5 inches and whose memory is '3GB'.
df.loc[(df['디스플레이']>5) & (df['메모리']=='3GB')]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes


In [16]:
# The combined condition on its own is a boolean Series, one value per row.
(df['디스플레이']>5) & (df['메모리']=='3GB')

iPhone 7         False
iPhone 7 Plus     True
iPhone 8         False
iPhone 8 Plus     True
iPhone X          True
iPhone XS        False
iPhone XS Max    False
dtype: bool

In [42]:
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [44]:
# Modify one specific value (a single cell addressed by row and column label).
df.loc['iPhone 7 Plus', '출시 버전'] = 'iOS 11.0'

In [45]:
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 11.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [49]:
# Set the release version ('출시 버전') to 'iOS 13.0' for every row;
# ':' selects all rows and the scalar is assigned to each of them.
df.loc[:,'출시 버전'] = 'iOS 13.0'
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 13.0,Yes


In [51]:
# Add 1 to every value in the '디스플레이' column and write the result back.
df.loc[:,'디스플레이'] = df.loc[:,'디스플레이']+1

In [52]:
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,5.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,6.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,5.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,6.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,6.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,6.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,7.5,4GB,iOS 13.0,Yes


In [57]:
df2 = df # not a copy: df2 is just a second name for the same DataFrame object
df2 = df.copy() # independent (deep) copy: later changes to df2 do not touch df

In [54]:
df2

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,5.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,6.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,5.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,6.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,6.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,6.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,7.5,4GB,iOS 13.0,Yes


In [58]:
# Add 1 to the display size in df2 only; df2 is a copy, so df keeps its values.
df2.loc[:,'디스플레이'] = df2.loc[:,'디스플레이']+1
df2

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,7.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,8.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,7.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,8.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,8.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,8.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,9.5,4GB,iOS 13.0,Yes


In [59]:
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes


In [62]:
# Add a new row (record).
# Assigning to an index label that does not exist yet creates that row;
# the list supplies one value per column, in column order.
df.loc['iPhone XR'] = ['2019-01-01',9.5, '8GB', 'iOS13.0', 'Yes']
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes
iPhone XR,2019-01-01,9.5,8GB,iOS13.0,Yes


In [63]:
# Deleting
# drop() : * only returns the result after deletion (df itself is not changed)
# so either reassign: df = df.drop(..)
# or use the inplace parameter
# axis - 0 (index): rows, 1 (columns): columns
df.drop('iPhone XR')

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes


In [64]:
df # the original data has not been deleted

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes
iPhone XR,2019-01-01,9.5,8GB,iOS13.0,Yes


In [65]:
# With inplace=True the row is removed from df itself.
df.drop('iPhone XR', inplace=True)
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes


In [67]:
# Add a column.
# Assign a value to a column name that does not exist yet;
# a scalar is repeated for every row.
df['제조사'] = 'Apple'
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID,제조사
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No,Apple
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No,Apple
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No,Apple
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No,Apple
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes,Apple
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes,Apple
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes,Apple


In [70]:
# Delete a column.
# axis='columns' makes drop() look for the label among the columns;
# the result is returned and df itself still has the column.
df.drop('제조사', axis='columns')

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes


In [72]:
# Reassign the result to df so that the column removal is kept.
df = df.drop('제조사', axis='columns')
df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,6.7,2GB,iOS 13.0,No
iPhone 7 Plus,2016-09-16,7.5,3GB,iOS 13.0,No
iPhone 8,2017-09-22,6.7,2GB,iOS 13.0,No
iPhone 8 Plus,2017-09-22,7.5,3GB,iOS 13.0,No
iPhone X,2017-11-03,7.8,3GB,iOS 13.0,Yes
iPhone XS,2018-09-21,7.8,4GB,iOS 13.0,Yes
iPhone XS Max,2018-09-21,8.5,4GB,iOS 13.0,Yes


In [75]:
# Row labels and column labels are both stored as Index objects.
print(df.index)
print(df.columns)

Index(['iPhone 7', 'iPhone 7 Plus', 'iPhone 8', 'iPhone 8 Plus', 'iPhone X',
       'iPhone XS', 'iPhone XS Max'],
      dtype='object')
Index(['출시일', '디스플레이', '메모리', '출시 버전', 'Face ID'], dtype='object')


In [77]:
# An Index can be sliced by position; here the first two row labels.
df.index[0:2]

Index(['iPhone 7', 'iPhone 7 Plus'], dtype='object')

In [82]:
# Drop rows by position: look up the labels at positions 0 and 2,
# then drop those labels from df in place.
df.drop(df.index[[0,2]], axis=0, inplace=True)

In [84]:
# Drop several rows by label.
# 'iPhone 7' was already removed by the positional drop above, so a plain
# drop() fails with: KeyError: "['iPhone 7'] not found in axis".
# errors='ignore' skips labels that are no longer present and still drops
# the ones that exist ('iPhone 7 Plus').
df.drop(['iPhone 7', 'iPhone 7 Plus'], axis=0, inplace=True, errors='ignore')