### pandas
- 데이터 분석 라이브러리
- 행과 열로 이루어진 데이터 객체를 다룸
- 대용량의 데이터 처리에 편리함
- 핵심 자료구조: Series, DataFrame (Series(컬럼)가 모여서 DataFrame(로우+컬럼)이 된다)

#### Series

In [4]:
from pandas import Series, DataFrame
import pandas as pd

# Build a price Series, naming both constructor arguments explicitly.
fruit = Series(
    index=['apple', 'banana', 'peer', 'cherry'],
    data=[2500, 3800, 1200, 6000],
)
print(fruit)

apple     2500
banana    3800
peer      1200
cherry    6000
dtype: int64


In [7]:
# The `data=` and `index=` keywords may be omitted: the first positional
# argument is taken as the data and the second as the index.
fruit = Series(
    [2500, 3800, 1200, 6000],
    ['apple', 'banana', 'peer', 'cherry'],
)
print(fruit)

apple     2500
banana    3800
peer      1200
cherry    6000
dtype: int64


In [8]:
# Same construction, reached through the `pd` module alias instead of the
# directly imported `Series` name; the result is only displayed, not stored.
pd.Series([2500, 3800, 1200, 6000],
              ['apple', 'banana', 'peer', 'cherry'])

apple     2500
banana    3800
peer      1200
cherry    6000
dtype: int64

In [16]:
# Extract only the values of the Series (without the index labels).
print(fruit.values)

[2500 3800 1200 6000]


In [17]:
# Show only the index labels of the Series.
print(fruit.index)

Index(['apple', 'banana', 'peer', 'cherry'], dtype='object')


In [18]:
# Dictionary -> Series: the keys become the index labels and the values
# become the data.
fruitData = dict(apple=2500, banana=2000, peer=1200)
fruit = Series(data=fruitData)

In [19]:
# Print the Series that was built from the dictionary.
print(fruit)

apple     2500
banana    2000
peer      1200
dtype: int64


In [20]:
# Name the Series itself and its index; both names show up in the repr.
fruit.name = "fruitPrice"
fruit.index.name = "fruitName"
fruit

fruitName
apple     2500
banana    2000
peer      1200
Name: fruitPrice, dtype: int64

#### DataFrame 생성

In [21]:
# Column-oriented data: every key is a column name and every list holds that
# column's values, one per row.
fruitData = dict(
    fruitName=['apple', 'banana', 'cherry', 'peer'],
    fruitPrice=[2500, 3000, 2000, 4300],
    num=[10, 20, 3, 40],
)
fruitFrame = DataFrame(data=fruitData)
fruitFrame

Unnamed: 0,fruitName,fruitPrice,num
0,apple,2500,10
1,banana,3000,20
2,cherry,2000,3
3,peer,4300,40


In [22]:
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage.
fruitFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
fruitName     4 non-null object
fruitPrice    4 non-null int64
num           4 non-null int64
dtypes: int64(2), object(1)
memory usage: 176.0+ bytes


In [32]:
fruitData = dict(
    fruitName=['apple', 'banana', 'cherry', 'peer'],
    fruitPrice=[2500, 3000, 2000, 4300],
    num=[10, 20, 3, 40],
)
# `columns` sets the order in which the columns appear in the frame.
fruitFrame = DataFrame(data=fruitData, columns=['fruitName', 'num', 'fruitPrice'])
fruitFrame

Unnamed: 0,fruitName,num,fruitPrice
0,apple,10,2500
1,banana,20,3000
2,cherry,3,2000
3,peer,40,4300


In [37]:
# Select a single column by name with bracket indexing.
fruitFrame["fruitName"]

0     apple
1    banana
2    cherry
3      peer
Name: fruitName, dtype: object

In [38]:
# The table as a whole is a DataFrame.
type(fruitFrame)

pandas.core.frame.DataFrame

In [39]:
# A single column selected from the frame is a Series.
type(fruitFrame["fruitName"])

pandas.core.series.Series

In [41]:
# Show a specific column, this time using attribute-style access.
fruitFrame.fruitName

0     apple
1    banana
2    cherry
3      peer
Name: fruitName, dtype: object

In [42]:
# Add a column: the single value 2016 is filled into every row.
fruitFrame["year"] = 2016
fruitFrame

Unnamed: 0,fruitName,num,fruitPrice,year
0,apple,10,2500,2016
1,banana,20,3000,2016
2,cherry,3,2000,2016
3,peer,40,4300,2016


In [44]:
# Adding a column with a different value per row: prepare a Series whose
# index names the target rows (row 1 is deliberately left out).
val = Series(
    data=[4, 2, 1],
    index=[0, 2, 3],
)
val

0    4
2    2
3    1
dtype: int64

In [45]:
# Assign the Series as a new column. Values are matched to rows by index
# label, so row 1 (absent from `val`) ends up empty (NaN).
fruitFrame['stock'] = val
fruitFrame

Unnamed: 0,fruitName,num,fruitPrice,year,stock
0,apple,10,2500,2016,4.0
1,banana,20,3000,2016,
2,cherry,3,2000,2016,2.0
3,peer,40,4300,2016,1.0


In [34]:
import numpy as np

# A DataFrame can also be built from a 2-D NumPy array; each inner list is a
# row and `columns` names the three columns.
df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=list('abc'),
)
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [35]:
# The bare NumPy array on its own, for comparison with the DataFrame view.
np.array([[1,2,3], [4,5,6], [7,8,9]])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [46]:
# Deleting data
fruit = Series(
    index=['apple', 'banana', 'peer', 'cherry'],
    data=[2500, 3800, 1200, 6000],
)
# drop() is called with its default inplace=False here, so `fruit` itself is
# left untouched and the returned copy is simply discarded.
fruit.drop('banana')
# To really remove the entry from `fruit`, pass inplace=True:
#     fruit.drop('banana', inplace=True)

# The usual approach is not to delete in place but to keep the result of
# drop() as a new Series and work with that.
new_fruit = fruit.drop(labels='banana')
new_fruit

apple     2500
peer      1200
cherry    6000
dtype: int64

In [49]:
# Column-oriented source data reused by the following cells.
fruitData = dict(
    fruitName=['apple', 'banana', 'cherry', 'peer'],
    fruitPrice=[2500, 3000, 2000, 4300],
    num=[10, 20, 3, 40],
)

In [50]:
# Display the plain dictionary.
fruitData

{'fruitName': ['apple', 'banana', 'cherry', 'peer'],
 'fruitPrice': [2500, 3000, 2000, 4300],
 'num': [10, 20, 3, 40]}

In [51]:
# Pull the list of fruit names out of the dictionary.
fruitName = fruitData['fruitName']
fruitName

['apple', 'banana', 'cherry', 'peer']

In [54]:
# Use the fruit names as row labels and keep only the price and count columns.
fruitFrame = DataFrame(fruitData, index = fruitName, columns=['fruitPrice', 'num'])
fruitFrame

Unnamed: 0,fruitPrice,num
apple,2500,10
banana,3000,20
cherry,2000,3
peer,4300,40


In [55]:
# Drop rows by index label; the result is stored in a new variable.
fruitFrame2 = fruitFrame.drop(['apple', 'cherry'])
fruitFrame2

Unnamed: 0,fruitPrice,num
banana,3000,20
peer,4300,40


In [57]:
# Drop a column (axis=1 must be passed so the label is looked up among columns).
fruitFrame3 = fruitFrame.drop('num', axis=1)
fruitFrame3

Unnamed: 0,fruitPrice
apple,2500
banana,3000
cherry,2000
peer,4300


#### 항목 추출하기

In [58]:
# Fresh price Series used for the selection examples that follow.
fruit = Series(
    index=['apple', 'banana', 'peer', 'cherry'],
    data=[2500, 3800, 1200, 6000],
)
fruit

apple     2500
banana    3800
peer      1200
cherry    6000
dtype: int64

In [59]:
# Slice by index label; the end label 'peer' is included in the result.
fruit["apple":"peer"]

apple     2500
banana    3800
peer      1200
dtype: int64

In [60]:
# Column-oriented source data for the DataFrame selection examples.
fruitData = dict(
    fruitName=['apple', 'banana', 'cherry', 'peer'],
    fruitPrice=[2500, 3000, 2000, 4300],
    num=[10, 20, 3, 40],
)
fruitData

{'fruitName': ['apple', 'banana', 'cherry', 'peer'],
 'fruitPrice': [2500, 3000, 2000, 4300],
 'num': [10, 20, 3, 40]}

In [63]:
# Build the frame with fruit names as row labels and only the price and
# count columns.
fruitName = fruitData['fruitName']
fruitFrame = DataFrame(fruitData, index=fruitName, columns=['fruitPrice', 'num'])
fruitFrame

Unnamed: 0,fruitPrice,num
apple,2500,10
banana,3000,20
cherry,2000,3
peer,4300,40


In [65]:
# Get a column by name.
fruitFrame['fruitPrice']

apple     2500
banana    3000
cherry    2000
peer      4300
Name: fruitPrice, dtype: int64

In [67]:
# Get rows by slicing on index labels. `.loc` makes the row intent explicit
# (plain `[]` with a single string selects a column instead); the end label
# 'banana' is included in the result.
fruitFrame.loc['apple':'banana']

Unnamed: 0,fruitPrice,num
apple,2500,10
banana,3000,20


#### 데이터 기본연산

In [68]:
# First operand for the arithmetic examples.
fruit1 = Series(
    data=[5, 9, 10, 3],
    index=["apple", "banana", "cherry", "peer"],
)
fruit1

apple      5
banana     9
cherry    10
peer       3
dtype: int64

In [69]:
# Second operand: its labels only partly overlap with those of fruit1.
fruit2 = Series(
    data=[3, 2, 9, 5, 10],
    index=["apple", "orange", "banana", "cherry", "mango"],
)
fruit2

apple      3
orange     2
banana     9
cherry     5
mango     10
dtype: int64

In [70]:
# Arithmetic between Series is matched on index labels; a label present in
# only one operand yields NaN.
fruit1 + fruit2

apple      8.0
banana    18.0
cherry    15.0
mango      NaN
orange     NaN
peer       NaN
dtype: float64

In [73]:
# First frame: per-state numbers for four fruits.
fruitData1 = dict(
    Ohio=[4, 8, 3, 5],
    Texas=[0, 2, 3, 4],
)
fruitFrame1 = DataFrame(
    data=fruitData1,
    index=["apple", "banana", "cherry", "peer"],
    columns=["Ohio", "Texas"],
)
fruitFrame1

Unnamed: 0,Ohio,Texas
apple,4,0
banana,8,2
cherry,3,3
peer,5,4


In [75]:
# Second frame: shares the "Ohio" column with the first frame but has a
# different second column and one extra row label.
fruitData2 = dict(
    Ohio=[3, 4, 2, 6, 4],
    Colorado=[4, 2, 6, 7, 4],
)
fruitFrame2 = DataFrame(
    data=fruitData2,
    index=["apple", "orange", "banana", "cherry", "peer"],
    columns=["Ohio", "Colorado"],
)
fruitFrame2

Unnamed: 0,Ohio,Colorado
apple,3,4
orange,4,2
banana,2,6
cherry,6,7
peer,4,4


In [76]:
# Adding two frames matches cells on both row and column labels; any cell
# that is missing from either frame comes out as NaN.
fruitFrame1 + fruitFrame2

Unnamed: 0,Colorado,Ohio,Texas
apple,,7.0,
banana,,10.0,
cherry,,9.0,
orange,,,
peer,,9.0,


In [77]:
# Price Series used for the sorting examples.
fruit = Series(data = [2500, 3800, 1200, 6000],
               index=['apple', 'banana', 'peer', 'cherry'])
# An assignment alone displays nothing in a notebook; end the cell with the
# variable so the Series is actually shown.
fruit

apple     2500
banana    3800
peer      1200
cherry    6000
dtype: int64

In [80]:
# Sort the Series by value, largest first.
fruit.sort_values(ascending=False)

cherry    6000
banana    3800
apple     2500
peer      1200
dtype: int64

In [81]:
# Rebuild the labelled price/count frame for the DataFrame sorting example.
fruitData = dict(
    fruitName=['apple', 'banana', 'cherry', 'peer'],
    fruitPrice=[2500, 3000, 2000, 4300],
    num=[10, 20, 3, 40],
)
fruitName = fruitData['fruitName']
fruitFrame = DataFrame(
    data=fruitData,
    columns=['fruitPrice', 'num'],
    index=fruitName,
)
fruitFrame

Unnamed: 0,fruitPrice,num
apple,2500,10
banana,3000,20
cherry,2000,3
peer,4300,40


In [82]:
# Sort the rows by the fruitPrice column (smallest price first).
fruitFrame.sort_values(by=['fruitPrice'])

Unnamed: 0,fruitPrice,num
cherry,2000,3
apple,2500,10
banana,3000,20
peer,4300,40


#### pandas 기초분석