# 11.1 Pandas의 기초
## Pandas의 데이터 형식
### Series 형식

In [1]:
#p173
import pandas as pd
# Build a Series from a plain list; no index is given, so the displayed labels are 0..3.
pd.Series([7, 3, 5, 8])

0    7
1    3
2    5
3    8
dtype: int64

#### index

In [2]:
# Label each value with a city name instead of the default integer labels.
x = pd.Series(
    data=[7, 3, 5, 8],
    index=['서울', '대구', '부산', '광주'],
)
print(x)
# Look up a single element by its label.
x.loc['서울']

서울    7
대구    3
부산    5
광주    8
dtype: int64


7

In [3]:
# Passing a list of labels selects several elements at once and returns a Series.
x[['서울', '대구']]

서울    7
대구    3
dtype: int64

In [4]:
#p174
# The index attribute holds the labels of the Series.
x.index

Index(['서울', '대구', '부산', '광주'], dtype='object')

#### sorted()

In [5]:
# Built-in sorted() returns the index labels as a new, ordered list.
print(sorted(x.index))

['광주', '대구', '부산', '서울']


In [6]:
# sorted() over the underlying values gives a plain list in ascending order.
sorted(x.values)

[3, 5, 7, 8]

In [7]:
# reindex() returns the Series rearranged to follow the sorted label order.
x.reindex(sorted(x.index))

광주    8
대구    3
부산    5
서울    7
dtype: int64

#### 인덱스별로 저장된 값들의 합을 구하기

In [8]:
# Addition aligns the two Series on their labels; a label present on only one
# side (광주, 대전) produces NaN, which is why the result is float.
x = pd.Series({'서울': 3, '대구': 8, '부산': 5, '광주': 9})
y = pd.Series({'대구': 2, '부산': 4, '서울': 5, '대전': 1})
x.add(y)

광주     NaN
대구    10.0
대전     NaN
부산     9.0
서울     8.0
dtype: float64

#### unique()

In [9]:
#p175
# Keep each distinct value once, in order of first appearance.
medal = [1, 3, 2, 4, 2, 3]
x = pd.Series(data=medal)
print(x.unique())

[1 3 2 4]


In [10]:
# The same de-duplication works for text values.
medal = ['민준', '현우', '서연', '동현', '서연', '현우']
x = pd.Series(data=medal)
print(x.unique())

['민준' '현우' '서연' '동현']


#### 사전 형식의 데이터를 Pandas의 Series 형식으로 변환

In [11]:
# A dict converts directly to a Series: keys become labels, values become data.
age = {
    '민준': 23,
    '현우': 43,
    '서연': 12,
    '동현': 45,
}
x = pd.Series(data=age)
x

민준    23
현우    43
서연    12
동현    45
dtype: int64

In [12]:
# Six names stored as a Series; with no index given they are labelled 0 through 5.
names = [
    '민준', '서연', '현우',
    '민서', '동현', '수빈',
]
pdata = pd.Series(data=names)
print(pdata)

0    민준
1    서연
2    현우
3    민서
4    동현
5    수빈
dtype: object


In [13]:
# Take the elements at positions 3, 4 and 5, then print only their values.
a = pdata.iloc[3:6]
print(a.values)

['민서' '동현' '수빈']


#### 요약

In [1]:
#p176
# The slice `a` keeps the labels 3, 4, 5 it had in pdata, and an integer key on an
# integer-labelled Series is looked up by label, so a[2] raises KeyError.
# Catch it and show the message so the notebook still runs top to bottom.
try:
    print(a[2])
except KeyError as err:
    print('KeyError:', err)
# Position-based access: the element at position 2 of the slice.
print(a.iloc[2])

NameError: name 'a' is not defined

In [15]:
# Printing the slice shows that it kept the labels 3, 4, 5.
print(a)

3    민서
4    동현
5    수빈
dtype: object


### 데이터프레임

In [16]:
#p177
# Column-oriented input: each key is a column name and each list holds its values.
data = {
    'age': [23, 43, 12, 45],
    'name': ['민준', '현우', '서연', '동현'],
    'height': [175.3, 180.3, 165.8, 172.7],
}
# Build the frame, then put the columns in the order name, age, height.
x = pd.DataFrame(data)[['name', 'age', 'height']]
x

Unnamed: 0,name,age,height
0,민준,23,175.3
1,현우,43,180.3
2,서연,12,165.8
3,동현,45,172.7


#### name 컬럼의 내용만 출력

In [17]:
# Select the 'name' column; bracket access returns the same Series as x.name.
x['name']

0    민준
1    현우
2    서연
3    동현
Name: name, dtype: object

#### iloc[]

In [18]:
# Row-oriented input: every inner list is one row of the frame.
ary = [
    [1, 2],
    [3, 4],
    [5, 6],
]
data = pd.DataFrame(data=ary, columns=['First', 'Second'])
data

Unnamed: 0,First,Second
0,1,2
1,3,4
2,5,6


In [19]:
# iloc[1] picks the row at integer position 1 and returns it as a Series.
data.iloc[1]

First     3
Second    4
Name: 1, dtype: int64

In [20]:
#p178
# ':' keeps every row and -1 selects the last column by position.
data.iloc[:, -1]

0    2
1    4
2    6
Name: Second, dtype: int64

#### head()와 tail()

In [21]:
# Five rows of consecutive pairs: [1, 2], [3, 4], ..., [9, 10].
ary = [[first, first + 1] for first in range(1, 10, 2)]
data = pd.DataFrame(data=ary, columns=['First', 'Second'])
# Preview only the first three rows.
data.head(n=3)

Unnamed: 0,First,Second
0,1,2
1,3,4
2,5,6


In [22]:
# tail(3) shows the last three rows of the frame.
data.tail(3)

Unnamed: 0,First,Second
2,5,6
3,7,8
4,9,10


#### 특정 항목만 선택

In [23]:
# One flag per row; rows flagged True are kept from the 'Second' column.
bools = [False, True, True, False, True]
data.loc[bools, 'Second']

1     4
2     6
4    10
Name: Second, dtype: int64

In [24]:
#p179
# Column-wise mean (axis=0). numeric_only=True restricts the calculation to the
# numeric columns (age, height); without it, pandas 2.x raises TypeError on the
# text column 'name' instead of skipping it.
print(x.mean(axis=0, numeric_only=True))

age        30.750
height    173.525
dtype: float64


#### [참고] 부울형 리스트를 인덱스 값으로 입력

In [25]:
# Rebuild the people frame so this cell does not depend on earlier state.
data = {
    'age': [23, 43, 12, 45],
    'name': ['민준', '현우', '서연', '동현'],
    'height': [175.3, 180.3, 165.8, 172.7],
}
x = pd.DataFrame(data=data, columns=['name', 'age', 'height'])
# A boolean list with one entry per row keeps only the rows marked True.
index = [True, False, True, False]
print(x.loc[index])

  name  age  height
0   민준   23   175.3
2   서연   12   165.8


## Pandas 데이터의 형변환

In [26]:
#p180
import pandas as pd
# Five-row frame of integers used to demonstrate dtype conversion.
ary = [
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
]
data = pd.DataFrame(data=ary, columns=['First', 'Second'])
data

Unnamed: 0,First,Second
0,1,2
1,3,4
2,5,6
3,7,8
4,9,10


In [27]:
# Convert only the 'First' column to float; 'Second' is left as it was.
data = data.astype({'First': 'float'})
data

Unnamed: 0,First,Second
0,1.0,2
1,3.0,4
2,5.0,6
3,7.0,8
4,9.0,10


### Pandas에 파일 가져오기

In [28]:
#p181
import pandas as pd
# Load the CSV file into a DataFrame and preview its first rows.
# NOTE(review): in the displayed output the Period after 1999.09 is 1999.1, so the
# column appears to be parsed as a float and "1999.10" loses its trailing zero.
# Consider dtype={'Period': str} if the YYYY.MM text matters — confirm downstream use.
food = pd.read_csv('food.csv')
food.head()

Unnamed: 0,Series_reference,Period,Data_value,STATUS,UNITS,Subject,Group,Series_title_1
0,CPIM.SE9S01,1999.06,645,REVISED,Index,Consumers Price Index - CPI,"Food Price Index for New Zealand, Seasonally a...",Seasonally adjusted
1,CPIM.SE9S01,1999.07,647,REVISED,Index,Consumers Price Index - CPI,"Food Price Index for New Zealand, Seasonally a...",Seasonally adjusted
2,CPIM.SE9S01,1999.08,645,REVISED,Index,Consumers Price Index - CPI,"Food Price Index for New Zealand, Seasonally a...",Seasonally adjusted
3,CPIM.SE9S01,1999.09,644,REVISED,Index,Consumers Price Index - CPI,"Food Price Index for New Zealand, Seasonally a...",Seasonally adjusted
4,CPIM.SE9S01,1999.1,641,REVISED,Index,Consumers Price Index - CPI,"Food Price Index for New Zealand, Seasonally a...",Seasonally adjusted


In [29]:
import pandas as pd
# Reading acci.csv with the default UTF-8 decoder fails on the file's first byte.
# The failure is the point of this cell, so catch it and print the message
# instead of letting the exception stop a full top-to-bottom run.
try:
    accident = pd.read_csv('acci.csv')
except UnicodeDecodeError as err:
    print('UnicodeDecodeError:', err)
else:
    # Only reached if the file turns out to be valid UTF-8.
    print(accident.head())

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb9 in position 0: invalid start byte

In [30]:
import pandas as pd
# The file is not valid UTF-8 (the decoder rejects byte 0xb9 at position 0) and its
# header is Korean text, so name the Korean legacy encoding explicitly.
# engine='python' does not set an encoding; it presumably worked only where the
# platform's default encoding happened to match the file.
# NOTE(review): cp949 is assumed from the lead byte and the Korean header — confirm.
accident = pd.read_csv('acci.csv', encoding='cp949')
accident.head()

Unnamed: 0,법규위반,주야,발생건수,사망자수,부상자수,중상,경상,부상신고
0,과속,주,159,34,334,140,178,16
1,과속,야,218,73,348,200,139,9
2,교차로 통행방법 위반,주,8817,82,14031,3915,9530,586
3,교차로 통행방법 위반,야,5904,29,9728,2401,6884,443
4,기타,주,9388,141,14070,4271,9217,582
