In [1]:
import pandas as pd

#### Series 기본
- 인덱스와 값의 쌍으로 구성된 1차원 자료형
- NumPy ndarray에 인덱스(레이블)를 붙인 것
- ndarray의 함수와 특징(벡터화 연산, 브로드캐스팅 등)을 그대로 사용할 수 있음

dict 형태로 생성

In [2]:
# Build a Series from a dict: the keys become the index labels,
# the dict values become the Series values.
S = pd.Series(dict(a=1, b=2, c=3, d=4))
S

a    1
b    2
c    3
d    4
dtype: int64

값 목록(list 또는 array)과 index를 함께 넘겨 생성

In [6]:
# Build the same Series from a list of values plus an explicit list of labels.
S = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
S

a    1
b    2
c    3
d    4
dtype: int64

index를 반드시 넣을 필요는 없다. 생략하면 0부터 시작하는 정수 인덱스(RangeIndex)가 자동으로 붙는다.

In [1]:
# Only the values are given here; no index argument is passed.
S = pd.Series(data=[1, 2, 3, 4])
S

0    1
1    2
2    3
3    4
dtype: int64

In [2]:
# The values of the Series, without the index, as a NumPy array
S.values

array([1, 2, 3, 4])

In [3]:
# Confirm that .values hands back a numpy.ndarray
type(S.values)

numpy.ndarray

In [7]:
# Index of the Series; a Series built without an index argument reports a RangeIndex
S.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
S ** 2 # element-wise: NumPy universal functions and broadcasting apply to a Series

0     1
1     4
2     9
3    16
dtype: int64

#### DataFrame 기본
- 2차원 배열 자료형 (Series는 1차원)
- 열은 Columns(Col), 행은 Index(Row)로 구분
- 하나 이상의 Series가 모인 집합

dict를 이용한 dataframe 생성

In [10]:
# Build a DataFrame from a dict: each key is a column name, each list is that
# column's values. The index argument supplies the row labels.
df = pd.DataFrame(
    data={"col1": [1, 2, 3, 4], "col2": [5, 6, 7, 8]},
    index=list("abcd"),
)

In [11]:
# All cell values as a 2-D NumPy array (one inner row per DataFrame row)
df.values

array([[1, 5],
       [2, 6],
       [3, 7],
       [4, 8]])

In [12]:
# Confirm that DataFrame.values is also a numpy.ndarray
type(df.values)

numpy.ndarray

In [11]:
df.index # show the row index labels

Index(['a', 'b', 'c', 'd'], dtype='object')

In [12]:
df.columns # show the column labels

Index(['col1', 'col2'], dtype='object')

In [13]:
type(df['col1']) # a single column of a DataFrame is a pandas Series

pandas.core.series.Series

#### 인덱싱과 슬라이싱

- Pandas에는 내가 직접 정의한 명시적 인덱스(레이블)가 있고, 항상 존재하는 암묵적 인덱스(0부터 시작하는 정수 위치)도 있음. 
- 그래서 loc(레이블 기반)와 iloc(정수 위치 기반) 두 종류가 있는 것. 
    - `S.loc['a']` → 1 : 명시적 인덱스로 찾는다. 
    - `S.iloc[2]` → 3 : 암묵적 인덱스로 찾는다.
    - `S.loc['a':'c']` → [1, 2, 3] : loc를 이용한 슬라이싱은 맨 뒤 값을 포함한다. 
    - `S.iloc[1:3]` → [2, 3] : iloc를 이용한 슬라이싱은 맨 뒤 값을 포함하지 않는다.

In [18]:
# Recreate the labelled Series used by the loc / iloc examples that follow,
# and show it together with its type.
S = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
(S, type(S))

(a    1
 b    2
 c    3
 d    4
 dtype: int64,
 pandas.core.series.Series)

In [19]:
S.loc['a'] # label-based lookup, much like fetching a dict value by its key

1

In [20]:
# Position-based lookup: the element at integer position 2 (the third one)
S.iloc[2]

3

loc 슬라이싱은 시작 레이블 'a'부터 끝 레이블 'c'까지 모두 포함한다.

In [21]:
# Label-based slice: the end label 'c' is included in the result
S.loc['a':'c']

a    1
b    2
c    3
dtype: int64

iloc 슬라이싱에서는 끝 위치(여기서는 3)를 포함하지 않는다. 

In [24]:
# Position-based slice: positions 1 and 2 are returned, the end position 3 is excluded
S.iloc[1:3]

b    2
c    3
dtype: int64

In [25]:
# Show the small col1 / col2 frame used in the DataFrame indexing examples below
df

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [26]:
# A list of column names selects those columns and returns a DataFrame
df[['col1', 'col2']]

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [27]:
df['col1'] # a single column name => Series (keeps the row index and the column name)

a    1
b    2
c    3
d    4
Name: col1, dtype: int64

In [28]:
df[['col1']] # a list of column names => DataFrame, even when the list has one name

Unnamed: 0,col1
a,1
b,2
c,3
d,4


In [29]:
# Label-based scalar lookup: row label 'a', column label 'col2'
df.loc['a', 'col2']

5

In [32]:
# Rows 'a' through 'c' (end label included) of column 'col1' => Series
df.loc['a':'c', 'col1']

a    1
b    2
c    3
Name: col1, dtype: int64

In [31]:
# Row positions 1 and 2 (end position 3 excluded) of the column at position 1 => Series
df.iloc[1:3, 1]

b    6
c    7
Name: col2, dtype: int64

#### 값 조회하기

In [33]:
# Remove the display limits so that every row and every column is rendered
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [37]:
# Cap the rendered output at 10 rows and 10 columns
pd.options.display.max_rows = 10
pd.options.display.max_columns = 10

In [36]:
import numpy as np

# Build a 500 x 5 demo frame of uniform random floats in [0, 1).
# The generator is seeded so that Restart Kernel -> Run All reproduces the same
# values; with an unseeded generator every re-run yields a different frame and
# the saved outputs of later cells stop agreeing with each other.
df = pd.DataFrame(np.random.default_rng(seed=42).random(size=(500, 5)),
                  columns=['X1', 'X2', 'X3', 'X4', 'X5'])
df

Unnamed: 0,X1,X2,X3,X4,X5
0,0.825130,0.153057,0.568997,0.124962,0.709438
1,0.374703,0.793732,0.797620,0.319817,0.359058
2,0.218627,0.369342,0.573366,0.707100,0.185524
3,0.750365,0.384759,0.188078,0.414661,0.059974
4,0.733913,0.458454,0.188973,0.200790,0.469428
...,...,...,...,...,...
495,0.451797,0.032363,0.989939,0.656679,0.456013
496,0.052197,0.947598,0.095592,0.920848,0.349987
497,0.982538,0.266727,0.261368,0.157445,0.435672
498,0.281466,0.431857,0.035410,0.661351,0.107385


In [38]:
# Display the 500-row frame again (how much is rendered depends on the display options)
df

Unnamed: 0,X1,X2,X3,X4,X5
0,0.825130,0.153057,0.568997,0.124962,0.709438
1,0.374703,0.793732,0.797620,0.319817,0.359058
2,0.218627,0.369342,0.573366,0.707100,0.185524
3,0.750365,0.384759,0.188078,0.414661,0.059974
4,0.733913,0.458454,0.188973,0.200790,0.469428
...,...,...,...,...,...
495,0.451797,0.032363,0.989939,0.656679,0.456013
496,0.052197,0.947598,0.095592,0.920848,0.349987
497,0.982538,0.266727,0.261368,0.157445,0.435672
498,0.281466,0.431857,0.035410,0.661351,0.107385


In [39]:
# First rows of the frame; with no argument head() returns 5 rows
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,0.82513,0.153057,0.568997,0.124962,0.709438
1,0.374703,0.793732,0.79762,0.319817,0.359058
2,0.218627,0.369342,0.573366,0.7071,0.185524
3,0.750365,0.384759,0.188078,0.414661,0.059974
4,0.733913,0.458454,0.188973,0.20079,0.469428


In [40]:
# First 10 rows of the frame
df.head(10)

Unnamed: 0,X1,X2,X3,X4,X5
0,0.82513,0.153057,0.568997,0.124962,0.709438
1,0.374703,0.793732,0.79762,0.319817,0.359058
2,0.218627,0.369342,0.573366,0.7071,0.185524
3,0.750365,0.384759,0.188078,0.414661,0.059974
4,0.733913,0.458454,0.188973,0.20079,0.469428
5,0.463185,0.587585,0.616757,0.239256,0.258516
6,0.341866,0.942133,0.023651,0.788763,0.900153
7,0.73135,0.933751,0.678195,0.39687,0.022733
8,0.269565,0.31138,0.982371,0.093004,0.940345
9,0.897754,0.783831,0.220259,0.069539,0.739502


In [41]:
# Last rows of the frame; with no argument tail() returns 5 rows
df.tail()

Unnamed: 0,X1,X2,X3,X4,X5
495,0.451797,0.032363,0.989939,0.656679,0.456013
496,0.052197,0.947598,0.095592,0.920848,0.349987
497,0.982538,0.266727,0.261368,0.157445,0.435672
498,0.281466,0.431857,0.03541,0.661351,0.107385
499,0.418343,0.305514,0.831688,0.986047,0.67992


In [42]:
# Column labels of the random frame
df.columns

Index(['X1', 'X2', 'X3', 'X4', 'X5'], dtype='object')

In [43]:
# Per-column dtypes; every column holds float64 at this point
df.dtypes

X1    float64
X2    float64
X3    float64
X4    float64
X5    float64
dtype: object

In [44]:
# Overwrite a single cell (row position 3, column position 4, i.e. 'X5') with a string.
# The column is float64, and writing a str into it relies on pandas silently
# upcasting the column, which is deprecated (PDEP-6: FutureWarning today, slated
# to become an error). Cast the target column to object explicitly first so the
# assignment is well-defined; the resulting frame is the same as before.
df = df.astype({df.columns[4]: object})
df.iloc[3, 4] = 'Changed'
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,0.82513,0.153057,0.568997,0.124962,0.709438
1,0.374703,0.793732,0.79762,0.319817,0.359058
2,0.218627,0.369342,0.573366,0.7071,0.185524
3,0.750365,0.384759,0.188078,0.414661,Changed
4,0.733913,0.458454,0.188973,0.20079,0.469428


In [46]:
# X5 now reports dtype object because it holds a string next to the floats
df['X5']

0      0.709438
1      0.359058
2      0.185524
3       Changed
4      0.469428
         ...   
495    0.456013
496    0.349987
497    0.435672
498    0.107385
499     0.67992
Name: X5, Length: 500, dtype: object

In [26]:
# Overwrite a rectangular block: row positions 3..19 of the columns at positions
# 2 and 3 ('X3', 'X4'); the end positions 20 and 4 are excluded.
# Those columns are float64, so cast them to object explicitly before storing
# strings instead of relying on pandas' deprecated implicit upcasting (PDEP-6).
df = df.astype({col: object for col in df.columns[2:4]})
df.iloc[3:20, 2:4] = 'Changed2'
df.iloc[:25]

Unnamed: 0,X1,X2,X3,X4,X5
0,0.360255,0.860208,0.310894,0.210855,0.366435
1,0.265531,0.592843,0.459606,0.610412,0.854391
2,0.862768,0.064806,0.94739,0.359989,0.103019
3,0.996828,0.405544,Changed2,Changed2,Changed
4,0.623951,0.643929,Changed2,Changed2,0.268549
5,0.283158,0.456452,Changed2,Changed2,0.502361
6,0.482873,0.218449,Changed2,Changed2,0.28079
7,0.252569,0.916347,Changed2,Changed2,0.634281
8,0.489403,0.75429,Changed2,Changed2,0.0578677
9,0.100635,0.258108,Changed2,Changed2,0.310467
