In [1]:
import numpy as np
import pandas as pd

+ Series
 - pandas 기본 객체로 DataFrame과 함께 빈번하게 사용
 - ndarray 기반 인덱싱 추가(1차원 배열)

In [2]:
series = pd.Series(20)
series

0    20
dtype: int64


In [3]:
series[0]

20


In [4]:
series2 = pd.Series(range(1, 11))
series2

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int32


+ index, value select

In [5]:
print(series2.values)
print(series2.index)

[ 1  2  3  4  5  6  7  8  9 10]
RangeIndex(start=0, stop=10, step=1)


+ index 지정

In [6]:
series3 = pd.Series(range(1, 5), index=['a', 'b', 'c', 'd'])
print(series3)
print('-'*50)
print(series3.values)
print('-'*50)
print(series3.index)

a    1
b    2
c    3
d    4
dtype: int32
--------------------------------------------------
[1 2 3 4]
--------------------------------------------------
Index(['a', 'b', 'c', 'd'], dtype='object')


+ value select
 - loc: index
 - iloc: 0-based index

In [7]:
series3

a    1
b    2
c    3
d    4
dtype: int32

In [8]:
print(series3[1])
print(series3['b'])

2
2


In [9]:
series3[['a', 'b']]

a    1
b    2
dtype: int32

In [10]:
print(series3.loc['a'])
print(series3.iloc[0])

1
1


In [11]:
print(series3.loc[['a', 'b']])
print(series3.iloc[[0, 1]])

a    1
b    2
dtype: int32
a    1
b    2
dtype: int32


+ series create and index reuse

In [12]:
series4 = pd.Series(1, index=series3.index)
series4

a    1
b    1
c    1
d    1
dtype: int64

In [13]:
series5 = pd.Series(np.random.randn(5))
series5

0   -0.744492
1    2.135461
2   -0.447777
3    0.043428
4   -0.240325
dtype: float64

In [14]:
series6 = pd.Series({'math':100, 'sci':80})
series6

math    100
sci      80
dtype: int64

+ size, shape, unique, count

In [15]:
s = pd.Series([2, 1, 2, 3, np.nan])
s

0    2.0
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64

In [16]:
print(len(s))
print(s.size)

5
5


In [17]:
# return tuple
s.shape

(5,)

In [18]:
# counting
s.count()

4

In [19]:
# except duplicates
s.unique()

array([  2.,   1.,   3.,  nan])

In [20]:
# count values except NaN
s.value_counts()

2.0    2
3.0    1
1.0    1
dtype: int64

+ head, tail, take
 - 자료 부분 출력

In [21]:
# top 5 elements
s.head()

0    2.0
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64

In [22]:
# low 5 elements
s.tail()

0    2.0
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64

In [23]:
s.head(n=2)

0    2.0
1    1.0
dtype: float64

In [24]:
# take by 0-based index
s.take([0, 1])

0    2.0
1    1.0
dtype: float64

In [25]:
s = pd.Series(range(1, 3), index=['a', 'b'])
s.take(['a', 'b'])

TypeError: unorderable types: numpy.ndarray() < int()

+ calculating by index

In [1]:
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([4, 3, 2, 1], index=['d', 'c', 'b', 'a'])

print(s1)
print(s2)

a    1
b    2
c    3
d    4
dtype: int64
d    4
c    3
b    2
a    1
dtype: int64


In [2]:
s1 + s2

a    2
b    4
c    6
d    8
dtype: int64

In [3]:
# element-wise in numpy
a1 = np.array([1, 2, 3, 4])
a2 = np.array([4, 3, 2, 1])

a1 + a2

array([5, 5, 5, 5])

In [4]:
s1 * s2

a     1
b     4
c     9
d    16
dtype: int64

In [5]:
s1 ** 3

a     1
b     8
c    27
d    64
dtype: int64

In [6]:
s3 = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'f'])
s4 = pd.Series([4, 3, 2, 1, 0], index=['d', 'c', 'b', 'a', 'g'])

s3 + s4

a    2.0
b    4.0
c    6.0
d    8.0
f    NaN
g    NaN
dtype: float64

+ handling NA in pandas

In [7]:
np_array = np.array([1, 2, 3, np.NaN])
pd_series = pd.Series([1, 2, 3, np.NaN])

In [8]:
np_array.mean()

nan

In [9]:
pd_series.mean()

2.0

In [10]:
pd_series.mean(skipna=False)

nan