# Series is a univariate data set
- A mapping from an index to a value
- Like an ordered dict with a uniform data type
- Or like a pair of parallel arrays: one holding the data, one holding the labels

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a Series from a plain list; with no index given, the labels default to 0..n-1.
s = pd.Series([6, 8, 7, 5])
s

0    6
1    8
2    7
3    5
dtype: int64

In [4]:
# Supply explicit labels through `index=` instead of the default 0..n-1 integers.
s2 = pd.Series([6, 8, 7, 5], index=['A', 'B', 'C', 'D'])
s2

A    6
B    8
C    7
D    5
dtype: int64

# Data conversion

In [4]:
# astype returns a new Series with the requested dtype; `s` itself is not modified.
s.astype(np.float64)

0    6.0
1    8.0
2    7.0
3    5.0
dtype: float64

In [6]:
s.values  # the underlying data as a NumPy array (labels are dropped)

array([6, 8, 7, 5])

# Explicit index

In [7]:
# Labels do not have to be sorted; the Series keeps them in the order given.
s1 = pd.Series([6, 8, 7, 5], index=['b', 'c', 'd', 'a'])
s1

b    6
c    8
d    7
a    5
dtype: int64

In [8]:
# The labels live in an Index object; string labels are stored with dtype 'object'.
s1.index

Index(['b', 'c', 'd', 'a'], dtype='object')

In [11]:
s1.array  # the data as a pandas array object; preferred over s1.values

<PandasArray>
[6, 8, 7, 5]
Length: 4, dtype: int64

# Create from Dict

In [15]:
# Dict keys become the labels and keep their insertion order (they are not sorted).
# NOTE: this rebinds s2, replacing the 'A'..'D' Series created earlier.
s2 = pd.Series({'b': 6, 'd': 8, 'c': 7, 'a': 5})
s2

b    6
d    8
c    7
a    5
dtype: int64

# Accessors
* Use `iloc` (by integer position) and `loc` (by label) to avoid the ambiguity of plain `[]` indexing

In [14]:
# By integer position, not by label. s2[0] returns the same value here, but an
# integer key on a non-integer index is ambiguous (deprecated in newer pandas).
s2.iloc[0]

6

In [16]:
s2.loc['a']  # by label; s2['a'] returns the same value

5

In [17]:
s2[['a', 'c']]  # a list of labels selects several entries at once and returns a Series

a    5
c    7
dtype: int64

In [18]:
s2.loc[['a', 'c']]  # explicit label-based form of s2[['a', 'c']]; same result

a    5
c    7
dtype: int64

In [21]:
s2[0:2]  # integer slice is positional and end-exclusive: the first two entries, like s2.iloc[0:2]

b    6
d    8
dtype: int64

# Filtering

In [24]:
idx = s2 > 5  # element-wise comparison; yields a boolean Series with the same labels
idx

b     True
d     True
c     True
a    False
dtype: bool

In [25]:
s2[s2 > 5]  # a boolean mask keeps only the entries where it is True; same as s2[idx]

b    6
d    8
c    7
dtype: int64

# Compound filtering

In [28]:
# Combine masks with element-wise `&`. The parentheses are required because `&`
# binds more tightly than the comparison operators.
idx2 = (s2 > 6) & (s2 < 100)
idx2

b    False
d     True
c     True
a    False
dtype: bool

In [29]:
# Inline form of the compound mask; equivalent to s2[idx2].
s2[(s2 > 6) & (s2 < 100)]

d    8
c    7
dtype: int64

# Missing data

In [49]:
# 'Int64' (capital I) is pandas' nullable integer dtype: the values stay integers
# even though one entry is missing, instead of the whole Series becoming float64.
s3 = pd.Series([6., 8., np.nan, 7.], dtype='Int64')
s3

0      6
1      8
2    NaN
3      7
dtype: Int64

In [50]:
# notnull() is True for non-missing entries; using it as a mask drops the missing row.
s3[s3.notnull()]

0    6
1    8
3    7
dtype: Int64

In [34]:
# Top-level function form of the same filter: pd.notnull(s3) is equivalent to s3.notnull().
s3[pd.notnull(s3)]

0    6
1    8
3    7
dtype: Int64

# np.nan coerces integer data to float
* unless a nullable dtype such as `'Int64'` is passed via the `dtype` parameter

In [40]:
# No dtype is given, so the single NaN forces every integer value to float64.
s4 = pd.Series([1, 2, 3, np.nan, 5], index=list('abcde'))
s4

a    1.0
b    2.0
c    3.0
d    NaN
e    5.0
dtype: float64

# Alignment

In [45]:
# Two Series with overlapping but different labels: s6 has an extra label 'e'
# and holds NaN at 'd'.
s5 = pd.Series([6, 8, 7, 5], index=list('abcd'))
s6 = pd.Series([1, 2, 3, np.nan, 5], index=list('abcde'))

In [47]:
# Operands are aligned on labels, not positions. The result covers the union of
# both indexes (like an outer join); a label that is missing or NaN on either
# side yields NaN.
s5 + s6

a     7.0
b    10.0
c    10.0
d     NaN
e     NaN
dtype: float64