# Agenda

1. Series
2. Comparing NumPy arrays and series
3. Indexing (`.loc` and `.iloc`)
4. Dtypes
5. `NaN`
6. Methods

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a series from a plain list. No index is given, so pandas labels the
# items 0..5, and the integer values are stored as int64 (see the dtype below).
s = Series([10, 20, 30, 40, 50, 60])
s

0    10
1    20
2    30
3    40
4    50
5    60
dtype: int64

In [3]:
# Same six values, but ask for 16-bit integer storage instead of the default
# int64. The dtype is requested by name here.
s = Series([10, 20, 30, 40, 50, 60], dtype='int16')
s

0    10
1    20
2    30
3    40
4    50
5    60
dtype: int16

In [4]:
# Plain [] with an integer key on an integer-labelled series is a LABEL lookup.
# The default labels are 0..5, so label 0 also happens to be the first position.
s[0]

np.int16(10)

In [5]:
# Same kind of lookup for label 1. The result is a NumPy scalar with the
# series' dtype (int16), not a Python int.
s[1]

np.int16(20)

In [6]:
# .loc is the explicit label-based accessor: fetch the item labelled 0.
s.loc[0]

np.int16(10)

In [7]:
# Label 1 via .loc. Same result as s[1], but the label-based intent is explicit.
s.loc[1]

np.int16(20)

In [8]:
# Re-display s for reference: the lookups so far only read values.
s

0    10
1    20
2    30
3    40
4    50
5    60
dtype: int16

In [10]:
# Passing a LIST of labels returns a new series holding just those labels,
# in the order they were requested.
s.loc[[2, 3, 4]]   # fancy indexing

2    30
3    40
4    50
dtype: int16

In [11]:
# Aggregations reduce the series to a single scalar. The mean comes back as
# float64 even though s stores int16 values.
s.mean()

np.float64(35.0)

In [12]:
# max() returns one of the stored elements, so the result keeps the int16 dtype.
s.max()

np.int16(60)

In [13]:
# Series.std() is the SAMPLE standard deviation (ddof=1 by default), whereas
# NumPy's np.std() defaults to the population form (ddof=0).
s.std()

np.float64(18.708286933869708)

In [14]:
# Show s as it is before the assignment cells that follow modify it in place.
s

0    10
1    20
2    30
3    40
4    50
5    60
dtype: int16

In [15]:
# Assigning through .loc changes the series in place. 999 fits in int16, so
# the dtype stays the same.
s.loc[3] = 999
s

0     10
1     20
2     30
3    999
4     50
5     60
dtype: int16

In [16]:
# A single scalar assigned to a list of labels is broadcast to every selected
# item (this also overwrites the 999 previously stored at label 3).
s.loc[[1, 3, 5]] = 888
s

0     10
1    888
2     30
3    888
4     50
5    888
dtype: int16

In [17]:
# Same values again, now with an explicit index of string labels in place of
# the default 0..5 integer labels.
s = Series(
    [10, 20, 30, 40, 50, 60],
    index=['a', 'b', 'c', 'd', 'e', 'f'],
)

In [18]:
# The index now shows the string labels a..f. No dtype was requested this
# time, so the values are int64 again.
s

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [19]:
# .loc looks items up by label, and the labels are now strings.
s.loc['a']

np.int64(10)

In [20]:
# A list of labels selects several items at once and returns a series.
s.loc[['a', 'd', 'f']]

a    10
d    40
f    60
dtype: int64

In [21]:
# Label slices with .loc include BOTH endpoints, so 'd' is part of the result.
s.loc['a':'d']

a    10
b    20
c    30
d    40
dtype: int64

In [22]:
# What if I want to retrieve by position instead of by label?
# .iloc takes integer positions counted from 0, whatever the index labels are.

s.iloc[0]

np.int64(10)

In [23]:
# Position 4 is the fifth item, i.e. the one labelled 'e'.
s.iloc[4]

np.int64(50)

In [24]:
# A list of positions works too. The result keeps the original labels (c, d, e).
s.iloc[[2,3,4]]

c    30
d    40
e    50
dtype: int64

In [25]:
# Re-display s for reference before timing the two accessors.
s

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [26]:
# Time a label-based selection of three items. %timeit runs the statement
# repeatedly and reports the mean and standard deviation per loop.
%timeit s.loc[['b', 'd', 'f']]

205 µs ± 4.18 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [27]:
# Time the equivalent positional selection (positions 1, 3, 5 hold labels
# b, d, f). In the recorded run this is faster than the .loc version,
# presumably because no label lookup is needed -- TODO confirm.
%timeit s.iloc[[1, 3, 5]]

75.9 µs ± 4.65 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [28]:
# Re-display s for reference before the boolean-mask examples.
s

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [29]:
# A list of booleans (one per item) acts as a mask: items whose entry is True
# are kept, the rest are dropped.
s.loc[[True, False, True, True, False, True]]

a    10
c    30
d    40
f    60
dtype: int64

In [30]:
# s.mean() is a scalar, so the comparison is applied element-wise and yields a
# boolean series with the same index as s.
s > s.mean()

a    False
b    False
c    False
d     True
e     True
f     True
dtype: bool

In [31]:
# A boolean series can be used as a mask in .loc: keep only the values that
# are greater than the mean.
s.loc[s > s.mean()]

d    40
e    50
f    60
dtype: int64

In [32]:
# Label slice: the end label 'e' is included in the result.
s.loc['b':'e']

b    20
c    30
d    40
e    50
dtype: int64

In [34]:
# Positional slice: the end position 4 is excluded, as with Python lists, so
# the result stops at 'd'.
s.iloc[1:4]

b    20
c    30
d    40
dtype: int64

In [35]:
# .iloc is purely positional: it accepts a boolean list or array but rejects a
# boolean Series, so this lookup raises ValueError. The error is the point of
# this cell, so catch and display it rather than letting it halt "Run All".
# Working alternatives: s.loc[s > s.mean()] or
# s.iloc[(s > s.mean()).to_numpy()].
try:
    s.iloc[s > s.mean()]
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: iLocation based boolean indexing cannot use an indexable as a mask

In [36]:
# Re-display s for reference before inspecting the index and the min/max helpers.
s

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [37]:
# The index can itself be indexed by position: this returns the label stored
# at position 3.
s.index[3]

'd'

In [38]:
# Smallest value in the series.
s.min()

np.int64(10)

In [39]:
# Largest value in the series.
s.max()

np.int64(60)

In [40]:
# idxmin() returns the index LABEL of the smallest value, not the value itself.
s.idxmin()

'a'

In [41]:
# idxmax() returns the index LABEL of the largest value, not the value itself.
s.idxmax()

'f'

# Exercises:

1. Create a series of 10 random integers, with index labels a-j.
2. Retrieve the value at index b.
3. Retrieve the values at indexes c, d, and f.
4. What is the mean of the values at indexes a, e, g, and h?
5. What is the mean of items with even (positional) indexes?
6. What is the mean of the even numbers?

In [42]:
# Seed NumPy's legacy global generator so the "random" values are identical on
# every run, then draw 10 integers from 0 up to (but not including) 100 and
# label them a..j.
np.random.seed(0)
s = Series(
    np.random.randint(low=0, high=100, size=10),
    index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'],
)
s

a    44
b    47
c    64
d    67
e    67
f     9
g    83
h    21
i    36
j    87
dtype: int64

In [43]:
# Exercise 2: retrieve the value at index b.
# The lookup is by label, so use .loc.

s.loc['b']

np.int64(47)

In [44]:
# Exercise 3: retrieve the values at indexes c, d, and f.
# Pass the labels as a list to get all three back as a series.

s.loc[['c', 'd', 'f']]

c    64
d    67
f     9
dtype: int64

In [46]:
# Exercise 4: what is the mean of the values at indexes a, e, g, and h?
# Select the four labels first, then aggregate the resulting series.

s.loc[['a', 'e', 'g', 'h']].mean()

np.float64(53.75)

In [None]:
# What is the mean of items with even (positional) indexes?
# Step through the positions two at a time (0, 2, 4, ...) with a slice, so the
# selection works for a series of any length, then average the selected values.

s.iloc[::2].mean()

In [None]:
# What is the mean of the even numbers?