In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Seed NumPy's legacy global generator so that every run of the
# notebook draws the same "random" integers.
np.random.seed(0)

# Ten integers drawn from 0 (inclusive) up to 10000 (exclusive).
s = Series(
    np.random.randint(low=0, high=10000, size=10)
)
s

0    2732
1    9845
2    3264
3    4859
4    9225
5    7891
6    4373
7    5874
8    6744
9    3468
dtype: int64

# Beyond 1

What if the range were from 0 to 10,000? How would that change your strategy, if at all?

In [2]:
# The string-based approach carries over unchanged: turn each number
# into text, take the second-to-last character (the tens digit), and
# convert that character back into a small integer.
#
# A one-digit number has no second-to-last character, so the lookup
# yields a missing value; "fillna" replaces it with '0'. If every number
# is known to be >= 10 the "fillna" step could be dropped, but keeping
# it is the safer choice.

(
    s.astype(str)
     .str[-2]
     .fillna('0')
     .astype('int8')
)


0    3
1    4
2    6
3    5
4    2
5    9
6    7
7    7
8    4
9    6
dtype: int8

# Beyond 2

Given a range from 0 to 10,000, what's the smallest `dtype` we could use for our integers?

In [3]:
# Show the smallest and largest values in the series, separated by a space:

print(f"{s.min()} {s.max()}")

2732 9845


In [4]:
# Try a signed 8-bit integer dtype and see what becomes of the values.
# (Compare the result with the original series: the numbers no longer match.)
s.astype('int8')

0    -84
1    117
2    -64
3     -5
4      9
5    -45
6     21
7    -14
8     88
9   -116
dtype: int8

In [5]:
# Try an unsigned 8-bit integer dtype and see what becomes of the values.
# (Again, compare the result with the original series.)
s.astype('uint8')

0    172
1    117
2    192
3    251
4      9
5    211
6     21
7    242
8     88
9    140
dtype: uint8

In [6]:
# Neither 8-bit dtype kept our values intact, so we need 16 bits --
# either np.int16 or np.uint16. With int16 the values come back unchanged:
s.astype('int16')

0    2732
1    9845
2    3264
3    4859
4    9225
5    7891
6    4373
7    5874
8    6744
9    3468
dtype: int16

# Beyond 3

Create a new series with 10 floating-point values between 0 and 1,000. Find the numbers whose integer component (i.e., ignoring any fractional part) is even.

In [7]:
# Build the series: np.random.rand gives ten floats in [0, 1);
# multiplying by 1000 stretches them to cover [0, 1000).
s = Series(1000 * np.random.rand(10))
s

0    383.441519
1    791.725038
2    528.894920
3    568.044561
4    925.596638
5     71.036058
6     87.129300
7     20.218397
8    832.619846
9    778.156751
dtype: float64

In [8]:
# Cast the floats to int64 to obtain their integer component, take the
# remainder after dividing by 2, and keep the rows where it is 0.
# The resulting boolean series is used as a mask on s.

s.loc[s.astype('int64').mod(2).eq(0)]

2    528.894920
3    568.044561
7     20.218397
8    832.619846
9    778.156751
dtype: float64