# Chapter 2: The Series object

## 2.1 Overview of a Series

In [1]:
import pandas as pd
import numpy as np

### 2.1.1 Classes and instances

In [2]:
# Calling the constructor with no arguments produces an empty Series.
pd.Series()

Series([], dtype: object)

### 2.1.2 Populating the Series with values

In [3]:
# Four string values used to populate the Series.
ice_cream_flavors = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]

# No index is supplied, so the rows are labelled 0 through 3.
pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [4]:
# The two lines below are equivalent: the first passes the list positionally,
# the second names the `data` parameter explicitly.
pd.Series(ice_cream_flavors)
pd.Series(data=ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

### 2.1.3 Customizing the Series index

In [6]:
# Values and the labels that will index them, paired position by position.
ice_cream_flavors = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]
days_of_week = ("Monday", "Wednesday", "Friday", "Saturday")

# The positional and keyword forms below build the same Series;
# only the last expression in the cell is displayed.
pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data=ice_cream_flavors, index=days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [11]:
# The index permits duplicates, a detail that distinguishes a Series from a Python dictionary.
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

# "Wednesday" is used twice here to show a repeated index label.
days_of_week = ("Monday", "Wednesday", "Friday", "Wednesday")

# The two lines below are equivalent
pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data=ice_cream_flavors, index=days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [12]:
# Keyword arguments allow passing parameters in any order.
pd.Series(index=days_of_week, data=ice_cream_flavors)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [13]:
bunch_of_bools = [True, False, False]
pd.Series(bunch_of_bools)

0     True
1    False
2    False
dtype: bool

In [14]:
# Float values paired with string index labels.
time_of_day = ["Open", "Close"]
stock_prices = [985.32, 950.44]
pd.Series(index=time_of_day, data=stock_prices)

Open     985.32
Close    950.44
dtype: float64

In [15]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

In [16]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers, dtype="float")

0     4.0
1     8.0
2    15.0
3    16.0
4    23.0
5    42.0
dtype: float64

### 2.1.4 Creating a Series with missing values

In [17]:
temperatures = [94, 88, np.nan, 91]
pd.Series(data=temperatures)

0    94.0
1    88.0
2     NaN
3    91.0
dtype: float64

## 2.2 Creating a Series from Python objects

In [18]:
# Dictionary keys become the index labels and the dictionary values become the Series values.
calorie_info = {"Cereal": 125, "Chocolate Bar": 406, "Ice Cream Sundae": 342}

# `diet` is reused by the attribute examples later in the chapter.
diet = pd.Series(data=calorie_info)
diet

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
dtype: int64

In [19]:
pd.Series(data=("Red", "Green", "Blue"))

0      Red
1    Green
2     Blue
dtype: object

In [20]:
rgb_colors = [(120, 41, 26), (196, 165, 45)]
pd.Series(data=rgb_colors)

0     (120, 41, 26)
1    (196, 165, 45)
dtype: object

In [22]:
my_set = {"Ricky", "Bobby"}
# Deliberately left commented out: passing a set directly to the constructor
# raises a TypeError (presumably because a set has no defined order — confirm
# against the pandas docs). The next cell converts the set to a list first.
# pd.Series(my_set) # TypeError

In [23]:
pd.Series(list(my_set))

0    Bobby
1    Ricky
dtype: object

In [30]:
# Ten random integers from 1 to 100 (the upper bound, 101, is exclusive).
# No seed is set in this notebook, so the values differ on every run.
random_data = np.random.randint(1, 101, 10)
random_data

array([32, 72, 30, 58, 10, 78, 47, 94, 86, 23])

In [31]:
pd.Series(random_data)

0    32
1    72
2    30
3    58
4    10
5    78
6    47
7    94
8    86
9    23
dtype: int32

## 2.3 Series attributes

In [32]:
diet.values

array([125, 406, 342], dtype=int64)

In [33]:
type(diet.values)

numpy.ndarray

In [34]:
diet.index

Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')

In [36]:
type(diet.index)

pandas.core.indexes.base.Index

In [38]:
diet.dtype

dtype('int64')

In [39]:
diet.size

3

In [40]:
diet.shape

(3,)

In [41]:
diet.is_unique

True

In [43]:
pd.Series([3, 3]).is_unique

False

In [49]:
pd.Series(data=[1, 3, 6]).is_monotonic_increasing

True

In [50]:
pd.Series(data=[1, 6, 3]).is_monotonic_increasing

False

In [52]:
pd.Series(data=[6, 3, 1]).is_monotonic_decreasing

True

In [53]:
pd.Series(data=[1, 6, 3]).is_monotonic_decreasing

False

## 2.4 Retrieving the first and last rows

In [54]:
# 100 values: the multiples of 5 from 0 through 495.
values = range(0, 500, 5)
nums = pd.Series(data=values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [55]:
nums.head(3)

0     0
1     5
2    10
dtype: int64

In [56]:
nums.head(n=3)

0     0
1     5
2    10
dtype: int64

In [57]:
nums.head()

0     0
1     5
2    10
3    15
4    20
dtype: int64

In [58]:
nums.tail(6)

94    470
95    475
96    480
97    485
98    490
99    495
dtype: int64

In [59]:
nums.tail()

95    475
96    480
97    485
98    490
99    495
dtype: int64

## 2.5 Mathematical operations

### 2.5.1 Statistical operations

In [85]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [86]:
numbers.count()

5

In [87]:
numbers.sum()

15.0

In [88]:
numbers.sum(skipna=False)

nan

In [89]:
# min_count is the minimum number of non-missing values required for a result;
# numbers holds 5 of them, so the sum is returned.
numbers.sum(min_count=3)

15.0

In [90]:
# Only 5 non-missing values exist, fewer than the 6 required, so the result is NaN.
numbers.sum(min_count=6)

nan

In [91]:
numbers.product()

120.0

In [92]:
numbers.product(skipna=False)

nan

In [93]:
numbers.product(min_count=3)

120.0

In [94]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [95]:
numbers.cumsum()

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

In [96]:
# With skipna=False, every running total from the first NaN onward is NaN.
numbers.cumsum(skipna=False)

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [97]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [98]:
numbers.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [99]:
# The three lines below are equivalent: the missing value is forward-filled
# before the change is computed, so index 3 reports 0.0 rather than NaN.
# NOTE(review): recent pandas releases deprecate the `fill_method` argument —
# confirm against the installed version before relying on this cell.
numbers.pct_change()
numbers.pct_change(fill_method="pad")
numbers.pct_change(fill_method="ffill")

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

![image.png](attachment:fa2fd7ca-7749-475a-a73a-23ebf1bb35ee.png)

In [100]:
# The two lines below are equivalent: the missing value is back-filled from the
# next row, so index 3 reports 0.333333 and index 4 reports 0.0.
# NOTE(review): recent pandas releases deprecate the `fill_method` argument —
# confirm against the installed version before relying on this cell.
numbers.pct_change(fill_method="bfill")
numbers.pct_change(fill_method="backfill")

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

![image.png](attachment:56ef5034-45da-4ff3-a217-7757dedcdc49.png)

In [101]:
numbers.mean()

3.0

In [102]:
numbers.median()

3.0

In [103]:
numbers.std()

1.5811388300841898

In [104]:
numbers.max()

5.0

In [105]:
numbers.min()

1.0

In [106]:
animals = pd.Series(["koala", "aardvark", "zebra"])
animals

0       koala
1    aardvark
2       zebra
dtype: object

In [107]:
animals.max()

'zebra'

In [108]:
animals.min()

'aardvark'

In [109]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [125]:
# Three randomly chosen rows; no random_state is passed, so the selection
# differs between runs.
numbers.sample(3)

2    3.0
3    NaN
1    2.0
dtype: float64

In [131]:
# "Orwell" appears twice in the data.
authors = pd.Series(
    data=["Hemingway", "Orwell", "Dostoevsky", "Fitzgerald", "Orwell"],
)

# Distinct values, in order of first appearance.
authors.unique()

array(['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [134]:
authors.nunique()

4

### 2.5.2 Arithmetic operations

In [135]:
s1 = pd.Series(data=[5, np.nan, 15],  index=["A", "B", "C"])
s1

A     5.0
B     NaN
C    15.0
dtype: float64

In [136]:
s1 + 3

A     8.0
B     NaN
C    18.0
dtype: float64

In [137]:
s1.add(3)

A     8.0
B     NaN
C    18.0
dtype: float64

In [140]:
# The three lines below are equivalent
s1 - 5
s1.sub(5)
s1.subtract(5)

A     0.0
B     NaN
C    10.0
dtype: float64

In [143]:
# The three lines below are equivalent
s1 * 2
s1.mul(2)
s1.multiply(2)

A    10.0
B     NaN
C    30.0
dtype: float64

In [146]:
# The three lines below are equivalent
s1 / 2
s1.div(2)
s1.divide(2)

A    2.5
B    NaN
C    7.5
dtype: float64

In [148]:
# The two lines below are equivalent
s1 //  4
s1.floordiv(4)

A    1.0
B    NaN
C    3.0
dtype: float64

In [151]:
# The two lines below are equivalent
s1 % 3
s1.mod(3)

A    2.0
B    NaN
C    0.0
dtype: float64

### 2.5.3 Broadcasting

In [152]:
s1 = pd.Series([1, 2, 3], index=["A", "B", "C"])
s2 = pd.Series([4, 5, 6], index=["A", "B", "C"])

In [153]:
s1 + s2

A    5
B    7
C    9
dtype: int64

In [154]:
s1 = pd.Series(data=[3, 6, np.nan, 12])
s2 = pd.Series(data=[3, 6, np.nan, 12])

In [158]:
# The two lines below are equivalent
# Index 2 is False even though both Series hold NaN there: NaN does not
# compare equal to NaN.
s1 == s2
s1.eq(s2)

0     True
1     True
2    False
3     True
dtype: bool

In [161]:
# The two lines below are equivalent
s1 != s2
s1.ne(s2)

0    False
1    False
2     True
3    False
dtype: bool

In [162]:
# Two Series whose index labels only partly overlap: "B" and "C" are shared.
s1 = pd.Series([5, 10, 15], index=list("ABC"))
s2 = pd.Series([4, 8, 12, 14], index=list("BCDE"))

In [163]:
# Values are matched by index label, not by position; labels present in only
# one of the two Series (A, D, E) produce NaN.
s1 + s2

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

![image.png](attachment:4bf8301a-23bc-476c-9d9a-5376ea7239d7.png)

## 2.6 Passing the Series to Python's built-in functions

In [164]:
cities = pd.Series(data=["San Francisco", "Los Angeles", "Las Vegas", np.nan])

In [165]:
len(cities)

4

In [167]:
type(cities)

pandas.core.series.Series

In [168]:
dir(cities)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex__',
 '__rep

In [169]:
list(cities)

['San Francisco', 'Los Angeles', 'Las Vegas', nan]

In [170]:
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las Vegas', 3: nan}

In [171]:
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [175]:
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [173]:
# `in` on a Series checks the index labels, not the values, hence False.
"Las Vegas" in cities

False

In [174]:
# 2 is one of the index labels, so the membership test succeeds.
2 in cities

True

In [176]:
# To test for a value rather than a label, search the `values` array.
"Las Vegas" in cities.values

True

In [177]:
100 not in cities

True

In [178]:
"Paris" not in cities.values

True

## 2.7 Coding challenge

### 2.7.1 Problems

In [179]:
# Names that serve as the index labels in the solutions below.
superheroes = [
    "Batman",
    "Superman",
    "Spider-Man",
    "Iron Man",
    "Captain America",
    "Wonder Woman"
]

# NOTE(review): "strengh_levels" is misspelled (missing a "t"). The solution
# cells reference this exact name, so any rename must be applied there too.
strengh_levels = (100, 120, 90, 95, 110, 120)

### 2.7.2 Solutions

In [180]:
# 1. Use the list of superheroes to populate a new Series object.
pd.Series(superheroes)

0             Batman
1           Superman
2         Spider-Man
3           Iron Man
4    Captain America
5       Wonder Woman
dtype: object

In [181]:
# 2. Use the tuple of strengths to populate a new Series object.
pd.Series(data=strengh_levels)

0    100
1    120
2     90
3     95
4    110
5    120
dtype: int64

In [183]:
# 3. Create a Series with the superheroes as index labels and the strength levels as
# the values. Assign the Series to a heroes variable.
heroes = pd.Series(data=strengh_levels, index=superheroes)
heroes

Batman             100
Superman           120
Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [184]:
# 4. Extract the first two rows of the heroes Series.
heroes.head(2)

Batman      100
Superman    120
dtype: int64

In [185]:
# 5. Extract the last four rows of the heroes Series.
heroes.tail(4)

Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [186]:
# 6. Determine the number of unique values in your heroes Series.
heroes.nunique()

5

In [187]:
# 7. Calculate the average strength of the superheroes in heroes.
heroes.mean()

105.83333333333333

In [188]:
# 8. Calculate the maximum and minimum strengths in heroes.
# The task asks for both extremes. A cell echoes only its last expression, so
# the maximum is what gets displayed; run heroes.min() in its own cell to see
# the minimum.
heroes.min()
heroes.max()

120

In [None]:
# 9. Calculate what each superhero’s strength level would be if it doubled.
# Multiplying by a scalar applies to every value; heroes.mul(2) is equivalent.
heroes * 2

In [None]:
# 10. Convert the heroes Series to a Python dictionary.
# The index labels become the keys and the Series values become the values.
dict(heroes)