# The Series Data Structure

In [1]:
import pandas as pd
pd.Series?

[0;31mInit signature:[0m
[0mpd[0m[0;34m.[0m[0mSeries[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdata[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mindex[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcopy[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfastpath[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
One-dimensional ndarray with axis labels (including time series).

Labels need not be unique but must be a hashable type. The object
supports both integer- and label-based indexing and provides a host of
methods for performing operations involving the index. Statistical
methods from ndarray have been overridden to automatically exclude
missing data (currentl

In [2]:
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [3]:
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [5]:
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
import numpy as np 
np.nan == None

False

In [7]:
np.nan == np.nan

False

In [9]:
np.isnan(np.nan)

True

In [8]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [10]:
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [11]:
s = pd.Series(['Tiger', 'Bear', 'Moose'], index = ['India', 'America', 'Canada'])
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [12]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports, index  = ['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [13]:
sports = {'Archery' : 'Bhutan',
          'Golf' : 'Scotland',
          'Sumo' : 'Japan',
          'Taekwondo' : 'Korea'}

s = pd.Series(sports)
s

Archery        Bhutan
Golf         Scotland
Sumo            Japan
Taekwondo       Korea
dtype: object

In [14]:
s.iloc[3] # query by numeric location, starting at 0

'Korea'

In [15]:
s.loc['Golf'] # query by the index label

'Scotland'

In [16]:
s[3]

'Korea'

In [17]:
s['Golf']

'Scotland'

In [19]:
sports = { 99 : 'Bhutan',
           100 : 'Scotland',
           101 : 'Japan',
           102 : 'Korea'
           }

s = pd.Series(sports)

In [20]:
s[0] # this doesn't call s.iloc[0] as expected, and generates a key error

KeyError: 0

## Iterating over a series

In [21]:
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [22]:
total = 0
for item in s:
    total += item
print(total)

324.0


In [24]:
import numpy as np

total = np.sum(s)
print(total)

324.0


In [28]:
# test speed between iterating and functional programming

s = pd.Series(np.random.randint(0, 1000, 10000))
s.head()

0    231
1    734
2    910
3    326
4    836
dtype: int64

In [29]:
len(s)

10000

In [30]:
%%timeit -n 100
summary = 0
for item in s:
    summary += item

1.58 ms ± 46.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [33]:
%%timeit -n 100
summary = np.sum(s)

105 µs ± 9.77 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [34]:
s += 2 # adds two to each item in s using broadcasting
s

0       233
1       736
2       912
3       328
4       838
       ... 
9995    930
9996     74
9997    550
9998    413
9999    239
Length: 10000, dtype: int64

In [39]:
for label, value in s.iteritems():
    s.set_value(label, value+2) # set_value has been deprecated from pandas

s.head()

AttributeError: 'Series' object has no attribute 'set_value'

In [41]:
%%timeit -n 10
for label, value in s.iteritems():
    s.loc[label] = value + 2

592 ms ± 4.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [42]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
s+=2

416 µs ± 62.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [43]:
s = pd.Series([1, 2, 3])
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [44]:
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'], 
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
all_countries = original_sports.append(cricket_loving_countries)

In [45]:
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [46]:
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [47]:
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [48]:
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object