In [1]:
import numpy as np
import pandas as pd

In [2]:
# A plain list of strings becomes a Series with a default 0..n-1 integer index.
animals = ['tiger', 'bear', 'moose']
pd.Series(data=animals)


0    tiger
1     bear
2    moose
dtype: object

In [3]:
# A list of integers: pandas infers an integer dtype for the values.
nums = [1, 2, 3]
pd.Series(data=nums)

0    1
1    2
2    3
dtype: int64

In [4]:
# A missing entry in a list of strings: the output below shows it kept as None.
animals = ['tiger', 'bear', None]
pd.Series(data=animals)

0    tiger
1     bear
2     None
dtype: object

In [23]:
# A missing entry in a list of numbers is converted to NaN, which forces the
# whole Series to a floating-point dtype.
nums = [1, 2, None]
x = pd.Series(data=nums)
x


0    1.0
1    2.0
2    NaN
dtype: float64

In [25]:
# A Series created without `name=` has no name, so the attribute is None.
# Only the final expression of a cell is displayed, so inspect the type directly
# instead of evaluating `x.name` on a line of its own (its result was discarded).
type(x.name)

NoneType

In [28]:
# `name=` attaches a label to the Series; it is shown in the footer of the repr.
new_nums = [1, 2, 3, 4]
y = pd.Series(data=new_nums, name='my_numbers')
y

0    1
1    2
2    3
3    4
Name: my_numbers, dtype: int64

In [29]:
# The name given at construction time is available through the .name attribute.
y.name

'my_numbers'

In [31]:
# Give the Series custom index labels instead of the default 0..n-1 positions.
animals = pd.Series(
    data=['tiger', 'bear', 'moose'],
    index=['india', 'usa', 'canada'],
)
animals

india     tiger
usa        bear
canada    moose
dtype: object

In [36]:
# Create a Series from a dictionary: the keys become the index labels and the
# values become the data.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
pd.Series(data=sports)

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [38]:
# Missing data in a dictionary of strings (see the output below):
#   * a None *key* is shown as NaN in the index;
#   * a None *value* is kept as None in the data.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    None: 'Japan',
    'Taekwondo': None,
}
pd.Series(data=sports)

Archery        Bhutan
Golf         Scotland
NaN             Japan
Taekwondo        None
dtype: object

In [41]:
"""
If "keys" and "values" both are numbers and any value from keys/values is absent then both stored as NaN
and dtype is set as "float64"
"""

num_dict = {1:1,
            2:2,
            None:3,
            4:None}
pd.Series(num_dict)

1      1.0
2      2.0
NaN    3.0
4      NaN
dtype: float64

# Querying a Series

In [51]:
# Series used by the querying examples that follow.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [61]:
# Wrong way: ()  -- e.g. s.iloc(2) or s.loc('Golf')
# Right way: []  -- e.g. s.iloc[2] or s.loc['Golf']
# `iloc` and `loc` are indexer attributes of the Series, not ordinary methods,
# so they are subscripted with square brackets rather than called.

# Therefore s.iloc(2) or s.loc('Golf') will raise an error.
# .iloc selects by integer position.
s.iloc[2]

'Japan'

In [62]:
# .loc selects by index label.
s.loc['Golf']

'Scotland'

In [63]:
# Positional access. Writing `s[2]` relies on plain [] falling back to positions
# when the index holds no integers; that fallback is deprecated in recent pandas
# releases, so ask for the position explicitly with .iloc.
s.iloc[2]

'Japan'

In [64]:
# Plain [] with a label that exists in the index performs a label-based lookup.
s['Golf']

'Scotland'

In [66]:
# Index labels do not have to be strings: here a tuple, a string and an int
# are used side by side.
test = pd.Series(
    data=['one', 'two', 'three'],
    index=[(1, 2, 3), 'b', 3],
)
test

(1, 2, 3)      one
b              two
3            three
dtype: object

In [77]:
# Plain [] lookups using each of the three labels: a string, an int and a tuple.
# Only the last expression of a cell is displayed, so every result except the
# final one is evaluated and then discarded.
test['b']
test[3]
test[(1, 2, 3)]

# The first two labels again, this time through .loc.
test.loc['b']
test.loc[3]
# NOTE(review): this repeats the plain [] tuple lookup rather than using .loc --
# confirm whether that is intentional.
test[(1, 2, 3)]

'one'

In [85]:
"""
If we set the numerical indexes and then try to get the series using the positional indexes then
we will get an error because Pandas will get confused whether to use positional-indexes or label-indexes 
"""

num_index_sports = {1:'China', 2:'India', 3:'US', 4:'Indonesia', 5:'Brazil'}
num_index_series = pd.Series(num_index_sports)
num_index_series

1        China
2        India
3           US
4    Indonesia
5       Brazil
dtype: object

In [88]:
"""
    Here if we try to get the first element using the positional-index 0,
    then we will get an error because it is now confused between
    numeric-label-indexes & default-numeric-positonal-indexes
"""
# num_index_series[0]
num_index_series.iloc[0]

'China'

In [125]:
# 10,000 random integers drawn from [0, 1000); preview the first five.
# (numpy is imported together with pandas in the import cell at the top of the
# notebook rather than in the middle of it.)
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
rand_nums.head()

0    805
1    576
2    787
3    493
4    121
dtype: int64

In [126]:
%%timeit -n 100
# Baseline: add up the values one at a time with an explicit Python loop.
# Note: the Series is rebuilt inside the timed cell body, so its construction
# is part of the measurement.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
total = 0
for item in rand_nums:
    total += item

1.2 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [127]:
%%timeit -n 100
# Same total, computed with Python's built-in sum() instead of an explicit loop.
# Note: the Series is rebuilt inside the timed cell body, so its construction
# is part of the measurement.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
total = sum(rand_nums)

727 µs ± 71.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [129]:
# Draw a fresh batch of 10,000 random integers in [0, 1000) and preview it.
rand_nums = pd.Series(data=np.random.randint(low=0, high=1000, size=10000))
rand_nums.head()

0    673
1    102
2    681
3    254
4    258
dtype: int64

In [144]:
%%timeit -n 10
# Slow baseline: add 2 to every element one label at a time.
# `Series.items()` yields (label, value) pairs; it replaces `Series.iteritems()`,
# which no longer exists in pandas 2.x.
# Note: the Series is rebuilt inside the timed cell body, so its construction
# is part of the measurement.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in rand_nums.items():
    rand_nums.at[label] = value + 2

157 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [145]:
%%timeit -n 10
# Fast alternative: add 2 to every element with a single whole-Series operation
# instead of a Python-level loop.
# Note: the Series is rebuilt inside the timed cell body, so its construction
# is part of the measurement.
rand_nums = pd.Series(np.random.randint(0, 1000, 10000))
rand_nums += 2

634 µs ± 223 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Add new data 

In [146]:
# Small integer Series used to demonstrate adding a new entry.
s = pd.Series(data=[1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [149]:
# Assigning through .loc to a label that is not yet in the index adds a new entry
# to the Series in place. Mixing a string with the existing integers changes the
# dtype to object, as the output below shows.
s.loc['name'] = 'Mohit'
s

0           1
1           2
2           3
name    Mohit
dtype: object

In [159]:
# Two Series to combine: one with a unique label per sport, and one whose four
# entries all share the single label 'Cricket'.
original_sports = pd.Series(data={
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
})
cricket_loving = pd.Series(
    data=['Australia', 'Barbados', 'Pakistan', 'England'],
    index=['Cricket'] * 4,
)

In [160]:
# Four sports, each stored under its own unique index label.
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [161]:
# Four countries that all share the same index label, 'Cricket'.
cricket_loving

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [162]:
# Combine the two Series end to end into a new Series, keeping every index
# label (including the repeated 'Cricket' labels).
# `pd.concat` is used because `Series.append` no longer exists in pandas 2.x.
all_countries = pd.concat([original_sports, cricket_loving])
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [163]:
# The added labels do not have to be 'Cricket': per the original note, changing
# them to 5, 6, 7, 8 runs fine as well.

# Important: combining the two Series produces a new Series -- neither of the
# ORIGINAL Series is MUTATED, as the unchanged output below shows.

original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [164]:
# cricket_loving still holds only its own four 'Cricket' entries.
cricket_loving

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [165]:
# all_countries holds the combined result: the four sports followed by the four
# 'Cricket' entries.
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [167]:
# Combining again without assigning the result: the new Series is displayed but
# not stored anywhere, and neither input is modified.
# `pd.concat` is used because `Series.append` no longer exists in pandas 2.x.
pd.concat([original_sports, cricket_loving])

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [168]:
# original_sports still contains only its original four entries.
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [175]:
# 'Golf' occurs exactly once in the index, so the lookup returns the single
# stored value (a str).
type(all_countries.loc['Golf'])

str

In [176]:
# 'Cricket' occurs several times in the index, so the lookup returns a Series
# containing every matching entry rather than a single value.
type(all_countries['Cricket'])

pandas.core.series.Series