#  Series

In [None]:
import pandas as pd
import numpy as np
pd.Series?

In [3]:
# Build a Series from a plain Python list of strings.
animals = ['Tiger', 'Bear', 'Moose']
# No index is passed, so the entries get the default integer labels 0, 1, 2;
# the strings are stored with dtype `object` (see the output below).
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [4]:
# A list of integers is stored with an integer dtype (int64 in the output below).
numbers = [1, 2, 3, 4]
pd.Series(numbers)

0    1
1    2
2    3
3    4
dtype: int64

In [5]:
# How pandas handles a missing value in a list of strings:
# None is kept as None and the dtype stays `object` (see the output below).
animals = ['Tigers', 'Bear', None]
pd.Series(animals)

0    Tigers
1      Bear
2      None
dtype: object

In [6]:
# Missing value in a list of integers:
# None is shown as NaN and the whole Series becomes float64 (see the output below).
numbers = [1, 2, 3, None]
pd.Series(numbers)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

In [9]:
# Note: NaN is not the same thing as None, so this comparison is False.
np.nan == None

False

In [10]:
# NaN does not even compare equal to itself, so `==` cannot be used to detect it.
np.nan == np.nan

False

In [11]:
# Use a dedicated function such as np.isnan to test for NaN.
np.isnan(np.nan)

True

In [12]:
# Another way to create a Series: from a dictionary.
# Each key becomes an index label and each value becomes the stored data.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(sports)

In [14]:
# The index of the Series is now made of the dictionary's keys.
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [15]:
# Get the index object through the .index attribute.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [17]:
# Keep index creation separate from the data by passing the index
# explicitly, as a list, to the Series constructor.
# NOTE(review): 'Americs' looks like a typo for 'America'; it is left unchanged
# here so the recorded output below still matches.
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'Americs','Canada'])
s

India      Tiger
Americs     Bear
Canada     Moose
dtype: object

# Querying a Series

In [20]:
# Getting data out of a Series (querying a Series).
# A pandas Series can be queried either by index position or by index label.
s

India      Tiger
Americs     Bear
Canada     Moose
dtype: object

In [22]:
# Rebuild the sports Series (label -> country) for the lookups that follow.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(sports)

In [23]:
# Query by integer position with .iloc (position 3 is the fourth entry).
s.iloc[3]

'South Korea'

In [24]:
# Query by index label with .loc.
s.loc['Sumo']

'Japan'

In [25]:
# Note: .iloc and .loc are not methods, they are attributes.
# Attributes like these are indexed with square brackets []; methods are called with ().

In [26]:
# Look up one entry by position and one by label.
# Plain `s[3]` on a string-labelled Series relies on the deprecated fallback in
# which Series.__getitem__ treats an integer key as a position, so the lookup
# kind is spelled out: .iloc for positions, .loc for labels.
s.iloc[3], s.loc['Golf']

('South Korea', 'Scotland')

# Loops vs Vectorization - time 

In [27]:
# Sample data for the loop-vs-vectorization timings:
# 1000 random integers drawn from the range [0, 1000).
s = pd.Series(np.random.randint(low=0, high=1000, size=1000))

In [28]:
# Peek at the first five values.
s.head()

0    248
1    699
2    581
3    633
4    868
dtype: int64

In [30]:
# Confirm the number of values in the Series.
len(s)

1000

In [72]:
# Note: do not write anything before the %%timeit cell magic; it must be the first line of its cell.

In [57]:
%%timeit -n 100
# Baseline for the timing comparison: add up the values of `s` one at a time
# with an explicit Python loop.
summary = 0
for i in s:
    summary += i

100 loops, best of 3: 208 µs per loop


In [58]:
# Vectorization: this demonstrates why data scientists need to be aware of parallel computing features
# and start thinking in functional programming terms.

In [59]:
%%timeit -n 100
# Vectorized counterpart of the loop above: let NumPy add up the whole Series in one call.
# The result name matches the loop cell (`summary`) so the two timed cells are directly comparable.
summary = np.sum(s)

The slowest run took 59.80 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 3: 57.7 µs per loop


In [61]:
# Broadcasting: we can apply an operation to every value in the Series at once, changing the Series.
# Example: increase every value by 2.

In [68]:
%%timeit -n 10
# Build a fresh random Series and add 2 to every element in a single statement.
# The Series is created inside the timed cell, so its construction is part of the measured time.
# NOTE(review): names assigned inside a %%timeit cell are not expected to persist in the
# notebook namespace - confirm the notebook-level `s` is updated elsewhere before relying on
# the s.head() output that follows.
s = pd.Series(np.random.randint(0, 1000, 1000))
s += 2

The slowest run took 8.47 times longer than the fastest. This could mean that an intermediate result is being cached.
10 loops, best of 3: 382 µs per loop


In [63]:
# Each value shown below is 2 larger than in the earlier s.head() output.
s.head()

0    250
1    701
2    583
3    635
4    870
dtype: int64

In [73]:
# The procedural way: update the values one at a time in a loop.

In [75]:
%%timeit -n 10
# Procedural version for comparison with the broadcast cell: visit every
# (label, value) pair and write the increased value back one element at a time.
s = pd.Series(np.random.randint(0, 1000, 1000))
for label, values in s.items():
    # Series.set_value no longer exists in current pandas; .at is the
    # label-based accessor for setting a single value.
    s.at[label] = values + 2
#s.head()

10 loops, best of 3: 6.31 ms per loop


In [76]:
# The .loc attribute lets you not only modify data in place, but also add new data as well.
# If the value you pass in as the index doesn't exist, then a new entry is added.

In [78]:
# Adding an entry through .loc: the label 'Animal' does not exist yet, so a new
# row is appended. Mixing a string label and value into an integer Series
# leaves it with dtype `object`.
s = pd.Series(data=[1, 2, 3, 4])
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
3             4
Animal    Bears
dtype: object

In [79]:
# Index values do not have to be unique

In [81]:
# Several entries may share one index label: all four countries are stored
# under the same label, 'Cricket'.
cricket_loving_countries = pd.Series(
    ['Aus', 'Bar', 'Pak', 'Eng'],
    index=['Cricket'] * 4,
)
cricket_loving_countries

Cricket    Aus
Cricket    Bar
Cricket    Pak
Cricket    Eng
dtype: object