In [105]:
#!/usr/bin/python3
# Shebang kept as a plain comment: written as `!#...` it was sent to the shell as a no-op command.

In [106]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one;
# this is why the cells below show several outputs each.
InteractiveShell.ast_node_interactivity = "all"

In [107]:
import pandas as pd
import numpy as np 

# Show the pandas version that produced the outputs recorded in this notebook
pd.__version__

'1.3.5'

In [108]:
# Series at a glance

# Build a Series from an explicit list holding the integers 0..59
int_series = pd.Series(list(range(60)))

int_series

# Another way: hand the range object straight to the constructor
pd.Series(range(60))

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
      ..
50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
Length: 60, dtype: int64

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
      ..
50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
Length: 60, dtype: int64

In [109]:
# .size gives the number of elements in the Series
int_series.size

60

In [110]:
# The built-in len() reports the same element count as .size
len(int_series)

60

In [111]:
# To see what the series looks like:
# head() - gives the first five items by default
# tail() - gives the last five items by default

int_series.head()

int_series.tail()

0    0
1    1
2    2
3    3
4    4
dtype: int64

55    55
56    56
57    57
58    58
59    59
dtype: int64

In [112]:
# Both can also be called as head(n) and tail(n),
# where n is the number of records/items to return

int_series.head(n=2)

int_series.tail(n=7)

0    0
1    1
dtype: int64

53    53
54    54
55    55
56    56
57    57
58    58
59    59
dtype: int64

In [113]:
# A long Series is displayed truncated: the first and last rows, then its length and dtype
pd.Series(range(100000))

0            0
1            1
2            2
3            3
4            4
5            5
6            6
7            7
8            8
9            9
         ...  
99990    99990
99991    99991
99992    99992
99993    99993
99994    99994
99995    99995
99996    99996
99997    99997
99998    99998
99999    99999
Length: 100000, dtype: int64

In [114]:
# Display settings for long output:
# - display.max_rows: output with more rows than this is shown truncated
# - display.min_rows: how many rows a truncated display shows
pd.set_option("display.min_rows", 20)
pd.set_option("display.max_rows", 40)

In [115]:
# Accessing and extracting values from Series

from string import ascii_lowercase

# One single-character string per lowercase ASCII letter, 'a' through 'z'
letters = [*ascii_lowercase]
alphabet = pd.Series(data=letters)

alphabet.head(n=6)


0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [116]:
# Accessing the series
# alphabet carries the default integer index 0..25, so here an integer key is
# both the label and the position of an item.

# First letter
alphabet[0]

# 11th letter
alphabet[10]

# first three letters
alphabet[:3]

# sixth through tenth letters
alphabet[5:10]

# last six letters
alphabet[-6:]

'a'

'k'

0    a
1    b
2    c
dtype: object

5    f
6    g
7    h
8    i
9    j
dtype: object

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [117]:
# Accessing elements by the label

from string import ascii_uppercase

# Index labels 'label_A' ... 'label_Z', paired in order with the values 'a' ... 'z'
labeled_alphabet = pd.Series(
    data=list(ascii_lowercase),
    index=[f'label_{letter}' for letter in ascii_uppercase],
)

labeled_alphabet.head(3)

label_A    a
label_B    b
label_C    c
dtype: object

In [118]:
# Each pair below fetches the same items twice with []: first by position, then by label.
# NOTE(review): the outputs shown come from pandas 1.3.5; newer pandas releases deprecate
# treating a bare integer key (e.g. [0]) as a position when the index holds strings.
# Confirm before upgrading, and prefer .iloc for positional access.

# First letter
labeled_alphabet[0]
labeled_alphabet['label_A']

# 11th letter
labeled_alphabet[10]
labeled_alphabet['label_K']

# first three letters
labeled_alphabet[:3]
labeled_alphabet[:'label_C']
# In position-based slicing, the end position is not included
# In label-based slicing, the end label is included

# sixth through tenth letters
labeled_alphabet[5:10]
labeled_alphabet['label_F':'label_J']

# last six letters
labeled_alphabet[-6:]
labeled_alphabet['label_U':]


'a'

'a'

'k'

'k'

label_A    a
label_B    b
label_C    c
dtype: object

label_A    a
label_B    b
label_C    c
dtype: object

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [119]:
# add_prefix() and add_suffix() methods
# Starting point: the plain alphabet Series, whose index labels are the integers 0, 1, 2, ...

alphabet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [120]:
# Each call returns a new Series whose index labels have the given text prepended / appended.
# alphabet itself is left unchanged: the suffix result carries no 'label_' prefix.
alphabet.add_prefix('label_')

alphabet.add_suffix('_cool_ending')


label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

0_cool_ending     a
1_cool_ending     b
2_cool_ending     c
3_cool_ending     d
4_cool_ending     e
5_cool_ending     f
6_cool_ending     g
7_cool_ending     h
8_cool_ending     i
9_cool_ending     j
10_cool_ending    k
11_cool_ending    l
12_cool_ending    m
13_cool_ending    n
14_cool_ending    o
15_cool_ending    p
16_cool_ending    q
17_cool_ending    r
18_cool_ending    s
19_cool_ending    t
20_cool_ending    u
21_cool_ending    v
22_cool_ending    w
23_cool_ending    x
24_cool_ending    y
25_cool_ending    z
dtype: object

In [121]:
# Dot notation to access Series
# A label that is a well-formed Python identifier can also be read as an attribute.

labeled_alphabet['label_V']
labeled_alphabet.label_V

# Both spellings return the same value
labeled_alphabet['label_V'] == labeled_alphabet.label_V

'v'

'v'

True

In [122]:
# Boolean masks and the .loc indexer

# Label slice with plain [] indexing
labeled_alphabet['label_F':'label_J']

# loc: the same label slice through the .loc indexer
labeled_alphabet.loc['label_F':'label_J']

# .loc is the prototypical, explicit way of doing label-based extraction
# Plain [] indexing accepts a wider variety of inputs (labels, positions, slices, masks) than .loc
# NOTE(review): the claim that .loc is faster than [] slicing is not benchmarked in this notebook

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [123]:
# boolean masks

books_list = ['Fooled by Randomness', 'Sapiens', 'It Ends with Us']
books_series = pd.Series(
    books_list,
    index=['funny', 'anthro', 'emotional'],
    dtype=object,
)
books_series

# Keep only the entries whose mask position is True (the first and third here);
# the mask must supply exactly one boolean per element of the Series
books_series.loc[[True, False, True]]

funny        Fooled by Randomness
anthro                    Sapiens
emotional         It Ends with Us
dtype: object

funny        Fooled by Randomness
emotional         It Ends with Us
dtype: object

In [124]:
# number of elements (rows)
labeled_alphabet.size

# shape tuple: a Series is one-dimensional, so it holds a single entry (the length) and no column count
labeled_alphabet.shape

26

(26,)

In [125]:
# An all-True mask with one entry per element selects every item of the Series.
# The mask length is taken from the Series itself rather than hard-coding 26.
labeled_alphabet.loc[[True] * labeled_alphabet.size]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [126]:
# Mask is True at the even positions (0, 2, 4, ...), so every other item is selected.
# The comparison already yields a bool, and the mask length comes from the Series itself.
labeled_alphabet.loc[[i % 2 == 0 for i in range(labeled_alphabet.size)]]

# Boolean masks
# - used to select many items at once
# - work with both [] indexing and the .loc indexer

label_A    a
label_C    c
label_E    e
label_G    g
label_I    i
label_K    k
label_M    m
label_O    o
label_Q    q
label_S    s
label_U    u
label_W    w
label_Y    y
dtype: object

In [127]:
# Extracting by Position with iloc
# iloc => integer loc => indexing by position
# loc => location => indexing by label

# Indexing by iloc is also 0 based
# NOTE(review): the "same as" [] forms below rely on [] treating integers as positions for this
# string-labelled Series (true for the pandas 1.3.5 outputs shown). Newer pandas releases
# deprecate that for single integers and integer lists; confirm before upgrading.


labeled_alphabet.iloc[0]
# same as
labeled_alphabet[0]

labeled_alphabet.iloc[1]
# same as
labeled_alphabet[1]

labeled_alphabet.iloc[0:3]
# same as
labeled_alphabet[0:3]

# Selecting by specifying index positions
# Passing a Python list of positions as the argument
labeled_alphabet.iloc[[2,4,10]]
# same as
labeled_alphabet[[2,4,10]]

'a'

'a'

'b'

'b'

label_A    a
label_B    b
label_C    c
dtype: object

label_A    a
label_B    b
label_C    c
dtype: object

label_C    c
label_E    e
label_K    k
dtype: object

label_C    c
label_E    e
label_K    k
dtype: object

In [128]:
# Using Callables with .loc and .iloc
# A callable is an object that accepts some argument and returns an object;
# the indexer calls it with one argument and uses the returned value as the key

labeled_alphabet.loc['label_V']

# Equivalent: this lambda ignores its argument and simply returns the same label
labeled_alphabet.loc[lambda x:'label_V']

'v'

'v'

In [129]:
# A callable may also return a list of labels; items come back in the order listed
labeled_alphabet.loc[lambda x:['label_V', 'label_A']]

label_V    v
label_A    a
dtype: object

In [130]:
# The callable receives the Series being indexed, so the all-True mask can be sized from it
labeled_alphabet.loc[lambda series: [True] * series.size]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [131]:
def every_fifth(x):
    """Build a boolean mask that is True at every fifth position of ``x``.

    Args:
        x: Any object exposing an integer ``size`` attribute, such as the
            Series handed to a callable used with ``.loc`` / ``.iloc``.

    Returns:
        A list of ``x.size`` booleans that is True at the 5th, 10th, 15th, ...
        item (0-based positions 4, 9, 14, ...) and False elsewhere.
    """
    # Count positions from 1 so that multiples of five mark every fifth item;
    # the comparison already evaluates to a bool, no conditional expression needed.
    return [position % 5 == 0 for position in range(1, x.size + 1)]

# Pass the function itself (not its result); .iloc calls it and uses the returned mask
labeled_alphabet.iloc[every_fifth]

# Callables are used for highly customizable indexing:
# a single-argument function that returns a valid indexer

label_E    e
label_J    j
label_O    o
label_T    t
label_Y    y
dtype: object

In [132]:
# Selecting with the .get method
# For a label that exists, all three forms below return the same value

labeled_alphabet.get('label_V')
# same as
labeled_alphabet.loc['label_V']
# same as
labeled_alphabet['label_V']

'v'

'v'

'v'

In [133]:
labeled_alphabet.get('label_inexistent') # returns None, so nothing is displayed

labeled_alphabet.get('label_inexistent') == True
# False: the missing label yields None, because get's default parameter defaults to None.
# This is elegant because it provides an opportunity to
# specify a custom default value

labeled_alphabet.get('label_inexistent', default=None)

labeled_alphabet.get('label_inexistent', default="Could not find value related to that label")

labeled_alphabet.get('label_inexistent', default=19)

labeled_alphabet.get('label_inexistent', default={19:'20'})

# By contrast, if we pass a non-existent label via .loc label-based indexing,
# it raises a KeyError.

False

'Could not find value related to that label'

19

{19: '20'}

In [134]:
# get allows indexing either by label or by position
# NOTE(review): 8 is not a label of this string index, so get(8) and [8] fall back to position
# (pandas 1.3.5 outputs shown); newer pandas releases deprecate that fallback. Confirm before upgrading.

labeled_alphabet.get(8)
# same as
labeled_alphabet.iloc[8]
# same as
labeled_alphabet[8]

'i'

'i'

'i'

In [135]:
# Selection by Label

# [] idx   -- slices, callables, boolean masks
#    series['label']

# .loc[]   -- slices, callables, boolean masks
#    series.loc['label']

# dot access -- no slice or boolean mask support
#    series.label  (can only be used with well formed labels)

# .get     -- no slice support, provides a default, forgiving
#    series.get('label')

In [136]:
# Selection by position

# [] idx   -- series[0]
# .iloc    -- series.iloc[0]
# No dot access with position
# .get()   -- series.get(0)

In [137]:
# Andy's recommendation 

# Use loc if indexing by label 
# Use iloc if indexing by position 

In [138]:
# Skills challenge

# 1) Create a series of length 100 containing the squares of integers from
#    0 to 99. Assign it to the variable squares

# Square the whole Series 0..99 in one vectorized operation
squares = pd.Series(range(100)) ** 2
squares


# 2) Extract the last three items from the squares series using square bracket indexing
squares[-3:]

# 3) Repeat step 2 but using the .tail() method instead.
squares.tail(3)

# 4) Verify that the output of steps 2 and 3 is the same using the equals method

squares[-3:].equals(squares.tail(3))




0        0
1        1
2        4
3        9
4       16
5       25
6       36
7       49
8       64
9       81
      ... 
90    8100
91    8281
92    8464
93    8649
94    8836
95    9025
96    9216
97    9409
98    9604
99    9801
Length: 100, dtype: int64

97    9409
98    9604
99    9801
dtype: int64

97    9409
98    9604
99    9801
dtype: int64

True