# Series
_a sequence of values with associated labels_
* one dimensional
* labeled arrays
* any data type

In [144]:
import pandas as pd

### Check pandas version

In [145]:
pd.__version__

'2.2.2'

### Python list to series

In [146]:
python_list = [87, 45, 65, 97, 12, 35]

In [147]:
type(python_list)

list

In [148]:
to_series = pd.Series(python_list)

In [149]:
to_series

0    87
1    45
2    65
3    97
4    12
5    35
dtype: int64

In [150]:
books = ['Fooled by randomness', 'The creative code', 'MathQBank']

In [151]:
book_series = pd.Series(books)

In [152]:
book_series

0    Fooled by randomness
1       The creative code
2               MathQBank
dtype: object

In [153]:
price = [12, 8, 7]

In [154]:
price_int_series = pd.Series(price)

In [155]:
price_int_series

0    12
1     8
2     7
dtype: int64

### Support mix data type

In [156]:
book_and_price = [books, price]

In [157]:
book_and_price_series = pd.Series(book_and_price)

In [158]:
book_and_price_series

0    [Fooled by randomness, The creative code, Math...
1                                           [12, 8, 7]
dtype: object

### Series form Python dictionary

In [159]:
books_dict = {'Fooled by randomness': 12,
              'The creative code': 8, 
              'MathQBank': 7}

In [160]:
books_dict_to_series = pd.Series(books_dict)

In [161]:
books_dict_to_series

Fooled by randomness    12
The creative code        8
MathQBank                7
dtype: int64

### specify data type
* dtype='type'
* strings are always object

In [162]:
price_int_series

0    12
1     8
2     7
dtype: int64

In [163]:
int_to_float = pd.Series(price_int_series, dtype='float')

In [164]:
int_to_float

0    12.0
1     8.0
2     7.0
dtype: float64

### dtype('0')
* dtype('O') means Pandas string
* 'O' stands for object

In [165]:
book_series

0    Fooled by randomness
1       The creative code
2               MathQBank
dtype: object

In [166]:
book_series.dtype

dtype('O')

## Index

 * custom index

In [167]:
pd.Series(data=books, index=['B1','B2','B3'])

B1    Fooled by randomness
B2       The creative code
B3               MathQBank
dtype: object

In [168]:
book_series.index

RangeIndex(start=0, stop=3, step=1)

In [169]:
books.index

<function list.index(value, start=0, stop=9223372036854775807, /)>

## name attribute

In [170]:
book_series.name

#### nothing back, we can verify it using boolean

In [171]:
book_series.name == None

True

#### lets give it a name

In [172]:
book_series.name = 'book_list'

In [173]:
book_series

0    Fooled by randomness
1       The creative code
2               MathQBank
Name: book_list, dtype: object

#### series name becomes column name in dataframe

#### index can also have name

In [174]:
book_series.index.name

In [175]:
book_series.index.name == None

True

In [176]:
book_series.index.name = 'Index_number'

In [177]:
book_series

Index_number
0    Fooled by randomness
1       The creative code
2               MathQBank
Name: book_list, dtype: object

### Exercise

In [178]:
actor_name = ['actor_a', 'actor_b', 'actor_c', 'actor_d']

In [179]:
actor_age = [34, 40.5, 67, 25.7]

In [180]:
actors = pd.Series(actor_age, index=actor_name, name='actors_age')

In [181]:
actors

actor_a    34.0
actor_b    40.5
actor_c    67.0
actor_d    25.7
Name: actors_age, dtype: float64

In [182]:
actors.name = "actors_age"

In [183]:
actors.index.name = 'name'

In [184]:
actors

name
actor_a    34.0
actor_b    40.5
actor_c    67.0
actor_d    25.7
Name: actors_age, dtype: float64

### create dictionary form list

In [185]:
# actor_name: actor_age
actor_dict = dict(zip(actor_name, actor_age))
    
actor_dict

{'actor_a': 34, 'actor_b': 40.5, 'actor_c': 67, 'actor_d': 25.7}

In [186]:
actorseriesfromdict = pd.Series(actor_dict)

In [187]:
actorseriesfromdict

actor_a    34.0
actor_b    40.5
actor_c    67.0
actor_d    25.7
dtype: float64

In [188]:
actorseriesfromdict.name = 'actor'

In [189]:
actorseriesfromdict.index.name = 'actor name'

In [190]:
actorseriesfromdict

actor name
actor_a    34.0
actor_b    40.5
actor_c    67.0
actor_d    25.7
Name: actor, dtype: float64

### dictionary comprehension

In [191]:
{name:age for name,age in zip(actor_name, actor_age)}

{'actor_a': 34, 'actor_b': 40.5, 'actor_c': 67, 'actor_d': 25.7}

In [192]:
pd.Series({name:age for name,age in zip(actor_name, actor_age)})

actor_a    34.0
actor_b    40.5
actor_c    67.0
actor_d    25.7
dtype: float64

### head and tail

In [193]:
range_series = pd.Series(range(50))

In [194]:
range_series.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [195]:
range_series.tail(n=7)

43    43
44    44
45    45
46    46
47    47
48    48
49    49
dtype: int64

In [196]:
len(range_series)

50

In [197]:
pd.options.display.min_rows = 20

In [198]:
range_series

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
26    26
27    27
28    28
29    29
30    30
31    31
32    32
33    33
34    34
35    35
36    36
37    37
38    38
39    39
40    40
41    41
42    42
43    43
44    44
45    45
46    46
47    47
48    48
49    49
dtype: int64

## Extractig by index position

In [199]:
from string import ascii_lowercase

In [200]:
ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [201]:
ascii_lowercase[0]

'a'

In [202]:
ascii_lowercase[10]

'k'

In [203]:
ascii_lowercase[25]

'z'

In [204]:
ascii_series = pd.Series(ascii_lowercase)

In [205]:
ascii_series

0    abcdefghijklmnopqrstuvwxyz
dtype: object

* this is not what i expected. lets convert it to a list to breakdown by each letter

In [206]:
letters = list(ascii_lowercase)

In [207]:
letter_series = pd.Series(letters)

In [208]:
letter_series.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [209]:
len(letter_series)

26

In [210]:
letter_series[7]

'h'

* What is the first letter?
* What is the 11th letter?
* What are the sixth through tenth letter?
* what are the last ten letter?
* what are the first 3 letter?

In [211]:
first_letter = letter_series[0]
first_letter

'a'

In [212]:
eleventh_letter = letter_series[10]
eleventh_letter

'k'

In [213]:
six_to_ten = letter_series[5:10]
six_to_ten

5    f
6    g
7    h
8    i
9    j
dtype: object

In [214]:
last_10_letter = letter_series.tail(10)
last_10_letter

16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [215]:
last_10_letter = letter_series[-10:]
last_10_letter

16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [216]:
first_3 = letter_series[0:3]
first_3

0    a
1    b
2    c
dtype: object

### label indexing

In [217]:
from string import ascii_uppercase

In [218]:
uppercase = list(ascii_uppercase)

In [219]:
data = pd.Series(data=list(ascii_uppercase), index=map(lambda x: 'label_' + x, list(ascii_lowercase)))

In [220]:
data.head(10)

label_a    A
label_b    B
label_c    C
label_d    D
label_e    E
label_f    F
label_g    G
label_h    H
label_i    I
label_j    J
dtype: object

In [221]:
data[0]

  data[0]


'A'

In [222]:
data['label_a']

'A'

### prefix suffix

In [223]:
letter_series.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [224]:
indexed_series = letter_series.add_prefix('label_')
indexed_series

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [225]:
letter_series.add_suffix('_ending')

0_ending     a
1_ending     b
2_ending     c
3_ending     d
4_ending     e
5_ending     f
6_ending     g
7_ending     h
8_ending     i
9_ending     j
10_ending    k
11_ending    l
12_ending    m
13_ending    n
14_ending    o
15_ending    p
16_ending    q
17_ending    r
18_ending    s
19_ending    t
20_ending    u
21_ending    v
22_ending    w
23_ending    x
24_ending    y
25_ending    z
dtype: object

In [226]:
letter_series.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

### .loc iloc

In [227]:
indexed_series['label_0']

'a'

In [228]:
indexed_series['label_0':'label_10']

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
dtype: object

In [229]:
indexed_series.loc['label_0':'label_10']

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
dtype: object

### boolean
* used to index select items at scale
* work with [] and .loc
* need to be same tlength as series
* _pd.Series(['A', 'B', 'C'])[[True, False, True]]_ will return A, C

In [230]:
book_series.loc[[True, True, True]]

Index_number
0    Fooled by randomness
1       The creative code
2               MathQBank
Name: book_list, dtype: object

In [231]:
indexed_series.size

26

.loc
* indexing by label

In [232]:
indexed_series.loc[[True for i in range(26)]]

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [233]:
indexed_series.loc[[True if i%2==0 else False for i in range(26)]]

label_0     a
label_2     c
label_4     e
label_6     g
label_8     i
label_10    k
label_12    m
label_14    o
label_16    q
label_18    s
label_20    u
label_22    w
label_24    y
dtype: object

.iloc
* supports extraction by position
* integer loc

In [234]:
indexed_series.iloc[0]

'a'

In [235]:
indexed_series.iloc[5:10]

label_5    f
label_6    g
label_7    h
label_8    i
label_9    j
dtype: object

In [236]:
indexed_series.iloc[[0, 5, 10]]

label_0     a
label_5     f
label_10    k
dtype: object

Callables with loc and iloc
* callables is an object that accepts some arguments, and possibly returns something back
* used for highly customized indexing
* work with [], .loc and .iloc
* a single argument function that returns indexing output:
     - a list of labels
     - a list of booleans
     - a slice etc.

In [237]:
indexed_series.loc['label_10']

'k'

In [238]:
indexed_series['label_10']

'k'

In [239]:
indexed_series.loc[lambda x: 'label_10']

'k'

In [240]:
indexed_series.loc[lambda x: ['label_10', 'label_0']]

label_10    k
label_0     a
dtype: object

In [241]:
book_series.loc[lambda x: [True, False, True]]

Index_number
0    Fooled by randomness
2               MathQBank
Name: book_list, dtype: object

In [242]:
indexed_series.loc[lambda x: [True for i in range(x.size)]]

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [243]:
def every_fifth(x):
    return [True if i%2==0 else False for i in range(x.size)]

In [244]:
indexed_series.iloc[every_fifth]

label_0     a
label_2     c
label_4     e
label_6     g
label_8     i
label_10    k
label_12    m
label_14    o
label_16    q
label_18    s
label_20    u
label_22    w
label_24    y
dtype: object

In [245]:
indexed_series.loc[every_fifth]

label_0     a
label_2     c
label_4     e
label_6     g
label_8     i
label_10    k
label_12    m
label_14    o
label_16    q
label_18    s
label_20    u
label_22    w
label_24    y
dtype: object

In [246]:
def vowels(x):
    return [True if x[i]=='a' or x[i]=='e' or x[i]=='i' or x[i]=='o' or x[i]=='u' else False for i in range(x.size)]

In [247]:
indexed_series.iloc[vowels]

  return [True if x[i]=='a' or x[i]=='e' or x[i]=='i' or x[i]=='o' or x[i]=='u' else False for i in range(x.size)]


label_0     a
label_4     e
label_8     i
label_14    o
label_20    u
dtype: object

Get method
* get() has a default value set as none
* if the specified label doesn't exist or not found it return default value
* work with indexed position as well as labeled position
* also take functions as input
* wheres, .loc and .iloc errors out

In [248]:
indexed_series.get('label_0')

'a'

In [249]:
indexed_series.get('label_notexist', default='label not found')

'label not found'

In [250]:
indexed_series.get('label_notexist', default={'label': False})

{'label': False}

In [251]:
indexed_series.get(4)

  indexed_series.get(4)


'e'

In [252]:
indexed_series.get(vowels)

  return [True if x[i]=='a' or x[i]=='e' or x[i]=='i' or x[i]=='o' or x[i]=='u' else False for i in range(x.size)]


label_0     a
label_4     e
label_8     i
label_14    o
label_20    u
dtype: object

# Summarize

## selection by label
* []indexing    
    - series['label']   
    - slices, callables(function), booleans masks

* .loc[]    
    - series.loc['label'] 
    - slices, callables(function), booleans masks

* dot access    
    - series.label    
    - no slice or boolean mask support
* .get()    
    - series.get('label') 
    - no slice support, provides default values, callables

## Selection by position

* []indexing    
    - series[10]   
    - slices, callables(function), booleans masks

* .iloc[]    
    - series.loc[10] 
    - slices, callables(function), booleans masks

* .get()    
    - series.get(10) 
    - no slice support, provides default values, callables

# Recap

In [253]:
squares = [i*i for i in range(100)]
squares

[0,
 1,
 4,
 9,
 16,
 25,
 36,
 49,
 64,
 81,
 100,
 121,
 144,
 169,
 196,
 225,
 256,
 289,
 324,
 361,
 400,
 441,
 484,
 529,
 576,
 625,
 676,
 729,
 784,
 841,
 900,
 961,
 1024,
 1089,
 1156,
 1225,
 1296,
 1369,
 1444,
 1521,
 1600,
 1681,
 1764,
 1849,
 1936,
 2025,
 2116,
 2209,
 2304,
 2401,
 2500,
 2601,
 2704,
 2809,
 2916,
 3025,
 3136,
 3249,
 3364,
 3481,
 3600,
 3721,
 3844,
 3969,
 4096,
 4225,
 4356,
 4489,
 4624,
 4761,
 4900,
 5041,
 5184,
 5329,
 5476,
 5625,
 5776,
 5929,
 6084,
 6241,
 6400,
 6561,
 6724,
 6889,
 7056,
 7225,
 7396,
 7569,
 7744,
 7921,
 8100,
 8281,
 8464,
 8649,
 8836,
 9025,
 9216,
 9409,
 9604,
 9801]

In [254]:
square_series = pd.Series(data=list(squares), name='Squared_number')

In [260]:
test = pd.Series(data=list(i*i for i in range(100)), name="squared_number")

In [261]:
test.head()

0     0
1     1
2     4
3     9
4    16
Name: squared_number, dtype: int64

In [256]:
square_series.head()

0     0
1     1
2     4
3     9
4    16
Name: Squared_number, dtype: int64

### last 3 digit 

In [266]:
indexing = square_series[-3:]
indexing

97    9409
98    9604
99    9801
Name: Squared_number, dtype: int64

In [269]:
test.iloc[-3:]

97    9409
98    9604
99    9801
Name: squared_number, dtype: int64

In [267]:
tailing = square_series.tail(3)
tailing

97    9409
98    9604
99    9801
Name: Squared_number, dtype: int64

In [268]:
indexing.equals(tailing)

True