In [1]:
import pandas as pd

## Lecture 1 - What Is A Series

In [2]:
students = ['Andrew', 'Joseph', 'Daniel']
type(students)

list

In [3]:
pd.Series(students)

0    Andrew
1    Joseph
2    Daniel
dtype: object

In [4]:
ages = [27, 37, 56]
pd.Series(ages)

0    27
1    37
2    56
dtype: int64

In [5]:
heights = [165., 170.5, 185]
pd.Series(heights)

0    165.0
1    170.5
2    185.0
dtype: float64

In [6]:
mixed = [True, "say", {"number": 100}]
pd.Series(mixed)

0               True
1                say
2    {'number': 100}
dtype: object

## Parameters Vs. Arguments

In [7]:
pd.Series(students)

0    Andrew
1    Joseph
2    Daniel
dtype: object

In [8]:
pd.Series(data=students)

0    Andrew
1    Joseph
2    Daniel
dtype: object

In [9]:
books_list = ["Best Secret", "Seven Kingdoms", "My Hero"]
list_s = pd.Series(books_list)

In [10]:
books_dict = {0: "Best Secret", 1: "Seven Kingdoms", 2: "My Hero"}
dict_s = pd.Series(books_dict)


In [11]:
list_s.equals(dict_s)

True

In [12]:
pd.Series(123)

0    123
dtype: int64

In [13]:
pd.Series("Mama")

0    Mama
dtype: object

## The dtype Attribute

In [14]:
pd.Series(ages)

0    27
1    37
2    56
dtype: int64

In [15]:
pd.Series(ages, dtype='float')

0    27.0
1    37.0
2    56.0
dtype: float64

## Index And RangeIndex

In [16]:
books_list

['Best Secret', 'Seven Kingdoms', 'My Hero']

In [17]:
pd.Series(data=books_list, index=['mystery', 'history', 'adventure'])

mystery         Best Secret
history      Seven Kingdoms
adventure           My Hero
dtype: object

In [18]:
list_s.index, type(list_s.index)

(RangeIndex(start=0, stop=3, step=1), pandas.core.indexes.range.RangeIndex)

In [19]:
list(pd.RangeIndex(start=4, stop=7, step=1))

[4, 5, 6]

In [20]:
list(pd.RangeIndex(start=10, stop=-11, step=-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]

## Series And Index Names

In [21]:
books_series = list_s

In [22]:
# capable of being understood

In [23]:
books_series

0       Best Secret
1    Seven Kingdoms
2           My Hero
dtype: object

In [24]:
books_series.size

3

In [25]:
list_s.equals(dict_s)

True

In [26]:
books_series.name is None

True

In [27]:
books_series.name = 'My Fav Books'
books_series

0       Best Secret
1    Seven Kingdoms
2           My Hero
Name: My Fav Books, dtype: object

In [28]:
books_series.index.name is None

True

In [29]:
books_series.index.name = 'My Books'
books_series

My Books
0       Best Secret
1    Seven Kingdoms
2           My Hero
Name: My Fav Books, dtype: object

## Exercises

In [30]:
actor_names = [
    "Robert Downey Jr.",
    "Meryl Streep",
    "Leonardo DiCaprio",
    "Viola Davis",
    "Denzel Washington"
]
actor_ages = [59.3, 74.2, 49, 58, 69.4]


In [31]:
actor_series = pd.Series(data=actor_ages, index=actor_names, name='actors')

In [32]:
actor_series

Robert Downey Jr.    59.3
Meryl Streep         74.2
Leonardo DiCaprio    49.0
Viola Davis          58.0
Denzel Washington    69.4
Name: actors, dtype: float64

In [33]:
# Same actor -> age mapping as the two lists above, written as a dict so the
# keys can become the Series index. One pair per line for readability.
actors_dict = {
    'Robert Downey Jr.': 59.3,
    'Meryl Streep': 74.2,
    'Leonardo DiCaprio': 49,
    'Viola Davis': 58,
    'Denzel Washington': 69.4,
}

In [34]:
pd.Series(data=actors_dict)

Robert Downey Jr.    59.3
Meryl Streep         74.2
Leonardo DiCaprio    49.0
Viola Davis          58.0
Denzel Washington    69.4
dtype: float64

In [35]:
list(zip(actor_names, actor_ages))

[('Robert Downey Jr.', 59.3),
 ('Meryl Streep', 74.2),
 ('Leonardo DiCaprio', 49),
 ('Viola Davis', 58),
 ('Denzel Washington', 69.4)]

In [36]:
pd.Series(dict(zip(actor_names, actor_ages)))

Robert Downey Jr.    59.3
Meryl Streep         74.2
Leonardo DiCaprio    49.0
Viola Davis          58.0
Denzel Washington    69.4
dtype: float64

## Another Solution

In [37]:
{name:age for name,age in zip(actor_names, actor_ages)}

{'Robert Downey Jr.': 59.3,
 'Meryl Streep': 74.2,
 'Leonardo DiCaprio': 49,
 'Viola Davis': 58,
 'Denzel Washington': 69.4}

## Head And Tail

In [38]:
int_series = pd.Series(range(60))
int_series.size, len(int_series)

(60, 60)

In [39]:
int_series.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [40]:
int_series.tail(n=3)

57    57
58    58
59    59
dtype: int64

## Extracting By Index Position

In [41]:
from string import ascii_lowercase

ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [42]:
pd.Series(ascii_lowercase)

0    abcdefghijklmnopqrstuvwxyz
dtype: object

In [43]:
list(ascii_lowercase), len(list(ascii_lowercase))

(['a',
  'b',
  'c',
  'd',
  'e',
  'f',
  'g',
  'h',
  'i',
  'j',
  'k',
  'l',
  'm',
  'n',
  'o',
  'p',
  'q',
  'r',
  's',
  't',
  'u',
  'v',
  'w',
  'x',
  'y',
  'z'],
 26)

In [44]:
letters = list(ascii_lowercase)

alphabet = pd.Series(letters)
alphabet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [45]:
alphabet[0]

'a'

In [46]:
alphabet[:3]

0    a
1    b
2    c
dtype: object

In [47]:
alphabet[5:10]

5    f
6    g
7    h
8    i
9    j
dtype: object

In [48]:
alphabet[-6:]

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

## Accessing Elements By Label

In [49]:
from string import ascii_uppercase

ascii_uppercase

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [50]:
# Values: the lowercase letters, one per row. list() is required here because
# pd.Series("abc...") would produce a single-element Series holding the whole string.
# Index: "label_A" ... "label_Z", built directly from the uppercase string
# (a str is already iterable, so no list()/map()/lambda is needed).
labeled_alphabet = pd.Series(
    list(ascii_lowercase),
    index=[f'label_{letter}' for letter in ascii_uppercase],
)
labeled_alphabet.head()

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
dtype: object

In [51]:
labeled_alphabet.iloc[0], labeled_alphabet['label_A']

('a', 'a')

In [52]:
labeled_alphabet.iloc[10], labeled_alphabet['label_K']

('k', 'k')

In [53]:
labeled_alphabet.iloc[:3]

label_A    a
label_B    b
label_C    c
dtype: object

In [54]:
labeled_alphabet[:'label_C']

label_A    a
label_B    b
label_C    c
dtype: object

In [55]:
labeled_alphabet[5:10]

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [56]:
labeled_alphabet['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [57]:
labeled_alphabet[-6:]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [58]:
labeled_alphabet['label_U':]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

## add_prefix() and add_suffix()

In [59]:
alphabet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [60]:
alphabet.add_prefix('label_')

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [61]:
# add_suffix returns a new Series (the add_prefix call above left `alphabet`
# untouched), so the result is rebound to `alphabet` to keep it.
# Note: re-running this cell appends the suffix to the labels a second time.
alphabet = alphabet.add_suffix('_cool_ending')

In [62]:
alphabet.head()

0_cool_ending    a
1_cool_ending    b
2_cool_ending    c
3_cool_ending    d
4_cool_ending    e
dtype: object

## Using Dot Notation

In [63]:
labeled_alphabet['label_V'], labeled_alphabet.label_V


('v', 'v')

In [64]:
labeled_alphabet['label_V':'label_X']

label_V    v
label_W    w
label_X    x
dtype: object

## Boolean Masks And The .loc Indexer

In [65]:
labeled_alphabet['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [66]:
# loc

labeled_alphabet.loc['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [67]:
# boolean

books_series

My Books
0       Best Secret
1    Seven Kingdoms
2           My Hero
Name: My Fav Books, dtype: object

In [68]:
books_series.loc[[True, False, True]]

My Books
0    Best Secret
2        My Hero
Name: My Fav Books, dtype: object

In [69]:
labeled_alphabet.size

26

In [70]:
# A boolean mask has to be passed as ONE list-like with an entry per row.
# Writing the flags bare, e.g. `.loc[True, False, True]`, hands `.loc` a tuple
# of three separate indexers instead of a mask.

# An all-True mask of the right length selects every row.
labeled_alphabet.loc[[True] * labeled_alphabet.size]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [71]:
# `i % 2 == 0` is already a bool, so it is used as the mask entry directly.
# The mask keeps the rows at even positions (0, 2, 4, ...).
labeled_alphabet.loc[[i % 2 == 0 for i in range(labeled_alphabet.size)]]

label_A    a
label_C    c
label_E    e
label_G    g
label_I    i
label_K    k
label_M    m
label_O    o
label_Q    q
label_S    s
label_U    u
label_W    w
label_Y    y
dtype: object

## Extracting By Position With .iloc

In [72]:
# iloc => integer loc => indexing by position
# loc => location => indexing by label

In [73]:
labeled_alphabet.iloc[0], labeled_alphabet.iloc[1]

('a', 'b')

In [74]:
labeled_alphabet.iloc[1:3]

label_B    b
label_C    c
dtype: object

In [75]:
labeled_alphabet.iloc[[1, 4, 9]]

label_B    b
label_E    e
label_J    j
dtype: object

## Using Callables With .loc And .iloc

In [76]:
labeled_alphabet.loc['label_V']

'v'

In [77]:
# .loc also accepts a callable; whatever it returns is used as the indexer.
# This lambda ignores its argument and always returns the single label
# 'label_V', so the result matches .loc['label_V'] from the previous cell.
labeled_alphabet.loc[lambda x: 'label_V']

'v'

In [78]:
labeled_alphabet.loc[lambda x: ['label_V', 'label_X']]


label_V    v
label_X    x
dtype: object

In [79]:
# The callable is handed the Series being indexed, so the mask is sized from
# that argument (`x.size`) rather than from the global `labeled_alphabet`.
# `i % 2 == 0` is already a bool: rows at even positions are kept.
labeled_alphabet.loc[lambda x: [i % 2 == 0 for i in range(x.size)]]

label_A    a
label_C    c
label_E    e
label_G    g
label_I    i
label_K    k
label_M    m
label_O    o
label_Q    q
label_S    s
label_U    u
label_W    w
label_Y    y
dtype: object

In [80]:
def every_fifth(x):
    """Build a boolean mask that selects every fifth position of ``x``.

    Intended to be passed as a callable to ``.loc`` / ``.iloc``, which call it
    with the object being indexed.

    Parameters
    ----------
    x : any object exposing an integer ``size`` attribute (e.g. a Series).

    Returns
    -------
    list of bool
        One entry per position; ``True`` at positions 0, 5, 10, ... and
        ``False`` elsewhere. Empty when ``x.size`` is 0.
    """
    # The comparison already yields a bool, so no `True if ... else False` is needed.
    return [position % 5 == 0 for position in range(x.size)]

In [81]:
labeled_alphabet.iloc[every_fifth]

label_A    a
label_F    f
label_K    k
label_P    p
label_U    u
label_Z    z
dtype: object

## Selecting With .get()

In [82]:
labeled_alphabet.get('label_I'), labeled_alphabet.loc['label_I'], labeled_alphabet['label_I']

('i', 'i', 'i')

In [83]:
labeled_alphabet.get('label_Unknown', default='Could not find any labels')

'Could not find any labels'

## Challenge

1. Create a Series of length 100 containing the squares of the integers from 0 to 99. Assign it to the variable `squares`.

In [89]:
# Build 0..99 as a Series first, then square every element in one vectorised
# step instead of looping in Python.
squares = pd.Series(range(100)).pow(2)
squares.tail()

95    9025
96    9216
97    9409
98    9604
99    9801
dtype: int64

2. Extract the last three items from the `squares` Series using square-bracket indexing.

In [91]:
squares[-3:]

97    9409
98    9604
99    9801
dtype: int64

3. Repeat step 2, but using the `tail` method.

In [92]:
squares.tail(3)

97    9409
98    9604
99    9801
dtype: int64

4. Verify that the outputs of steps 2 and 3 are the same using the `equals` method.

In [93]:
squares.tail(3).equals(squares[-3:])

True