##### python imports

In [1]:
import pandas as pd
from string import ascii_lowercase, ascii_uppercase

### Lecture 1 - What is a Series?

In [2]:
students = ['Ivan', 'Peter', 'Maria']

In [3]:
type(students)

list

In [4]:
pd.Series(students)

0     Ivan
1    Peter
2    Maria
dtype: object

In [5]:
ages = [20, 18, 14]
pd.Series(ages)

0    20
1    18
2    14
dtype: int64

In [6]:
heights = [180.2, 178.5, 156.3]
pd.Series(heights)

0    180.2
1    178.5
2    156.3
dtype: float64

In [7]:
mixed_data = [True, 'Hello', {'key': 100}, 5, 6.7, [1, 2, 3]]
pd.Series(mixed_data)

0            True
1           Hello
2    {'key': 100}
3               5
4             6.7
5       [1, 2, 3]
dtype: object

### Lecture 2 - Parameters vs Arguments

In [8]:
pd.Series(data=students)

0     Ivan
1    Peter
2    Maria
dtype: object

In [9]:
# General programming terminology:
# data -> the name used in the method or function definition (parameter)
# students -> the Python list that is passed in as the value (argument)
# example:
def functon(parameter):
    """Print the value the caller supplies for ``parameter``."""
    print(parameter)
functon('argument')

argument


### Lecture 3 - What is The Data?

In [10]:
book_list = ['book_1', 'book_2', 'book_3']
pd.Series(book_list)

0    book_1
1    book_2
2    book_3
dtype: object

In [11]:
book_dict = {0:'book_1', 1: 'book_2', 2: 'book_3'}
pd.Series(book_dict)

0    book_1
1    book_2
2    book_3
dtype: object

In [12]:
list_s = pd.Series(book_list)
dict_s = pd.Series(book_dict)
list_s.equals(dict_s)

True

### Lecture 4 - The .dtype Attribute

In [13]:
pd.Series(ages)

0    20
1    18
2    14
dtype: int64

In [14]:
pd.Series(ages, dtype=float)

0    20.0
1    18.0
2    14.0
dtype: float64

In [15]:
pd.Series(ages, dtype=str)

0    20
1    18
2    14
dtype: object

### BONUS: What is dtype('O'), Really?

In [16]:
pd.Series(heights)

0    180.2
1    178.5
2    156.3
dtype: float64

In [17]:
# the 'O' (object) dtype comes from NumPy -> if at least one value is a generic Python object (here the str '178.5'), the whole Series is stored with dtype object
heights_2 = [180.2, '178.5', 156.3]
pd.Series(heights_2)

0    180.2
1    178.5
2    156.3
dtype: object

### Lecture 5 - Index and RangeIndex

In [18]:
pd.Series(data=book_list, index=['first_book', 'second_book', 'third_book'])

first_book     book_1
second_book    book_2
third_book     book_3
dtype: object

In [19]:
pd.__version__

'1.3.2'

In [20]:
# Since version 1.0.0, pandas has a dedicated string dtype
pd.Series(data=book_list, index=['first_book', 'second_book', 'third_book'], dtype='string')

first_book     book_1
second_book    book_2
third_book     book_3
dtype: string

In [21]:
list_s.index
# it's a RangeIndex

RangeIndex(start=0, stop=3, step=1)

In [22]:
type(list_s.index)

pandas.core.indexes.range.RangeIndex

In [23]:
# let's create our own range of index values
custom_index = pd.RangeIndex(start=5, stop=15, step=3)
# the repr of a RangeIndex only shows start/stop/step, so cast it to a list to see the values
list(custom_index)

[5, 8, 11, 14]

In [24]:
# start is inclusive, but stop is exclusive
list(pd.RangeIndex(start=10, stop=-11, step=-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]

### Lecture 6 - Series and Index Names

In [25]:
# intelligible -> capable of being understood
# work on a copy, so that naming book_series (and its index) in the next cells does not also rename list_s
book_series = list_s.copy()
book_series

0    book_1
1    book_2
2    book_3
dtype: object

In [26]:
# method is a function bound to the object
list_s.equals(dict_s)

True

In [27]:
# an attribute is a variable bound to the object
book_series.size

3

In [28]:
# the name attribute is pointing to None obj
book_series.name

In [29]:
# if we compare it to None ... (use the identity check `is`, the idiomatic way to test for None)
book_series.name is None

True

In [30]:
# we could give it a name
book_series.name = 'my books'
book_series
# after we learn about DataFrames, we will see that the name of a Series
# corresponds to the name of its column in the DataFrame

0    book_1
1    book_2
2    book_3
Name: my books, dtype: object

In [31]:
# the same goes for the index of the Series: its name is None until we set one
book_series.index.name is None

True

In [32]:
book_series.index.name = 'index names'
book_series

index names
0    book_1
1    book_2
2    book_3
Name: my books, dtype: object

### Lecture 7 - Skill Challenge

In [33]:
# 1. Python list of length 4 holding favorite actors
actor_names = ['Actor1', 'Actor2', 'Actor3', 'Actor4']
# 2. Python list of length 4 holding the actors' ages
actor_ages = [25, 35, 45, 55]
# 3. Series that stores the actor ages as values and uses the actor names as index (labels)
actor_series = pd.Series(actor_ages, index=actor_names)
actor_series

Actor1    25
Actor2    35
Actor3    45
Actor4    55
dtype: int64

In [34]:
# 4. Repeat p.3 but with python dict. Try to not write the dict by hand, but dynamically
# 1st variant -> using zip (the keys become the index labels, and the vals become the values)
actor_dict_1 = dict(zip(actor_names, actor_ages))
actor_series_dict_1 = pd.Series(actor_dict_1)
actor_series_dict_1

Actor1    25
Actor2    35
Actor3    45
Actor4    55
dtype: int64

In [35]:
# 4.
# 2nd variant -> using a dict comprehension
# actor_dict_2 = {actor_names[i]: actor_ages[i] for i in range(len(actor_names))}
actor_dict_2 = {name: age for name, age in zip(actor_names, actor_ages)}
actor_series_dict_2 = pd.Series(actor_dict_2)
actor_series_dict_2

Actor1    25
Actor2    35
Actor3    45
Actor4    55
dtype: int64

### Lecture 8 - The head() and tail() methods

In [36]:
int_series = pd.Series(i for i in range(10000))
int_series

0          0
1          1
2          2
3          3
4          4
        ... 
9995    9995
9996    9996
9997    9997
9998    9998
9999    9999
Length: 10000, dtype: int64

In [37]:
# the same can be done with just range (no generator expression needed)
int_series_2 = pd.Series(range(10000))
int_series_2

0          0
1          1
2          2
3          3
4          4
        ... 
9995    9995
9996    9996
9997    9997
9998    9998
9999    9999
Length: 10000, dtype: int64

In [38]:
# to see the first 5 rows
int_series.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [39]:
# if you want to see a certain number of the first rows
int_series.head(3)

0    0
1    1
2    2
dtype: int64

In [40]:
# to see the last five rows
int_series_2.tail()

9995    9995
9996    9996
9997    9997
9998    9998
9999    9999
dtype: int64

In [41]:
# if you want to see a certain number of the last rows
int_series_2.tail(3)

9997    9997
9998    9998
9999    9999
dtype: int64

In [42]:
# we can use options to display a certain number of rows, but it's not very useful
# pd.options.display.min_rows = 20

### Lecture 9 - Extracting by Index Position

In [43]:
letters = list(ascii_lowercase)
len(letters)

26

In [44]:
alphabet = pd.Series(letters)
alphabet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [45]:
# what is the first letter?
alphabet[0]

'a'

In [46]:
# what is the 11th letter?
alphabet[10]

'k'

In [47]:
# what are the first 3 letters?
# not inclusive!!!
alphabet[:3]

0    a
1    b
2    c
dtype: object

In [48]:
# what are the letters from 6 to 10
alphabet[5:10]

5    f
6    g
7    h
8    i
9    j
dtype: object

In [49]:
# what are the last 6 letters?
alphabet[-6:]

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

### Lecture 10 - Accessing Elements by Label

In [50]:
# label every lowercase letter with 'label_' + the matching uppercase letter
# (an explicit list is used for the index instead of a lazy map object)
label_alphabet = pd.Series(
    data=list(ascii_lowercase),
    index=['label_' + letter for letter in ascii_uppercase],
)
label_alphabet.head(3)

label_A    a
label_B    b
label_C    c
dtype: object

In [51]:
# what is the first letter?
# var 1
label_alphabet['label_A']

'a'

In [52]:
# what is the first letter?
# var 2 -> if the name is ok for that
label_alphabet.label_A

'a'

In [53]:
# what is the 11th letter?
label_alphabet['label_K']

'k'

In [54]:
# what are the first 3 letters?
# inclusive!!!
label_alphabet[:'label_C']

label_A    a
label_B    b
label_C    c
dtype: object

In [55]:
# what are the letters from 6 to 10
label_alphabet['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [56]:
# what are the last 6 letters?
label_alphabet['label_U':]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

### BONUS: The add_prefix() And add_suffix() Methods

In [57]:
# pandas has a built-in method for this kind of mapping (see the lambda example from lecture 10 -> map(lambda x: 'label_ ...'))
# add_prefix puts a prefix at the beginning of every index label
# if we want to keep the changes, we must assign the result (to the original name or to a new one), e.g.:
# alphabet2 = alphabet.add_prefix('label_')
alphabet.add_prefix('label_')

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [58]:
# add_suffix puts a suffix at the end of every index label
alphabet.add_suffix('_label')

0_label     a
1_label     b
2_label     c
3_label     d
4_label     e
5_label     f
6_label     g
7_label     h
8_label     i
9_label     j
10_label    k
11_label    l
12_label    m
13_label    n
14_label    o
15_label    p
16_label    q
17_label    r
18_label    s
19_label    t
20_label    u
21_label    v
22_label    w
23_label    x
24_label    y
25_label    z
dtype: object

###  Lecture 11 - Using Dot Notation

In [59]:
# if the index label allows it (it's a continuous 'label_A', not a split 'label A'), we can access it like an attribute
label_alphabet.label_A

'a'

### Lecture 12 - Boolean Masks And The .loc Indexer

In [60]:
# loc -> location, indexing by label
# for now it is enough to know that loc is faster than slicing. We use it more with DataFrames
label_alphabet.loc['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [61]:
# the boolean mask length must be equal to the Series length, otherwise we get an IndexError
book_series.loc[[True, True, True]]

index names
0    book_1
1    book_2
2    book_3
Name: my books, dtype: object

In [62]:
book_series.loc[[True, False, True]]

index names
0    book_1
2    book_3
Name: my books, dtype: object

In [63]:
# normally we do not write True/False by hand for a big Series, but build the mask dynamically
alphabet.loc[[True] * len(alphabet)]

0     a
1     b
2     c
3     d
4     e
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [64]:
# keep only the letters at even positions (0, 2, 4, ...); the comparison already yields a bool
alphabet.loc[[i % 2 == 0 for i in range(len(alphabet))]]

0     a
2     c
4     e
6     g
8     i
10    k
12    m
14    o
16    q
18    s
20    u
22    w
24    y
dtype: object

### Lecture 13 - Extracting By Position With .iloc

In [65]:
# iloc -> int location, indexing by position
label_alphabet.iloc[0]

'a'

In [66]:
# by slicing
label_alphabet.iloc[1:3]

label_B    b
label_C    c
dtype: object

In [67]:
# by collection (list)
label_alphabet.iloc[[1, 7, 12]]

label_B    b
label_H    h
label_M    m
dtype: object

### BONUS: Using Callables With .loc And .iloc

In [68]:
label_alphabet.loc[lambda x: 'label_A']

'a'

In [69]:
label_alphabet.loc[lambda x: ['label_A', 'label_B']]

label_A    a
label_B    b
dtype: object

In [70]:
# the callable receives the Series itself (x) and returns an all-True mask of matching length
label_alphabet.loc[lambda x: [True] * x.size]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [71]:
def every_fifth_letter(letters):
    """Build a boolean mask that selects every fifth element of ``letters``.

    Parameters
    ----------
    letters : object exposing a ``size`` attribute (e.g. a pandas Series)

    Returns
    -------
    list of bool
        One flag per element; True at positions 4, 9, 14, ... (the 5th, 10th, ... element).
    """
    # the comparison already evaluates to a bool, so no `True if ... else False` is needed
    return [(i + 1) % 5 == 0 for i in range(letters.size)]

In [72]:
# pass the function itself: .iloc calls it with the Series and indexes by the boolean mask it returns
label_alphabet.iloc[every_fifth_letter]

label_E    e
label_J    j
label_O    o
label_T    t
label_Y    y
dtype: object

### Lecture 14 - Selecting With .get()

In [73]:
# get is very similar to loc and square brackets indexing
label_alphabet.get('label_A')

'a'

In [74]:
# if we pass a non-existing label, get() returns None (loc and [] would raise an error)
# `is None` is the idiomatic identity check for None
label_alphabet.get('label_not_exsting') is None

True

In [75]:
# the reason is that get() has a parameter called default, which is None by default
# we can pass some other value (obj) if we want something else to be returned
label_alphabet.get('label_not_exsiting', default='Could not find that label')

'Could not find that label'

In [76]:
label_alphabet.get('label_not_exsiting', default=25)

25

In [77]:
label_alphabet.get('label_not_exsiting', default=[1, 2.5, True, 'Hello'])

[1, 2.5, True, 'Hello']

In [78]:
label_alphabet.get('label_not_exsiting', default={'a': 1})

{'a': 1}

In [79]:
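# we can also pass a position like we do with iloc (NOTE(review): newer pandas versions deprecate this integer fallback on a non-integer index -- confirm for your version and prefer .iloc)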
label_alphabet.get(0)

'a'

### Lecture 15 - Selection Recap

In [80]:
# take a look at the PDF presentation to see all methods for selecting by label or position

### Lecture 16 - Skill Challenge

In [81]:
# build a Series of length 100 holding the squares of the integers from 0 to 99 (stored here as square_s)
square_s = pd.Series([num * num for num in range(100)])
square_s

0        0
1        1
2        4
3        9
4       16
      ... 
95    9025
96    9216
97    9409
98    9604
99    9801
Length: 100, dtype: int64

In [82]:
# Extract last three items from the Series using [] idx'ing
square_s[-3:]

97    9409
98    9604
99    9801
dtype: int64

In [83]:
# Repeat the step but this time use .tail() method
square_s.tail(3)

97    9409
98    9604
99    9801
dtype: int64

In [84]:
# verify that the output from the extractions are the same using equals() method
square_s.tail(3).equals(square_s[-3:])

True