### Exception handling

In [1]:
# Python will throw a ZeroDivisionError exception
2 / 0

ZeroDivisionError: division by zero

In [2]:
# Python will throw a TypeError exception
2 + "five"

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [3]:
# Exception handling. This allows us to continue executing
# our program even when errors occur.
try:
    #x = 2 + "five"       # Throws TypeError exception
    #print(x)
    2 / 0                # Throws ZeroDivisionError exception
    print("I managed to divide a number by zero")
except ZeroDivisionError:      # Catches ZeroDivisionError exception
    print("I made a mistake but I don't care!")
except TypeError:              # Catches TypeError exception
    print("WARNING: A TypeError occurred")
print("I am now continuing with my program")

I made a mistake but I don't care!
I am now continuing with my program


In [4]:
x = [1,2,4]
x[5]

IndexError: list index out of range

In [5]:
raise TypeError("My error message")

TypeError: My error message

### None, truthy and falsy values

In [6]:
x = None

In [7]:
print(x)

None


In [8]:
if x is not None:
    print(x)
else:
    print("not x")

not x


In [9]:
x = False
#x = ""        # Falsy
#x = None      # Falsy
#x = 0         # Falsy
#x = []        # Falsy
#x = 2         # Truthy
#x = "hello"   # Truthy
#x = [1, 2]     # Truthy

#if x is not None:
#    print(x)

#if x:     # is there "something" in x?
#    print("yes")
#else:
#    print("no")
    
#if x != "":
#    print("yes")
#else:
#    print("no")

In [10]:
def f():
    """Print a greeting; the caller gets no meaningful value back."""
    print("hello there")
    # Spelled out explicitly: this is exactly what a bare "return" hands back.
    return None
f()

hello there


In [11]:
print(f())

hello there
None


### Iterators and generators

Iterators are important because they are the constructs that allow us to write loops like
`for x in my_list...`. 

In [12]:
# my_list is an "iterable"; it can be iterated over
my_list = [2, 3, 5, 1, 7]

In [13]:
# my_list_iterator is an "iterator": it's a pointer that moves over each element
# of my_list, starting from the first element until a StopIteration exception is
# thrown
my_list_iterator = iter(my_list)

In [14]:
# __next__() is the key method for an iterator
next(my_list_iterator)     # equiv. to my_list_iterator.__next__()

2

In [15]:
next(my_list_iterator) 

3

In [16]:
next(my_list_iterator) 

5

In [17]:
next(my_list_iterator) 

1

In [18]:
next(my_list_iterator) 

7

In [19]:
next(my_list_iterator) 

StopIteration: 

In [20]:
# A "generator function" (or generator) is a special type of function that returns a SERIES
# of values via "yield" instead of "return".
def doubler(numbers):
    """Lazily produce every element of ``numbers`` multiplied by two."""
    for value in numbers:
        # 'yield' hands one result back to the caller and then pauses the
        # function right here; the next request for a value resumes the loop
        # from this point instead of starting over.
        doubled = value * 2
        yield doubled


In [21]:
# What the generator returns is a "generator iterator" that
# can be used in (for example) a standard "for...in" loop
my_numbers = [2, 3, 5, 7, 1]
for num in doubler(my_numbers):
    print(num)

4
6
10
14
2


In [22]:
# If we simply output the result of doubler() directly, we can see that we've made a generator
doubler([1,2,3])

<generator object doubler at 0x10cadcdb0>

In [23]:
# Remember that a generator is an iterator, so we can use next() as before...
my_generator_iterator = doubler([2, 3, 6])
next(my_generator_iterator)

4

In [24]:
next(my_generator_iterator)

6

In [25]:
next(my_generator_iterator)

12

In [26]:
next(my_generator_iterator)

StopIteration: 

In [27]:
# We can easily achieve the same results without using a generator.
# But notice that this time, we need to create a 'doubled_list' variable.
# This list will be the same size as the original list, so we've wasted memory.
# A generator on the other hand will only need to store the value it is currently
# operating on.
def doubler_without_generator(numbers):
    """Return a new list holding every element of ``numbers`` doubled."""
    # The complete result list is built in one go, before anything is
    # returned to the caller.
    doubled_list = [2 * value for value in numbers]
    return doubled_list

my_numbers = [2, 3, 5, 7, 1]
for num in doubler_without_generator(my_numbers):
    print(num)

4
6
10
14
2


In [28]:
# Generator expression: each number is only doubled when needed. This is unlike a list comprehension, where every
# element of the list will be stored in memory.
doubled_numbers = (num * 2 for num in my_numbers)
doubled_numbers
#doubled_numbers[0]     # won't work

<generator object <genexpr> at 0x10cadc9e8>

In [29]:
for num in doubled_numbers:
    print(num)

4
6
10
14
2


In [30]:
# Note that once we've worked our way to the end of the doubled_numbers generator, trying to use it again
# outputs nothing. This is because we've "consumed" the output of the generator.
# Remember that iterators only go forwards, not backwards!
for num in doubled_numbers:
    print(num)

In [31]:
# Slightly more complicated generator to calculate prime factors
import math
def is_prime(n):
    """Return True if n is a prime number, False otherwise.

    Uses trial division by every integer from 2 up to the integer part of
    sqrt(n).

    Args:
        n: The integer to test.

    Returns:
        bool: True when n is prime. Anything below 2 (0, 1 and negative
        numbers) is not prime, so False is returned for those.
    """
    # Primes start at 2. Testing "n < 2" rather than "n == 1" also rejects 0
    # (which would otherwise fall through the empty loop below and be
    # reported as prime) and negative numbers (for which math.sqrt raises
    # ValueError).
    if n < 2:
        return False
    # Any divisor larger than sqrt(n) is paired with one smaller than
    # sqrt(n), so there is no need to look beyond sqrt(n).
    for i in range(2, int(math.sqrt(n)) + 1):
        if n % i == 0:
            return False

    return True


def prime_factors(n):
    """Generate the distinct prime factors of n in ascending order.

    Each prime factor is yielded once, however many times it divides n
    (e.g. 102010 = 2 * 5 * 101 * 101 yields 2, 5, 101).

    Args:
        n: The integer to factorise.

    Yields:
        int: The next prime number that divides n. Nothing is yielded when
        n is below 2.
    """
    # Candidates run from 2 (the smallest prime) up to and including n:
    # a prime number is its own prime factor, so n itself must be tested too.
    for i in range(2, n + 1):
        if n % i == 0 and is_prime(i):
            yield i

In [32]:
for factor in prime_factors(102010):
    print(factor)

2
5
101


In [33]:
# zip() takes the elements of parallel lists and pairs them together. In Python 3, zip() returns an iterator.
list1 = ["name", "age", "postcode"]
list2 = ["Iskander", 52, "BE1 2RY"]
zipped_list = zip(list1, list2)

In [34]:
# This is an iterator. On its own, it's not very useful.
zipped_list

<zip at 0x10ca8c088>

In [35]:
# If we really need to get all the elements out in one go...
#list(zipped_list)

In [36]:
# This will construct item1, item2 "lazily" (i.e. as they're needed); nothing beyond
# these two elements is stored in memory
for item1, item2 in zipped_list:
    print(item1, item2)

name Iskander
age 52
postcode BE1 2RY


In [37]:
# Empty, we've already "consumed" all the elements in the zip iterator
print(list(zipped_list))

[]


In [38]:
print(list(zip(list1, list2)))

[('name', 'Iskander'), ('age', 52), ('postcode', 'BE1 2RY')]


In [39]:
# Zipping lists of different sizes stops at the shortest one, so the extra elements of the longer list are dropped
list(zip([1,2,3], [3,4,5,6]))

[(1, 3), (2, 4), (3, 5)]

### Packing and unpacking arguments

In [40]:
# Star operator unpacks list into positional arguments
def my_fun(x, y, z):
    """Add the three arguments together, left to right, and return the result."""
    partial_sum = x + y
    return partial_sum + z
my_list = [1, 2, 3]
#print(my_fun(my_list))    # doesn't work
#print(my_fun(my_list[0], my_list[1], my_list[2]))    # this works, but it's ugly
print(my_fun(*my_list))

6


In [41]:
# Double star operator unpacks dict into keyword arguments
my_dict = {'x': 1, 'y': 2, 'z': 4}
print(my_fun(**my_dict))   # equiv. to print(my_fun(x=1, y=2, z=4))

7


In [42]:
# Getting arguments without explicitly naming them
# Here, 'args' refers to positional arguments
# 'kwargs' refers to keyword arguments
def my_function(*args, **kwargs):
    """Print the collected positional arguments, then the keyword arguments."""
    # args arrives as a tuple, kwargs as a dictionary; show them in that order
    for collected in (args, kwargs):
        print(collected)

# Note that keyword arguments must follow positional arguments (the other way round is a SyntaxError)
my_function(1, 2, 3, hello=1, world=3)

(1, 2, 3)
{'hello': 1, 'world': 3}


### Introspection

In [43]:
# Get the attributes of a class/object using dir()
from sklearn.linear_model import LinearRegression
model = LinearRegression()
dir(model)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_decision_function',
 '_estimator_type',
 '_get_param_names',
 '_preprocess_data',
 '_set_intercept',
 'copy_X',
 'fit',
 'fit_intercept',
 'get_params',
 'n_jobs',
 'normalize',
 'predict',
 'score',
 'set_params']

In [44]:
model = LinearRegression()
type(model)

sklearn.linear_model.base.LinearRegression

In [45]:
x = 22
type(x)

int

In [46]:
# Exact type checking
# Performing different actions based on the type of an object is not Pythonic
# and should generally be avoided. Instead, use try-and-except (in Python, it's
# "better to ask forgiveness than permission").
if type(model) is LinearRegression:
    print("i'm of type LinearRegression")
else:
    print("i'm not of type LinearRegression")

i'm of type LinearRegression


In [47]:
# Unlike type(), isinstance() also takes inheritance into account
# BaseEstimator is a parent of LinearRegression
from sklearn.base import BaseEstimator

if isinstance(model, BaseEstimator):
    print("i'm an instance of BaseEstimator")
else:
    print("i'm not an instance of BaseEstimator")

if type(model) is BaseEstimator:
    print("i'm of type BaseEstimator")
else:
    print("i'm not of type BaseEstimator")

i'm an instance of BaseEstimator
i'm not of type BaseEstimator


### Anonymous (lambda) functions

In [48]:
# Sorting is easy...
x = [32, 21, 55, 34]
sorted(x)

[21, 32, 34, 55]

In [49]:
# List of (name, age) tuples
x = [("John", 32), ("Kate", 21), ("Barry", 55), ("Jo", 34)]

# How do we sort by age? Define a custom "key" to be used
# for sorting.
def get_age(pair):
    """Return the second item (the age) of a (name, age) tuple."""
    age = pair[1]
    return age

sorted(x, key=get_age)

[('Kate', 21), ('John', 32), ('Jo', 34), ('Barry', 55)]

In [50]:
# It's annoying to have to write a completely separate function
# that does something extremely simple and is only going to be used
# inside the sorted() call. We can instead define an "anonymous function"
# using the lambda syntax, allowing us to define the function inside the sorted()
# itself.
# In this case, 'pair' is the single argument for the anonymous function and
# 'pair[1]' is the return value.
sorted(x, key=lambda pair:pair[1])

[('Kate', 21), ('John', 32), ('Jo', 34), ('Barry', 55)]

In [51]:
# Yet another way of doing the same thing...
from operator import itemgetter
sorted(x, key=itemgetter(1))

[('Kate', 21), ('John', 32), ('Jo', 34), ('Barry', 55)]