In [1]:
import numpy as np
import pandas as pd
import os

# Functions

Functions in Python are "first class", meaning you can assign them to variables and pass them as inputs to other functions if you want.

In [2]:
def double(x):
    """Return twice the value of x."""
    doubled = 2 * x
    return doubled


def apply_to_one(f):
    """Call f with 1 as its only argument and return whatever f returns."""
    result = f(1)
    return result


# Functions are ordinary objects: bind one to a second name and pass it along.
my_double = double
x = apply_to_one(my_double)

print(x)

2


lambdas are "anonymous" functions that are self-contained

In [3]:
y = apply_to_one(lambda x: x +4)
print(y)

# The lambda is itself a function, so apply_to_one calls it with x = 1, giving 1 + 4 = 5

5


Default arguments to functions are specified in the opening line:

In [4]:
def my_print(message="my default message"):
    """Print message; a stock message is printed when none is supplied."""
    print(message)


my_print("Hello")  # an explicit argument replaces the default
my_print()  # no argument, so the default is printed

Hello
my default message


You can also specify arguments by name:

In [5]:
def full_name(first="Joe", last="Bloggs"):
    """Return first and last joined by a single space."""
    prefix = first + " "
    return prefix + last


full_name("Joel", "Gros")  # both given positionally
full_name("Joel")  # last falls back to its default
full_name(last='Mackenzie')  # given by name; first falls back to its default

'Joe Mackenzie'

# Strings

Python treats special characters like tab and newline as a single character, written as an escape sequence that starts with a backslash.

To interpret backslashes as backslashes, preface your string quotations with an "r".

In [6]:
print(len("\t"))
print(len(r"\t"))

1
2


Use triple-quotes to create multiline strings:

In [7]:
multi_line = """This is line 1.
This is line 2.
This is line 3."""

print(multi_line)

This is line 1.
This is line 2.
This is line 3.


f-strings are useful when incorporating the value(s) of variable(s) into your string:

In [8]:
first_name = "Calum"
last_name = "Mackenzie"

# Plain concatenation works as well. The result is deliberately not called
# `full_name`: that name belongs to the full_name() function defined earlier,
# and rebinding it to a string would make later calls to full_name(...) fail
# with "TypeError: 'str' object is not callable".
joined_name = first_name + " " + last_name

print(f"{first_name} {last_name}")  # f-string
print("{0} {1}".format(first_name,last_name))  # str.format equivalent

Calum Mackenzie
Calum Mackenzie


# Exceptions

Exceptions tell Python what to do when it runs into errors in your code, rather than just crash.

In [9]:
try:
    print(0/0)
except ZeroDivisionError:
    print("Can't divide by zero.")

Can't divide by zero.


# Lists

In [10]:
integer_list = [1,2,3]
hetero_list = [1,'a',3.14,True]
list_of_lists = [integer_list,
                 hetero_list,
                 []]

print(len(hetero_list))
print(len(list_of_lists))

4
3


## Membership 

In [11]:
1 in [1,2,3]

True

In [12]:
5 in "a,b,c".split(",")

False

## Slicing

In [13]:
x = list(range(0,10))
x

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [14]:
x[:3]

[0, 1, 2]

In [15]:
x[3:]

[3, 4, 5, 6, 7, 8, 9]

In [16]:
x[:]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [17]:
x[1:-1]

[1, 2, 3, 4, 5, 6, 7, 8]

In [18]:
x[::3]

[0, 3, 6, 9]

In [19]:
x[5:2:-1]

[5, 4, 3]

## Extension

In [20]:
y = [1,2,3]

y.extend([4,5,6])

y

[1, 2, 3, 4, 5, 6]

In [21]:
y = [1,2,3]

y.append([4,5,6])

y

[1, 2, 3, [4, 5, 6]]

In [22]:
x = [1,2,3]

y = x + [4,5,6]

y

[1, 2, 3, 4, 5, 6]

## Unpacking

In [23]:
x, y = [1,2]

You raise an error if the number of variables on the left doesn't equal the length of the list.

In [24]:
# Unpacking fails when the counts differ. Catch the error so the notebook
# still runs top to bottom, and print the message Python produced.
try:
    x, y, z = [4,5]
except ValueError as err:
    unpack_error = err  # `err` itself is unbound once the except block ends
    print(f"ValueError: {unpack_error}")

ValueError: not enough values to unpack (expected 3, got 2)

Convention: assign values you aren't using to variable _

In [25]:
_, y = [7,8]

y

8

# Tuples

Just like lists, but they're immutable

In [26]:
lil_tup = (1,2)

try:
    lil_tup[1] = 3
except TypeError:
    print("Can't modify a tuple!")

Can't modify a tuple!


they're convenient for returning values from functions:

In [27]:
def sum_and_prod(x, y):
    """Return the pair (x + y, x * y) as a tuple."""
    total = x + y
    product = x * y
    return total, product


sp = sum_and_prod(2, 3)  # keep the whole tuple
s, p = sum_and_prod(5, 10)  # or unpack it straight into two names

In [28]:
print(sp)
print(s)
print(p)

(5, 6)
15
50


They (and lists) can be used for "multiple assignment":

In [29]:
x,y = 1,2
print(x)
x,y = y,x # Super pythonic
print(x)

1
2


# Dictionaries

Fundamental data structure. Dicts map keys -> values

In [30]:
empty_dict = {} # Pythonic
empty_dict2 = dict() # Not so pythonic
grades = {"Joel":80,"Tim":95} # Literal

In [31]:
joels_grade = grades["Joel"]
print(joels_grade)

80


You raise a KeyError if you ask for a key that isn't in the dict:

In [32]:
try:
    grades["Jess"]
except KeyError:
    print("No grade for them!")

No grade for them!


## Membership

In [33]:
"Steven" in grades

False

In [34]:
"Tim" in grades

True

Dicts have a .get method that returns a default value rather than raise an exception when you look up a key that is not in the dict:

In [35]:
joels_grade = grades.get("Joel",0) # 80
bens_grade = grades.get("Ben",0) # 0 default
nobodys_grade = grades.get("Nobody") # None default

## Assignment

Add new k-v pairs with square brackets:

In [36]:
grades["Tim"] = 90 # Updated value
grades["Kate"] = 100 # New value
num_students = len(grades)
print(num_students)

3


Dicts are used to represent structured data all the time, and are very close to JSON (basically equal):

In [37]:
tweet = {
    "user":"Calum Mackenzie",
    "text":"9/11 was an inside job",
    "retweet_count":100,
    "hashtags":"#datascience,#accuracy,#crypto".split(",")
}

tweet

{'user': 'Calum Mackenzie',
 'text': '9/11 was an inside job',
 'retweet_count': 100,
 'hashtags': ['#datascience', '#accuracy', '#crypto']}

Other super useful dict methods for accessing all keys/all values/all pairs:

In [38]:
# Keys
tweet.keys()

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [39]:
# Values
tweet.values()

dict_values(['Calum Mackenzie', '9/11 was an inside job', 100, ['#datascience', '#accuracy', '#crypto']])

In [40]:
# Tuples of k,v
tweet.items()

dict_items([('user', 'Calum Mackenzie'), ('text', '9/11 was an inside job'), ('retweet_count', 100), ('hashtags', ['#datascience', '#accuracy', '#crypto'])])

## defaultdict

Imagine you're trying to count the words in a document.

An obvious way to do this would be to iterate over your document and define a dict where the words are the keys and the values are their count.

As you iterate, if a word is already in the keys you increment by one; if not you add a new key and set its value to 1.

### basic way

In [41]:
document = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.".split(" ")

word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1
        
word_counts

{'Lorem': 1,
 'ipsum': 1,
 'dolor': 2,
 'sit': 1,
 'amet,': 1,
 'consectetur': 1,
 'adipiscing': 1,
 'elit,': 1,
 'sed': 1,
 'do': 1,
 'eiusmod': 1,
 'tempor': 1,
 'incididunt': 1,
 'ut': 2,
 'labore': 1,
 'et': 1,
 'dolore': 2,
 'magna': 1,
 'aliqua.': 1,
 'Ut': 1,
 'enim': 1,
 'ad': 1,
 'minim': 1,
 'veniam,': 1,
 'quis': 1,
 'nostrud': 1,
 'exercitation': 1,
 'ullamco': 1,
 'laboris': 1,
 'nisi': 1,
 'aliquip': 1,
 'ex': 1,
 'ea': 1,
 'commodo': 1,
 'consequat.': 1,
 'Duis': 1,
 'aute': 1,
 'irure': 1,
 'in': 3,
 'reprehenderit': 1,
 'voluptate': 1,
 'velit': 1,
 'esse': 1,
 'cillum': 1,
 'eu': 1,
 'fugiat': 1,
 'nulla': 1,
 'pariatur.': 1,
 'Excepteur': 1,
 'sint': 1,
 'occaecat': 1,
 'cupidatat': 1,
 'non': 1,
 'proident,': 1,
 'sunt': 1,
 'culpa': 1,
 'qui': 1,
 'officia': 1,
 'deserunt': 1,
 'mollit': 1,
 'anim': 1,
 'id': 1,
 'est': 1,
 'laborum.': 1}

### slightly more robust way (use an exception)

In [42]:
word_counts = {}

for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1
        
word_counts

{'Lorem': 1,
 'ipsum': 1,
 'dolor': 2,
 'sit': 1,
 'amet,': 1,
 'consectetur': 1,
 'adipiscing': 1,
 'elit,': 1,
 'sed': 1,
 'do': 1,
 'eiusmod': 1,
 'tempor': 1,
 'incididunt': 1,
 'ut': 2,
 'labore': 1,
 'et': 1,
 'dolore': 2,
 'magna': 1,
 'aliqua.': 1,
 'Ut': 1,
 'enim': 1,
 'ad': 1,
 'minim': 1,
 'veniam,': 1,
 'quis': 1,
 'nostrud': 1,
 'exercitation': 1,
 'ullamco': 1,
 'laboris': 1,
 'nisi': 1,
 'aliquip': 1,
 'ex': 1,
 'ea': 1,
 'commodo': 1,
 'consequat.': 1,
 'Duis': 1,
 'aute': 1,
 'irure': 1,
 'in': 3,
 'reprehenderit': 1,
 'voluptate': 1,
 'velit': 1,
 'esse': 1,
 'cillum': 1,
 'eu': 1,
 'fugiat': 1,
 'nulla': 1,
 'pariatur.': 1,
 'Excepteur': 1,
 'sint': 1,
 'occaecat': 1,
 'cupidatat': 1,
 'non': 1,
 'proident,': 1,
 'sunt': 1,
 'culpa': 1,
 'qui': 1,
 'officia': 1,
 'deserunt': 1,
 'mollit': 1,
 'anim': 1,
 'id': 1,
 'est': 1,
 'laborum.': 1}

### slightly more elegant way (use get)

In [43]:
word_counts = {}

for word in document:
    previous_count = word_counts.get(word,0)
    word_counts[word] = previous_count + 1
    
word_counts

{'Lorem': 1,
 'ipsum': 1,
 'dolor': 2,
 'sit': 1,
 'amet,': 1,
 'consectetur': 1,
 'adipiscing': 1,
 'elit,': 1,
 'sed': 1,
 'do': 1,
 'eiusmod': 1,
 'tempor': 1,
 'incididunt': 1,
 'ut': 2,
 'labore': 1,
 'et': 1,
 'dolore': 2,
 'magna': 1,
 'aliqua.': 1,
 'Ut': 1,
 'enim': 1,
 'ad': 1,
 'minim': 1,
 'veniam,': 1,
 'quis': 1,
 'nostrud': 1,
 'exercitation': 1,
 'ullamco': 1,
 'laboris': 1,
 'nisi': 1,
 'aliquip': 1,
 'ex': 1,
 'ea': 1,
 'commodo': 1,
 'consequat.': 1,
 'Duis': 1,
 'aute': 1,
 'irure': 1,
 'in': 3,
 'reprehenderit': 1,
 'voluptate': 1,
 'velit': 1,
 'esse': 1,
 'cillum': 1,
 'eu': 1,
 'fugiat': 1,
 'nulla': 1,
 'pariatur.': 1,
 'Excepteur': 1,
 'sint': 1,
 'occaecat': 1,
 'cupidatat': 1,
 'non': 1,
 'proident,': 1,
 'sunt': 1,
 'culpa': 1,
 'qui': 1,
 'officia': 1,
 'deserunt': 1,
 'mollit': 1,
 'anim': 1,
 'id': 1,
 'est': 1,
 'laborum.': 1}

All of the above are a bit fiddly, which is why defaultdict is useful.

defaultdict is exactly like a basic dictionary, except that when you look up a key not in the dict it creates this key with a default value according to a zero-argument function of your choosing.

You import defaultdict from collections

In [44]:
from collections import defaultdict

word_counts = defaultdict(int) # int() produces 0

for word in document:
    word_counts[word] += 1
    
word_counts

defaultdict(int,
            {'Lorem': 1,
             'ipsum': 1,
             'dolor': 2,
             'sit': 1,
             'amet,': 1,
             'consectetur': 1,
             'adipiscing': 1,
             'elit,': 1,
             'sed': 1,
             'do': 1,
             'eiusmod': 1,
             'tempor': 1,
             'incididunt': 1,
             'ut': 2,
             'labore': 1,
             'et': 1,
             'dolore': 2,
             'magna': 1,
             'aliqua.': 1,
             'Ut': 1,
             'enim': 1,
             'ad': 1,
             'minim': 1,
             'veniam,': 1,
             'quis': 1,
             'nostrud': 1,
             'exercitation': 1,
             'ullamco': 1,
             'laboris': 1,
             'nisi': 1,
             'aliquip': 1,
             'ex': 1,
             'ea': 1,
             'commodo': 1,
             'consequat.': 1,
             'Duis': 1,
             'aute': 1,
             'irure': 1,
             'in

The default value can be a variety of data structures:

In [45]:
dd_list = defaultdict(list) # an empty list
dd_list[2].append(1)
dd_list

defaultdict(list, {2: [1]})

In [46]:
dd_dict = defaultdict(dict) # an empty dict
dd_dict["Joel"]["City"] = "Glasgow"
dd_dict

defaultdict(dict, {'Joel': {'City': 'Glasgow'}})

In [47]:
dd_pair = defaultdict(lambda: [0,0]) # [0,0]
dd_pair[2][1] = 1
dd_pair

defaultdict(<function __main__.<lambda>()>, {2: [0, 1]})

# Counters

A counter turns a sequence of values into a defaultdict(int)-like object mapping keys to counts:

In [48]:
from collections import Counter
c = Counter([0,1,2,0])
c

Counter({0: 2, 1: 1, 2: 1})

In [49]:
word_counts = Counter(document)
word_counts

Counter({'Lorem': 1,
         'ipsum': 1,
         'dolor': 2,
         'sit': 1,
         'amet,': 1,
         'consectetur': 1,
         'adipiscing': 1,
         'elit,': 1,
         'sed': 1,
         'do': 1,
         'eiusmod': 1,
         'tempor': 1,
         'incididunt': 1,
         'ut': 2,
         'labore': 1,
         'et': 1,
         'dolore': 2,
         'magna': 1,
         'aliqua.': 1,
         'Ut': 1,
         'enim': 1,
         'ad': 1,
         'minim': 1,
         'veniam,': 1,
         'quis': 1,
         'nostrud': 1,
         'exercitation': 1,
         'ullamco': 1,
         'laboris': 1,
         'nisi': 1,
         'aliquip': 1,
         'ex': 1,
         'ea': 1,
         'commodo': 1,
         'consequat.': 1,
         'Duis': 1,
         'aute': 1,
         'irure': 1,
         'in': 3,
         'reprehenderit': 1,
         'voluptate': 1,
         'velit': 1,
         'esse': 1,
         'cillum': 1,
         'eu': 1,
         'fugiat': 1,
         '

# Sets

Sets are a data structure that contains distinct elements, i.e. no duplicates

In [50]:
primes_below_10 = {2,3,5,7}
primes_below_10

{2, 3, 5, 7}

To define an empty set, you need to use set()

In [51]:
s = set()
s.add(1)
s.add(2)
print(s)
s.add(2)
print(s)

{1, 2}
{1, 2}


In [52]:
y = 2 in s
y

True

In [53]:
z = 5 in s
z

False

Two main benefits of sets:
    - membership operations are *super* fast in sets
    - great for finding the distinct elements in a collection

In [54]:
stopwords_list = ["a", "an", "at"] + ["..."] + ["yet", "you"]
"zip" in stopwords_list # False, but have to check every element in list

stopwords_set = set(stopwords_list)
"zip" in stopwords_set # False, but suuuuuuper quick to check

False

In [55]:
item_list = [1,2,3,1,2,3]
num_items = len(item_list)
item_set = set(item_list)
num_distinct_items = len(item_set)
distinct_item_list = list(item_set)

# Sorting

Every Python list has a sort method that sorts it in place.

If you want to return a new list, use the built-in `sorted` function instead.

In [56]:
x = [4,1,2,3]
print(sorted(x))
print(x)
x.sort(reverse=False)
print(x)

[1, 2, 3, 4]
[4, 1, 2, 3]
[1, 2, 3, 4]


# List comprehensions

The pythonic way to create lists from other lists.

In [57]:
even_numbers = [x for x in range(5) if x % 2 == 0]
even_numbers

[0, 2, 4]

In [58]:
squares = [x*x for x in range(5)]
squares

[0, 1, 4, 9, 16]

In [59]:
even_squares = [x*x for x in even_numbers]
even_squares

[0, 4, 16]

You can also turn lists into dictionaries or sets:

In [60]:
square_dict = {x:x*x for x in range(5)}
square_dict

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [61]:
square_set = {x*x for x in [1,-1]}
square_set

{1}

If you don't need the value from a list, it's convention to use _ in the list comp

In [62]:
zeros = [0 for _ in even_numbers]
zeros

[0, 0, 0]

List comps can contain multiple fors:

In [63]:
pairs = [(x,y) 
         for x in range(10) 
         for y in range(10)]

# 100 pairs from (0,0) (0,1) ... (9,8) (9,9)
pairs

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (1, 8),
 (1, 9),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (2, 8),
 (2, 9),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (4, 0),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (5, 0),
 (5, 1),
 (5, 2),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (6, 0),
 (6, 1),
 (6, 2),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (7, 0),
 (7, 1),
 (7, 2),
 (7, 3),
 (7, 4),
 (7, 5),
 (7, 6),
 (7, 7),
 (7, 8),
 (7, 9),
 (8, 0),
 (8, 1),
 (8, 2),
 (8, 3),
 (8, 4),
 (8, 5),
 (8, 6),
 (8, 7),
 (8, 8),
 (8, 9),
 (9, 0),
 (9, 1),
 (9, 2),
 (9, 3),
 (9, 4),
 (9, 5),
 (9, 6),
 (9, 7),
 (9, 8),
 (9, 9)]

Later fors can use the results from earlier ones:

In [64]:
increasing_pairs = [(x,y) 
                    for x in range(10) 
                    for y in range(x+1,10)]

increasing_pairs

[(0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (1, 8),
 (1, 9),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (2, 8),
 (2, 9),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (6, 7),
 (6, 8),
 (6, 9),
 (7, 8),
 (7, 9),
 (8, 9)]

# Automated testing and assertions

These are a great way to be confident that your code is correct and working as intended, and you should use them all the time.

We'll stick to just using assert statements for now.

In [65]:
assert 1 + 1 == 2
assert 1 + 1 == 2, "1 + 1 should equal 2 but it didn't"

If the code in the assert statements yields True, nothing happens.

If it's False, it raises an AssertionError carrying the message you supplied (if any).

In [66]:
def smallest_items(xs):
    """Return the smallest element of xs."""
    lowest = min(xs)
    return lowest


# Self-checks: these stay silent for as long as the function behaves.
assert smallest_items([10, 20, 5, 40]) == 5
assert smallest_items([1, 0, -1, 2]) == -1

You can also use assertions to check inputs to functions but this is less common:

In [67]:
def smallest_items2(xs):
    """Return the smallest element of xs.

    Raises:
        AssertionError: if xs is empty (or otherwise falsy).
    """
    assert xs, "Empty list has no smallest item"
    return min(xs)


# The failure is the point of this demo, so catch it and print the message
# rather than letting an uncaught exception halt "Run All".
try:
    smallest_items2(xs=[])
except AssertionError as err:
    print(f"AssertionError: {err}")

AssertionError: Empty list has no smallest item

# Classes

Classes encapsulate data and the functions that operate on them.

Kind of like recipes.

By way of example, let's define a CountingClicker class that maintains a count, can be clicked to increment the count, lets you read the current count, and can reset the count to zero.

To define a class you use the class keyword and PascalCase name:

In [69]:
class CountingClicker:
    """A tally counter that can be clicked, read and reset."""

    def __init__(self, count=0):
        # The constructor takes the information an instance needs and does the
        # necessary setup; here that is just storing the starting count.
        self.count = count

    def __repr__(self):
        """Return a string of the form CountingClicker(count=<count>)."""
        return f"CountingClicker(count={self.count})"

    def click(self, num_times=1):
        """Advance the count by num_times (one click by default)."""
        self.count += num_times

    def read(self):
        """Return the current count."""
        current = self.count
        return current

    def reset(self):
        """Set the count back to zero."""
        self.count = 0

In [70]:
clicker1 = CountingClicker() # default value zero
clicker2 = CountingClicker(100) # start count at 100
clicker3 = CountingClicker(count=100) # explicitly start count at 100

In [71]:
# repr() is the public way to ask for an object's __repr__ string.
repr(clicker3)

'CountingClicker(count=100)'

Having defined it, let's write some assert test-cases for our clicker:

In [72]:
clicker = CountingClicker()
assert clicker.read() == 0, "clicker should start with count = 0"
clicker.click()
clicker.click()
assert clicker.read() == 2, "after two clicks the count should equal 2"
clicker.reset()
assert clicker.read() == 0, "after reset, count should return to 0"

# Sub-classes

Suppose we wanted to define a non-resettable clicker. We could do so by defining a sub-class of CountingClicker whose reset method is overridden to do nothing.

In [73]:
class NoResetClicker(CountingClicker):
    """A CountingClicker whose count cannot be reset.

    A subclass inherits every method and attribute of its parent class; the
    only thing changed here is reset().
    """

    def reset(self):
        """Do nothing, so the count is left exactly as it was."""
        return None

In [74]:
clicker2 = NoResetClicker()
assert clicker2.read() == 0
clicker2.click()
assert clicker2.read() == 1
clicker2.reset()
assert clicker2.read() == 1, "Reset shouldn't do anything"

# Iterables and generators

In the interests of efficiency, when iterating through lists etc. it's better to only produce the objects/values that you actually need. Imagine you had a list a billion numbers long and you only needed the 233,999,356th element.

Hence we use things called generators which can be iterated over just like lists but that produce values lazily (one at a time as needed).

One way to create generators is to define a function and use yield (instead of return):

In [75]:
def generate_a_range(n):
    """Lazily yield 0, 1, 2, ... for as long as the value is below n."""
    current = 0
    while True:
        if not current < n:
            return  # finished: the generator is now exhausted
        yield current
        current += 1


# The for loop pulls one value at a time until the generator runs dry.
for i in generate_a_range(10):
    print(f"i: {i}")

i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9


You can also create infinite sequences:

In [76]:
def natural_numbers():
    """Yield 1, 2, 3, ... without ever finishing."""
    previous = 0
    while True:
        previous += 1
        yield previous

The increased efficiency of lazily producing values is balanced against the fact that you can only iterate through a generator once. It doesn't remember anything.

Therefore, for multiple iterations you have to either define another generator or use a different approach.

Another way to create generators is to use `for` comprehensions within parentheses:

In [77]:
evens_sub_20 = (i for i in generate_a_range(20) if i % 2 == 0)

print(evens_sub_20)

for even in evens_sub_20:
    print(even)

<generator object <genexpr> at 0x0000018EF6A55E08>
0
2
4
6
8
10
12
14
16
18


In [78]:
names = "Alice,Bob,Calum,Diana".split(",")

for i, name in enumerate(names):
    print(f"name {i} is {name}")

name 0 is Alice
name 1 is Bob
name 2 is Calum
name 3 is Diana


# Randomness

Bread and butter data stuff requires playing with random numbers. You can do this with the `random` module.

In [79]:
import random
random.seed(10)

four_uniform_randoms = [random.random() for _ in range(4)]
print(four_uniform_randoms)

[0.5714025946899135, 0.4288890546751146, 0.5780913011344704, 0.20609823213950174]


In [80]:
random.randrange(10) # choose randomly from range(10)

7

In [81]:
random.randrange(3,6) # choose randomly from range(3,6)

4

`random.shuffle` randomly reorders a list like shuffling a deck of cards:

In [82]:
to_ten = [x for x in range(1,11)]
print(to_ten)
random.shuffle(to_ten)
print(to_ten)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[4, 5, 6, 7, 2, 9, 10, 8, 1, 3]


`random.choice` randomly picks one element from a list:

In [83]:
my_best_friends = "Alice,Bob,Calum".split(",")
my_bestie = random.choice(my_best_friends)
print(my_bestie)

Alice


To randomly pick a sample of elements without replacement use `random.sample`:

In [84]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers,6)
print(winning_numbers)

[38, 22, 24, 26, 18, 52]


To randomly pick a sample with replacement, use `random.choice` multiple times:

In [85]:
four_with_rep = [random.choice(range(10)) for _ in range(4)]
print(four_with_rep)

[4, 7, 2, 4]


# Regular Expressions

Regex is a huge topic unto itself, but this covers the absolute basics.

Regex = regular expression, i.e. a string that defines a pattern (for dates, bank statements, addresses, etc.) which other strings can be matched against.

Some examples:

In [86]:
import re

In [87]:
re_examples = [
    not re.match("a","cat"), # cat doesn't start with a
    re.search("a","cat"), # cat has an a in it
    not re.search("c","dog"), # dog doesn't have a c in it
    3 == len(re.split("[ab]","carbs")), # Split on a or b to ['c','r','s']
    "R-D-" == re.sub("[0-9]","-","R2D2") # Replace digits with dashes
]

print(all(re_examples))

True


`re.match` checks whether the beginning of a string matches a regex (think of LEFT()=='foo' in excel)

`re.search` checks whether any part of a string matches the regex (think isnumber(search("foo",cell)) in excel)

# zip and Argument Unpacking

`zip` allows you to "zip" two or more iterables together (hence the name).

The `zip` function transforms multiple iterables into a single iterable of tuples:

In [88]:
list1=[1,2,3]
list2=['a','b','c']

# Zip is lazy, so use list comprehensions if you have to retain the data
[pair for pair in zip(list1,list2)]

[(1, 'a'), (2, 'b'), (3, 'c')]

If the list lengths are different, `zip` stops as soon as it gets to the end of the short list.

You can unzip using this:

In [89]:
pairs = list(zip(list1,list2))  # zip is lazy, so materialise it to reuse it

# Each pair is (number, letter), so the first tuple that zip(*pairs) returns
# holds the numbers and the second holds the letters.
numbers, letters = zip(*pairs)

print(numbers)
print(letters)

(1, 2, 3)
('a', 'b', 'c')


The asterisk performs something called 'argument unpacking', which uses the elements of pairs as individual arguments to zip. It's equivalent to:

In [90]:
# zip(*pairs) written out by hand: every (number, letter) element of pairs
# becomes its own positional argument to zip.
numbers, letters = zip((1, 'a'), (2, 'b'), (3, 'c'))
print(letters)
print(numbers)

('a', 'b', 'c')
(1, 2, 3)


You can use argument unpacking with any function:

In [91]:
def add(a, b):
    """Return a + b."""
    return a + b


print(add(1,2))

# The * spreads the list's elements out as separate positional arguments,
# so this is the same call as add(1, 2).
print(add(*[1,2]))

3
3


# args and kwargs

Let's get abstract. Suppose you wanted to define a function of a function (of a function of a function...) that:
   - takes as input one function; and
   - returns as output another.

In [92]:
def doubler(f):
    """Return a one-argument function that computes 2 * f(x)."""
    def g(x):
        inner_result = f(x)
        return 2 * inner_result
    return g


def f1(x):
    """Return x + 1."""
    incremented = x + 1
    return incremented


g = doubler(f1)

assert g(3) == 8, "(3+1) * 2 should equal 8"
assert g(-1) == 0, "(-1 + 1) * 2 should equal 0"

In [93]:
print(g(3))

8


In [94]:
print(g(-1))

0


This doesn't work with functions that have more than one argument:

In [95]:
def f2(x,y):
    return x + y

g = doubler(f2)
try:
    g(1,2)
except TypeError:
    print("as defined, g only takes one argument")

as defined, g only takes one argument


What we need then is a way to specify a function that takes arbitrary arguments.

We can do this with argument unpacking and some python magic

In [96]:
def magic(*args, **kwargs):
    """Print the positional arguments, then the keyword arguments."""
    print("unnamed args:", args)  # args arrives as a tuple
    print("keyword args:", kwargs)  # kwargs arrives as a dict


magic(1, 2, key='word', key2='word2')

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


When you define a function with `*args` and `**kwargs`, python interprets the first as an n-tuple of the unnamed arguments and the second as a dict of the named arguments.

It also works in reverse - you can supply arguments to a function from lists and dicts:

In [97]:
def other_way_magic(x, y, z):
    """Return x + y + z."""
    running = x + y
    return running + z


x_y_list = [1, 2]  # unpacked with * to supply x and y by position
z_dict = {"z": 3}  # unpacked with ** to supply z by name

assert other_way_magic(*x_y_list, **z_dict) == 6, "1 + 2 + 3 should equal 6"

You can do all kinds of weird stuff with this trick. 

We'll limit it to produce higher-order functions whose inputs can accept arbitrary arguments.

In [98]:
def doubler_correct(f):
    """Return a wrapper that computes 2 * f(...) for whatever arguments f takes."""
    def g(*args, **kwargs):
        """Forward every positional and keyword argument on to f."""
        result = f(*args, **kwargs)
        return 2 * result
    return g

g = doubler_correct(f2)
assert g(1,2) == 6, "doubler should work now"

In [99]:
g(1,2)

6

While this is a cool trick, it's always better to be explicit about what arguments your functions take, so avoid using it.

# Type annotations

Python is *dynamically typed*, meaning it doesn't care what data types you use as long as you use them validly:

In [102]:
def add(a,b):
    return a + b

print(add(1,2),
      add([4,5],[6]),
      add("Hello","world"))

3 [4, 5, 6] Helloworld


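A *statically typed* language would by contrast only accept the prescribed types. Python lets you annotate the types you expect, but the interpreter does not enforce them at run time: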

In [104]:
def add2(a: int, b: int) -> int:
    """Return a + b.

    The annotations are hints for readers and type-checking tools; Python
    itself does not check them when the function is called.
    """
    return a + b

print(add2(10,5))
print(add2("hi","mum")) # no error: prints himum, because annotations aren't enforced

15
himum


## How to write type annotations

Say you had a list as one of the inputs to your function:

In [105]:
def total(xs: list) -> float:
    return sum(xs)

This isn't wrong but it's not explicit or specific enough.

Say what we want is for `xs` to be a list of floats and not a list of strings.

The `typing` module allows us to do this by providing a number of parameterised types:

In [108]:
from typing import List, Optional #Upper case L


def total(xs: Optional[List[float]] = None) -> float:
    """Return the sum of xs.

    Args:
        xs: The numbers to add up. When omitted (or None), [1, 1, 1] is used.

    Returns:
        The sum of the elements of xs.
    """
    # A list written directly as a default value is created once and shared by
    # every call; building it inside the function gives each call its own list.
    if xs is None:
        xs = [1, 1, 1]
    return sum(xs)

In [110]:
test = [1.,2.,3.,4.]

total()

3

In cases where it isn't obvious what the type of an argument or variable is, you can supply inline hints:

In [112]:
from typing import Optional

values: List[int] = []
best_so_far: Optional[float] = None # can be float or None