In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [3]:
list_of_lists = [[1,2,3],[4,5,6],[7,8,9]]
easier_to_read_list_of_lists = [ [1,2,3],
                                 [4,5,6],
                                 [7,8,9] ]

In [4]:
two_plus_three = 2 +\
                 3

### Modules

In [6]:
import re as regex

my_regex = regex.compile("[0-9]+", regex.I)

re.compile(r'[0-9]+', re.IGNORECASE|re.UNICODE)

### Arithmetic

In [8]:
5/2

2.5

In [9]:
5//2

2

### Functions

In [10]:
def double_num(x):
    """
    Doubles a value.

    x: the value to double.
    Returns x multiplied by two.
    """
    doubled = x * 2
    return doubled

In [11]:
double_num(2)

4

In [12]:
def apply_to_one(f):
    """
    Functions are first-class: f is received as an ordinary argument.

    f: a callable accepting a single positional argument.
    Returns whatever f returns when it is called with 1.
    """
    result = f(1)
    return result

In [13]:
apply_to_one(double_num)

2

In [14]:
apply_to_one(lambda x: x+4)

5

### String

In [15]:
# Strings can be delimited by single or double quotes; both spellings
# produce exactly the same string.
single_quoted = 'science'
double_quoted = "science"

In [16]:
tab_string = "\t"
len(tab_string)

1

In [17]:
not_tab_string = r"\t"
len(not_tab_string)

2

In [18]:
multi_line_string = """first line
second line
third line"""

In [20]:
print(multi_line_string)

first line
second line
third line


### Exceptions

In [21]:
try:
    print(0/0)
except ZeroDivisionError:
    print('Cannot divide by zero.')

Cannot divide by zero.


### Lists

In [22]:
integer_list = [1,2,3,4,5]
heterogeneous_list = ["str", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]

In [23]:
list_length = len(integer_list)
list_sum = sum(integer_list)

In [27]:
x = list(range(10))  # a real list [0, 1, ..., 9]; a bare range cannot be modified
zero = x[0]          # lists are 0-indexed
one = x[1]
nine = x[-1]         # negative indices count from the end
eight = x[-2]
x[0] = -1            # lists are mutable: x is now [-1, 1, 2, ..., 9]

In [28]:
first_three = x[:3]
three_to_end = x[3:]
one_to_four = x[1:5]
last_three = x[-3:]
without_first_and_last = x[1:-1]
copy_of_x = x[:]

In [29]:
1 in [1,2,3]

True

In [30]:
0 in [1,2,3]

False

In [31]:
x = [1,2,3]
x.extend([4,5,6]) #in-place
x

[1, 2, 3, 4, 5, 6]

In [32]:
x = [1,2,3]
y = x + [4,5,6]
y

[1, 2, 3, 4, 5, 6]

In [33]:
x = [1,2,3]
x.append(0)
x

[1, 2, 3, 0]

In [34]:
x,y = [1,2]
print(x,y)

1 2


In [35]:
_,y = [1,2]
y

2

### Tuples

In [36]:
my_list = [1,2,3]
my_tuple = (1,2,3)
other_tuple = 3, 4
my_list[1] = 3

In [37]:
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple!")

cannot modify a tuple!


Tuples are a convenient way to return multiple values from functions.

In [38]:
def sum_and_product(x, y):
    """
    Returns two results at once, packed in a tuple.

    Returns the tuple (x + y, x * y).
    """
    total = x + y
    product = x * y
    return total, product

In [39]:
print(sum_and_product(2,3))

(5, 6)


Tuples (and lists) can also be used for multiple assignments.

In [40]:
x, y = 1, 2
x, y = y, x # Pythonic way of swapping variables.

### Dictionaries

In [41]:
empty_dict = {} # Pythonic
empty_dict2 = dict() # less Pythonic
grades = {"Joel": 80, "Tim": 95}

In [42]:
joels_grade = grades["Joel"] # equals 80

You'll get a KeyError if you ask for a key that's not in the dictionary.

In [44]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("No grade for Kate!")

No grade for Kate!


In [47]:
print("Joel" in grades)

True


In [48]:
print("Kate" in grades)

False


Dictionaries have a *get* method that returns a default value (instead of raising an exception) when you look up a key that's not in the dictionary:

In [54]:
print(grades.get("Joel",0))

80


In [53]:
print(grades.get("Kate",0))

0


In [52]:
print(grades.get("No one"))

None


In [55]:
grades["Kate"] = 100

In [56]:
len(grades)

3

We will frequently use dictionaries as a simple way to represent structured data:

In [57]:
tweet = {
    "user": "joelgrus",
    "text": "Data Science is Awesome",
    "retweet_count": 100,
    "hashtags": ["#data", "#science", "#datascience", "#awesome"]
}

In [58]:
print(tweet)

{'user': 'joelgrus', 'text': 'Data Science is Awesome', 'retweet_count': 100, 'hashtags': ['#data', '#science', '#datascience', '#awesome']}


In [59]:
tweet.keys()

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [60]:
tweet.values()

dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience', '#awesome']])

In [61]:
tweet.items()

dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome'])])

In [62]:
print("user" in tweet.keys()) # True; keys() is a dict_keys view here (not a list), but the .keys() call is redundant

print("user" in tweet) # more Pythonic, uses the dict's own fast in

print("joelgrus" in tweet.values()) # True

True
True
True


Dictionary keys must be immutable; in particular, you cannot use *list*s as keys. If you need a multipart key, you should use a *tuple* or figure out a way to turn the key into a string.

#### Word count

In [64]:
def word_count_first(document):
    """
    Counts word occurrences by checking for the key before updating it.

    document: an iterable of hashable words.
    Returns a dict mapping each distinct word to the number of times it occurs.
    """
    word_counts = {}
    for word in document:
        if word in word_counts:
            word_counts[word] += 1
        else:
            word_counts[word] = 1
    return word_counts  # hand the counts back to the caller

##### Forgiveness is better than permission

In [67]:
def word_count_second(document):
    """
    Counts word occurrences by attempting the update and handling the
    KeyError raised the first time a word is seen.

    document: an iterable of hashable words.
    Returns a dict mapping each distinct word to the number of times it occurs.
    """
    word_counts = {}
    for word in document:
        try:
            word_counts[word] += 1
        except KeyError:
            # first occurrence of this word
            word_counts[word] = 1
    return word_counts  # hand the counts back to the caller

In [68]:
def word_count_third(document):
    """
    Counts word occurrences using dict.get with a default of 0 for
    words that have not been seen yet.

    document: an iterable of hashable words.
    Returns a dict mapping each distinct word to the number of times it occurs.
    """
    word_counts = {}
    for word in document:
        previous_count = word_counts.get(word, 0)
        word_counts[word] = previous_count + 1
    return word_counts  # hand the counts back to the caller

#### defaultdicts

A *defaultdict* is like a regular dictionary except that when you try to look up a key it doesn't contain, it first adds a value for it using a zero-argument function you provided when you created it.

In [71]:
from collections import defaultdict

def word_count(document):
    """
    Counts word occurrences with a defaultdict, so unseen words need no
    special handling.

    document: an iterable of hashable words.
    Returns a defaultdict(int) mapping each word to its count.
    """
    tally = defaultdict(int)  # a missing key is created with int(), i.e. 0

    for token in document:
        tally[token] = tally[token] + 1

    return tally

### Counter

A counter turns a sequence of values into a *defaultdict(int)*-like object mapping keys to counts. We will primarily use it to create histograms.

In [74]:
from collections import Counter

c = Counter([0,1,2,0])

print(c.items())

dict_items([(0, 2), (1, 1), (2, 1)])


### Sets

Another data structure is *set*, which represents a collection of **distinct** elements.

In [76]:
s = set()

s.add(1)
s.add(2)
s.add(2)

print(len(s))
print(2 in s)
print(3 in s)
print("s: ", s)

2
True
False
s:  {1, 2}


We'll use *set*s for two main reasons.

The first is that *in* is a very fast operation on sets.

The second reason is to find the **distinct** items in a collection.

### Control Flow

In [77]:
if 1 > 2:
    message = "if only 1 were greaterthan two..."
elif 1 > 3:
    message = "elif stands for else if"
else:
    message = "when all else fails, use else (if you want to)"

Ternary if-then-else in one line:

In [78]:
# NOTE(review): x is not assigned in this cell; on a top-to-bottom run it appears to be
# the int left over from the variable-swap cell — confirm before reordering cells.
parity = "even" if x % 2 == 0 else "odd"

In [79]:
x = 0
while x < 10:
    print(x,'is less than 10')
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [80]:
for x in range(10):
    if x == 3:
        continue # go immediately to the next iteration
    if x == 5:
        break # quit the loop entirely
    print(x)

0
1
2
4


### Truthiness

In [81]:
one_is_less_than_two = 1 < 2
print(one_is_less_than_two)

True


In [82]:
true_equals_false = True == False
print(true_equals_false)

False


Python uses the value *None* to indicate a nonexistent value. It is similar to other languages' *null*.

In [83]:
x = None
print(x == None) # True, but not Pythonic
print(x is None) # True, Pythonic

True
True


Python lets you use any value where it expects a Boolean. The following are all "Falsy":

* False
* None
* [] (empty list)
* {} (empty dict)
* ""
* set()
* 0
* 0.0

Pretty much anything else gets treated as True.

Python has an *all* function, which takes a list and returns True precisely when every element is truthy, and an *any* function, which returns True when at least one element is truthy:

In [84]:
print( all([True, 1, {3}]) )

True


In [85]:
print( all([True, 1, {}]) )

False


In [86]:
print( all([]) ) # No falsy element

True


In [87]:
print( any([True, 1, {}]) )

True


In [88]:
print( any([]) )

False


### The Not-So-Basics

#### Sorting

In [89]:
x = [4,1,2,3]
y = sorted(x)
print(x,y)
x.sort()
print(x,y)

[4, 1, 2, 3] [1, 2, 3, 4]
[1, 2, 3, 4] [1, 2, 3, 4]


In [90]:
# sort the list by absolute value from largest to smallest
x = sorted([-4,1,-2,3], key=abs, reverse=True)
print(x)

[-4, 3, -2, 1]


In [None]:
# sort the words and counts from highest count to lowest
# word_counts only exists as a local inside the word_count* functions,
# so a small example mapping is built here to keep the cell runnable.
word_counts = {"data": 3, "science": 2, "python": 5}
# Python 3 no longer allows tuple parameters such as `lambda (word, count): ...`,
# so the (word, count) pair is taken as one argument and indexed.
wc = sorted(word_counts.items(),
            key=lambda word_and_count: word_and_count[1],
            reverse=True)

#### List Comprehensions

In [91]:
even_numbers = [x for x in range(10) if x % 2 == 0]
print(even_numbers)

[0, 2, 4, 6, 8]


In [92]:
squares = [x * x for x in range(10)]
print(squares)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [93]:
even_squares = [x * x for x in even_numbers]
print(even_squares)

[0, 4, 16, 36, 64]


In [94]:
square_dict = {x : x * x for x in range(10)}
print(square_dict)

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}


In [95]:
square_set = {x * x for x in [1,-1]}
print(square_set)

{1}


In [96]:
zeroes = [0 for _ in even_numbers]
print(zeroes)

[0, 0, 0, 0, 0]


In [97]:
pairs = [(x,y)
        for x in range(5)
        for y in range(5)]
print(pairs)

[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]


In [99]:
# only pairs with x < y
increasing_pairs = [(x,y)
                   for x in range(10)
                   for y in range(x+1,10)]
print(increasing_pairs)

[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9), (3, 4), (3, 5), (3, 6), (3, 7), (3, 8), (3, 9), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9), (5, 6), (5, 7), (5, 8), (5, 9), (6, 7), (6, 8), (6, 9), (7, 8), (7, 9), (8, 9)]


#### Generators and Iterators

A generator is something that you can iterate over but whose values are produced only as needed (lazily).

In [102]:
def lazy_range(n):
    """
    A lazy counterpart of range(n), written as a generator.

    Yields 0, 1, ..., up to but not including n, producing each value
    only when the consumer asks for it.
    """
    current = 0
    while True:
        if not (current < n):
            break
        yield current
        current += 1

In [103]:
for i in lazy_range(5):
    print(i)

0
1
2
3
4


In Python 3, range() is *lazy*.

A second way to create generators is by using *for* comprehensions wrapped in parentheses:

In [104]:
lazy_evens_below_20 = (i for i in lazy_range(20) if i % 2 == 0)

#### Randomness

In [106]:
import random

four_uniform_randoms = [random.random() for _ in range(4)]
print(four_uniform_randoms)

[0.6019360031047887, 0.14089945794155745, 0.9448391022169537, 0.14289766296274642]


In [107]:
random.seed(10)
print(random.random())

random.seed(10)
print(random.random())

0.5714025946899135
0.5714025946899135


In [108]:
# chooses randomly from range(10)
print(random.randrange(10))

6


In [109]:
# chooses randomly from range(3,6)
random.randrange(3,6)

4

In [112]:
up_to_ten = [x for x in range(10)]
random.shuffle(up_to_ten)
print(up_to_ten)

[6, 8, 4, 7, 0, 2, 9, 3, 1, 5]


In [113]:
print(random.choice(["Alice", "Bob", "Carol"]))

Bob


In [114]:
# sample WITHOUT replacement
lottery_numbers = [x for x in range(60)]
winning_numbers = random.sample(lottery_numbers,6)

print(winning_numbers)

[26, 18, 52, 43, 16, 29]


In [116]:
# sample WITH replacement
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
print(four_with_replacement)

[7, 3, 7, 9]


#### Regular Expressions

In [118]:
import re

print(all([
    not re.match("a","cat"),   # 'cat' does not start with 'a' 
    re.search("a","cat"),      # 'cat' has an 'a' in it 
    not re.search("c","dog"),  # 'dog' does not have a 'c' in it
    3 == len(re.split("[ab]","carbs")),   # split on a or b to ['c','r','s']
    "R-D-" == re.sub("[0-9]","-","R2D2")  # replace digits with dashes
]))

True


### Object-oriented programming

In [119]:
# by convention, we give classes PascalCase names
class Set:
    """
    A minimal set of hashable values, backed by a dict whose keys are the members.
    """
    # These are the member functions
    # every one takes a first parameter 'self' (non-static methods)
    # that refers to the particular Set object being used

    def __init__(self, values=None):
        """
        Constructor

        values: optional iterable of initial members; None creates an empty Set.
        """
        self.dict = {}

        if values is not None:
            for value in values:
                self.add(value)

    def __repr__(self):
        """
        This is the string representation of a Set object (toString)
        """
        # list(...) shows the members themselves; str() of the keys view
        # would render as "dict_keys([...])"
        return "Set: " + str(list(self.dict.keys()))

    # we'll represent membership by being a key in self.dict with value True
    def add(self, value):
        """
        Adds value to the Set; adding an existing member changes nothing.
        """
        self.dict[value] = True

    # value is in the Set if it's a key in self.dict
    def contains(self, value):
        """
        Returns True if value is a member of the Set, False otherwise.
        """
        return value in self.dict

    def remove(self, value):
        """
        Removes value from the Set; raises KeyError if it is not a member.
        """
        del self.dict[value]


In [120]:
s = Set([1,2,3])
s.add(4)
print(s.contains(4))
s.remove(3)
print(s.contains(3))

True
False


### Functional Tools

functools.partial

In [121]:
def exp(base, power):
    """
    Exponentiation as a plain two-argument function.

    Returns base raised to power.
    """
    return pow(base, power)

In [122]:
def two_to_the(power):
    """
    Hand-written specialization of exp with the base fixed at 2.

    Returns exp(2, power).
    """
    result = exp(2, power)
    return result

In [123]:
from functools import partial
two_to_the = partial(exp,2)
print(two_to_the(3))

8


In [124]:
square_of = partial(exp,power=2)
print(square_of(3))

9


map, reduce, filter

In [125]:
def double(x):
    """
    Returns two times x.
    """
    doubled = 2 * x
    return doubled

In [130]:
xs = [1,2,3,4]
twice_xs = [double(x) for x in xs] # list comprehension: builds an actual list
print(twice_xs)
twice_xs = map(double,xs) # here map returns a map object rather than a list
print(twice_xs) # prints the map object itself; wrap it in list() to see the doubled values
list_doubler = partial(map,double) # calling list_doubler(seq) is the same as map(double, seq)
twice_xs = list_doubler(xs)
print(twice_xs) # again prints a map object

[2, 4, 6, 8]
<map object at 0x7f80575c99e8>
<map object at 0x7f8057e6a978>


In [132]:
def multiply(x, y):
    """
    Returns the product x * y.
    """
    product = x * y
    return product

products = map(multiply, [1,2], [4,5])
print([p for p in products])

[4, 10]


In [133]:
def is_even(x):
    """
    Returns True when x leaves no remainder on division by 2.
    """
    remainder = x % 2
    return remainder == 0

In [137]:
x_evens = [x for x in xs if is_even(x)]
print(x_evens)
x_evens = filter(is_even,xs)
print([f for f in x_evens])
list_evener = partial(filter,is_even)
x_evens = list_evener(xs)
print([x for x in x_evens])

[2, 4]
[2, 4]
[2, 4]


In [140]:
from functools import reduce

x_product = reduce(multiply,xs)
print(x_product)
list_product = partial(reduce,multiply)
x_product = list_product(xs)
print(x_product)

24
24


enumerate

In [144]:
for i, v in enumerate([1,3,5,7,9]):
    print(i,v)

0 1
1 3
2 5
3 7
4 9


In [145]:
for i, _ in enumerate([1,3,5,7,9]):
    print(i)

0
1
2
3
4


*zip* and Argument Unpacking

In [146]:
list1 = [1,2,3,4]
list2 = ['a','b','c','d']
print([z for z in zip(list1,list2)])

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]


In [147]:
l1 = [1,2,3,4,5]
l2 = [1,2,3]
print([z for z in zip(l1,l2)])

[(1, 1), (2, 2), (3, 3)]


In [149]:
pairs = [(1,'a'), (2,'b'), (3,'c')]
nums, lets = zip(*pairs)
print(nums,lets)

(1, 2, 3) ('a', 'b', 'c')


In [151]:
def add(a, b):
    """
    Returns a + b.
    """
    total = a + b
    return total

In [152]:
add(1,2)

3

In [153]:
add(*[1,2])

3

*args* and *kwargs*

In [154]:
def magic(*args, **kwargs):
    """
    Prints the positional arguments (collected in a tuple) and the
    keyword arguments (collected in a dict) it was called with.
    """
    labelled = (("unnamed args:", args), ("keyword args:", kwargs))
    for label, captured in labelled:
        print(label, captured)

In [155]:
magic(1,2,key="word",key2="word2")

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


In [156]:
def other_way_magic(x, y, z):
    """
    Returns x + y + z; used to show list and dict unpacking at the call site.
    """
    first_two = x + y
    return first_two + z

In [157]:
x_y_list = [1, 2]
z_dict = {"z" : 3}
print(other_way_magic(*x_y_list,**z_dict))

6


In [162]:
# doubles the result of f, no matter how many args f takes
def doubler(f):
    """
    Wraps f in a new function that returns twice f's result.

    f: any callable.
    Returns a function g that forwards all positional and keyword
    arguments to f and multiplies the result by 2.
    """
    def g(*args, **kwargs):
        inner_result = f(*args, **kwargs)
        return 2 * inner_result
    return g

In [163]:
g = doubler(lambda x,y: x+y)
print(g(1,2))

6
