# The Zen of Python

In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


# Whitespace Formatting

In [2]:
# whitespace ignored inside parentheses and brackets

In [3]:
# backslash indicates statement continues onto next line

two_plus_three = 2 + \
3
two_plus_three

5

# Modules

In [4]:
# never import the entire contents of a module:
# a star import can silently overwrite names you have already defined

match = 10
from re import * # rebinds match to re.match, as the output below shows
print match

<function match at 0x00000000022B0828>


# Arithmetic

In [5]:
# to ensure new-style division instead of integer division

from __future__ import division

# Functions

In [6]:
# a function is a rule for taking zero or more inputs and returning a corresponding output

In [7]:
def double(x):
    '''Return x multiplied by two.

    The triple-quoted string placed directly under the def line is
    what Python calls the function's docstring.
    '''
    doubled = x * 2
    return doubled

In [8]:
# python functions are first-class, which means we can assign them to 
# variables and pass them to functions just like any other arguments

def apply_to_one(f):
    '''Invoke the callable f with the single argument 1 and return whatever it returns.'''
    result = f(1)
    return result

my_double = double
x = apply_to_one(double)

print x

2


In [9]:
# lambdas are short anonymous functions

# don't do this: binding a lambda to a name
another_double = lambda x: 2 * x

# do this instead: a regular def (it replaces the lambda bound just above)
def another_double(x):
    return 2 * x

# Strings

In [10]:
# create multiline strings using triple quotes

multi_string = """ first line
second line
third line"""

print multi_string

 first line
second line
third line


# Exceptions

In [11]:
try:
    print 0 / 0
except ZeroDivisionError:
    print 'cannot divide by zero'

cannot divide by zero


# Lists

In [12]:
# in operator checks for membership

print 1 in [1,2,3]
print 0 in [1,2,3]

True
False


In [13]:
# convenient to unpack lists

x, y = [1,2]

# common to use underscore for value that is going to be thrown away

_, y = [1,2]

# Tuples

In [14]:
# tuples are lists' immutable (unchanging) cousins
# anything you can do to a list that doesn't involve modifying it, you can do to a tuple

In [15]:
# define with parentheses

my_tuple = (3, 4)

# or without

other_tuple = 2, 3

In [16]:
# convenient way to return multiple values from functions
# can be used for multiple assignment

# Dictionaries

In [17]:
grades = {"Joel" : 80,
         "Tim": 95}

In [18]:
# check for existence of key (NOT value) with in

print "Joel" in grades
print "Kate" in grades

True
False


In [19]:
# get method returns a default value (instead of raising an exception) when you try for a key that isn't in dict

print grades.get("Joel", 0)
print grades.get("Kate", 0)
print grades.get("no one")

80
0
None


In [20]:
# structured data in dicts

tweet = {
    "user": "joelgrus",
    "text": "Data Science is Awesome",
    "hashtags": ["#data", "#datascience"]
}

In [21]:
print tweet.keys()

['text', 'hashtags', 'user']


In [22]:
print tweet.values()

['Data Science is Awesome', ['#data', '#datascience'], 'joelgrus']


In [23]:
print tweet.items()

[('text', 'Data Science is Awesome'), ('hashtags', ['#data', '#datascience']), ('user', 'joelgrus')]


In [24]:
# dictionary keys must be immutable (cannot use list as a key)

# defaultdict

In [25]:
# like a regular dictionary except has default behavior for keys that don't exist

from collections import defaultdict

document = 'ryan menezes doing data science first principles with python doing'.split()

word_counts = defaultdict(int) # int() produces 0 by default
for word in document:
    word_counts[word] += 1
    
dd_list = defaultdict(list) # list() produces []
dd_list[2].append(1)

dd_dict = defaultdict(dict) # dict() produces {}
dd_dict['Joel']['city'] = 'Seattle'

dd_pair = defaultdict(lambda: [0,0])
dd_pair[2][1] = 1

print word_counts
print dd_list
print dd_dict
print dd_pair

defaultdict(<type 'int'>, {'doing': 2, 'science': 1, 'with': 1, 'principles': 1, 'ryan': 1, 'python': 1, 'menezes': 1, 'data': 1, 'first': 1})
defaultdict(<type 'list'>, {2: [1]})
defaultdict(<type 'dict'>, {'Joel': {'city': 'Seattle'}})
defaultdict(<function <lambda> at 0x00000000040E4A58>, {2: [0, 1]})


# Counter

In [26]:
# turns a sequence of values into a defaultdict(int)-like object
# frequency table
# can be used for histograms

from collections import Counter
print Counter([0,1,2,0])

Counter({0: 2, 1: 1, 2: 1})


# Sets

In [27]:
# collection of distinct elements

s = set()
s.add(1)
s.add(2)
s.add(2)
print s

set([1, 2])


In [28]:
# `in` is a very fast operation on sets (not fast on lists)

# finds unique values

# Control Flow

In [32]:
if 1 > 2:
    # do something
    m = 1
elif 1 > 3:
    # do something different
    m = 19
else:
    # when all else fails
    m = 12

In [30]:
# ternary if-then-else on one line
# (x % 2 is 0 -- false-y -- for even x, so compare against 0 explicitly)

parity = 'even' if x % 2 == 0 else 'odd'

In [36]:
# while loop

x = 0
while x < 10:
    print x, "is less than 10"
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [37]:
# for loop, using continue and break

for x in range(10):
    if x == 3:
        continue # go to next iteration
    if x == 5:
        break # stop loop entirely
    print x

0
1
2
4


# Truthiness

In [40]:
# all of these are false-y

False
None
[]
{}
""
set()
0
0.0

0.0

In [45]:
s = 'this is some string'

# `and` returns its first operand when that operand is false-y, so for an
# empty string s[0] is never evaluated and no IndexError is raised

first_char = s and s[0]

# equivalent to (for any string s)

if s:
    first_char = s[0]
else:
    first_char = ''
    
print first_char

t


In [47]:
# all takes a list and returns True when every element is truthy (so all([]) is True)
# any returns True when at least one element is truthy (so any([]) is False)

print all([True, 1, {3}])
print all([True, []])
print any([True, []])
print all([])
print any([])

True
False
True
True
False


# Sorting

In [54]:
x = [4,1,2,3]
print sorted(x) # returns new list

[1, 2, 3, 4]


In [56]:
y = [5,1,6,8,2]
y.sort() # works in place
print y

[1, 2, 5, 6, 8]


In [58]:
# can specify reverse = True parameter
# can compare results of a function

x = sorted([-4,1,2,3], key = abs, reverse=True)
print x

[-4, 3, 2, 1]


In [59]:
# sort words and counts from highest to lowest count

# each item is a (word, count) pair, so index 1 is the count; an index-based
# key is used because a tuple-parameter lambda only parses on Python 2
wc = sorted(word_counts.items(),
            key = lambda word_and_count: word_and_count[1],
            reverse = True)

# List Comprehensions

In [65]:
# even numbers

even_numbers = [x for x in range(5) if x % 2 == 0]
even_numbers

[0, 2, 4]

In [62]:
[x * x for x in range(5)]

[0, 1, 4, 9, 16]

In [63]:
# dict

{ x: x * x for x in range(5)}

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [64]:
# set

{ x * x for x in [1, -1]}

{1}

In [66]:
# if you don't need value from list, use underscore

zeroes = [0 for _ in even_numbers] # make a list that has 0 for as many elements as in even_numbers
zeroes

[0, 0, 0]

In [71]:
# multiple fors

[(x,y) for x in range(3) for y in range(x+1, 4)]

[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]

# Generators and Iterators

In [76]:
# a generator is something that you can iterate over whose values are only produced as needed

def lazy_range(n):
    '''Generator that yields 0, 1, 2, ... for as long as the value is below n.

    Values are produced one at a time, only when the caller asks for
    the next one.
    '''
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1
        
# only produces values as necessary

# for range, can use xrange()

In [78]:
lazy_evens_below_20 = (i for i in xrange(20) if i % 2 == 0)
print lazy_evens_below_20

<generator object <genexpr> at 0x0000000007130BD0>


In [79]:
# iteritems() lazily yields key-value pairs as needed

# Randomness

In [80]:
import random

In [82]:
four_uniform_randoms = [random.random() for _ in range(4)]
four_uniform_randoms

[0.4306526006558363,
 0.7589887323624526,
 0.09180788245782112,
 0.680793753864897]

In [83]:
# actually pseudorandom numbers based on an internal state you can set with random.seed for reproducibility

random.seed(10)
print random.random()
print random.random()
random.seed(10)
print random.random()

0.57140259469
0.428889054675
0.57140259469


In [87]:
random.randrange(4,10)

7

In [89]:
up_to_ten = range(10)
random.shuffle(up_to_ten)
up_to_ten

[8, 3, 7, 5, 1, 2, 4, 6, 0, 9]

In [90]:
random.choice(['alice','in','wonderland'])

'in'

In [91]:
# sample without replacement

random.sample(range(60), 6)

[7, 45, 56, 55, 34, 2]

# Object-Oriented Programming

In [103]:
# build our own set class

# classes take PascalCase names

class MySet(object):
    '''A minimal set type that stores its members as the keys of a dict.

    Inherits from object so that it is a new-style class on Python 2.
    '''

    # these are member functions
    # every one takes "self" that refers to the particular object being used

    def __init__(self, values = None):
        '''This is the constructor.
        It gets called when you create a new MySet.
        You would use it like this

        s1 = MySet() # empty set
        s2 = MySet([1,2,3]) # initialize with values

        values: optional iterable of hashable items; None means "start empty".
        '''

        # each instance of MySet has its own dict property,
        # which is what we'll use to track memberships
        self.dict = {}

        if values is not None:
            for value in values:
                self.add(value)

    def __repr__(self):
        '''This is the string representation of a MySet object
        if you type it at the Python prompt or pass it to str()'''
        # list(self.dict) gives the keys as a plain list on both Python 2 and
        # Python 3 (where keys() would print as "dict_keys([...])")
        return "MySet: " + str(list(self.dict))

    # we'll represent membership by being a key in self.dict with value True

    def add(self, value):
        '''Add value to the set; adding an existing member changes nothing.'''
        self.dict[value] = True

    # value is in the MySet if it's a key in the dictionary

    def contains(self, value):
        '''Return True if value is a member, False otherwise.'''
        return value in self.dict

    def __contains__(self, value):
        '''Support the `value in my_set` syntax by delegating to contains().'''
        return self.contains(value)

    def remove(self, value):
        '''Remove value from the set; raises KeyError if it is not a member.'''
        del self.dict[value]

In [104]:
s = MySet([4,5,6,6,7])

In [105]:
s

MySet: [4, 5, 6, 7]

In [106]:
s.add(20)

In [107]:
print s.contains(4)

True


In [108]:
print s.contains(3)

False


In [109]:
print s.contains(6)
s.remove(6)
print s.contains(6)

True
False


# Functional Tools

In [6]:
# partially apply functions to create new functions

def exp(base, power):
    '''Return base raised to the given power.'''
    result = pow(base, power)
    return result

# exp(2, power)

def two_to_the(power):
    '''Return 2 raised to power, by calling exp with the base fixed at 2.'''
    result = exp(2, power)
    return result

# use partial to use previously defined function as template

from functools import partial

two_to_the = partial(exp, 2)
square_of = partial(exp, power = 2)

print square_of

<functools.partial object at 0x00000000040CB9A8>


In [10]:
# map

def double(x):
    '''Return twice x (this cell re-defines the double from the Functions section).'''
    doubled = 2 * x
    return doubled

xs = [1,2,3,4]

print [double(x) for x in xs]
print map(double, xs) # same as above

list_doubler = partial(map, double) # function that doubles a list
twice_xs = list_doubler(xs)

print twice_xs

[2, 4, 6, 8]
[2, 4, 6, 8]
[2, 4, 6, 8]


In [3]:
# can use map with multiple argument functions

def multiply(x, y):
    '''Return the product x * y.'''
    product = x * y
    return product

products = map(multiply, [1,2], [4,5])
print products

[4, 10]


In [9]:
# filter does work of a list comprehension with IF statement

def is_even(x):
    '''Return True when x leaves no remainder on division by 2.'''
    remainder = x % 2
    return remainder == 0

print [ x for x in xs if is_even(x) ]
print filter(is_even, xs) # same as above

[2, 4]
[2, 4]


In [8]:
list_evener = partial(filter, is_even)
list_evener(xs)

[2, 4]

In [11]:
# reduce combines elements of a list

reduce(multiply, xs) # 1*2*3*4

24

# enumerate

In [None]:
# pythonic convention for iterating

for i, document in enumerate(documents):
    # enumerate yields (index, element) tuples
    do_something(i, document)

# zip and Argument Unpacking

In [15]:
# zipping two or more lists together
# makes a data frame of sorts

print zip([1,2,3], ['a','b','c'])
print zip([1,2,3], ['a','b','c'], [17, 290, 57])

[(1, 'a'), (2, 'b'), (3, 'c')]
[(1, 'a', 17), (2, 'b', 290), (3, 'c', 57)]


In [16]:
# unzip using a strange trick

pairs = [(1, 'a'), (2, 'b'), (3, 'c')]

# asterisk performs argument unpacking: zip(*pairs) is the same call as
# zip((1, 'a'), (2, 'b'), (3, 'c')), so the first tuple that comes back
# gathers the numbers and the second gathers the letters

numbers, letters = zip(*pairs)
print(numbers)
print(letters)

(1, 2, 3)
('a', 'b', 'c')


In [18]:
# can use argument unpacking with any function

def add(x, y):
    '''Return the sum x + y.'''
    total = x + y
    return total

add(*[4,5])

9

# args and kwargs

In [19]:
# way to specify a function that takes arbitrary arguments
# works with argument unpacking

In [26]:
def magic(*args, **kwargs):
    print "unnamed args: ", args # a tuple of unnamed arguments
    print "keyword args: ", kwargs # a dict of keyword arguments
    
magic(1, 2, key1='five', key2 = 'six')

unnamed args:  (1, 2)
keyword args:  {'key2': 'six', 'key1': 'five'}


In [27]:
x_y_list = [1,2]
z_dict = { 'z': 3 }

def other_way_magic(x, y, z):
    '''Return x + y + z, combined left to right.'''
    partial_total = x + y
    total = partial_total + z
    return total

print other_way_magic(*x_y_list, **z_dict)

6


In [None]:
# use to produce higher order functions whose inputs can accept arbitrary arguments