# Chapter2

#### Getting Python

In [1]:
pip install ipython


The following command must be run outside of the IPython shell:

    $ pip install ipython

The Python package manager (pip) can only be used from outside of IPython.
Please reissue the `pip` command in a separate terminal or command prompt.

See the Python documentation for more information on how to install packages:

    https://docs.python.org/3/installing/


#### Whitespace Formatting

In [2]:
for i in [1, 2, 3, 4, 5]:
    print (i) # first line in "for i" block
    for j in [1, 2, 3, 4, 5]:
        print (j) # first line in "for j" block
        print (i + j) # last line in "for j" block
    print (i) # last line in "for i" block
print ("done looping")

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [3]:
long_winded_computation = (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +
13 + 14 + 15 + 16 + 17 + 18 + 19 + 20)

In [4]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

easier_to_read_list_of_lists = [ [1, 2, 3],
                                [4, 5, 6],
                                [7, 8, 9] ]

In [5]:
two_plus_three = 2 + \
3

print (two_plus_three)

5


In [6]:
for i in [1, 2, 3, 4, 5]:
# notice the blank line
    print (i)

1
2
3
4
5


#### Modules

In [7]:
import re
my_regex = re.compile("[0-9]+", re.I)

In [8]:
import re as regex
my_regex = regex.compile("[0-9]+", regex.I)

In [9]:
import matplotlib.pyplot as plt

In [10]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

In [11]:
match = 10
from re import * # uh oh, re has a match function
print (match) # "<function re.match>"

<function match at 0x0000024F6C308BF8>


#### Arithmetic

In [12]:
from __future__ import division

#### Functions

In [13]:
def double(x):
    """this is where you put an optional docstring
    that explains what the function does.
    for example, this function multiplies its input by 2"""
    return (x * 2)

In [14]:
def apply_to_one(f):
    """calls the function f with 1 as its argument"""
    return f(1)
    
my_double = double # refers to the previously defined function
x = apply_to_one(my_double) # equals 2

In [15]:
y = apply_to_one(lambda x: x + 4) # equals 5

In [16]:
print (y)

5


In [17]:
def another_double(x): return 2 * x

In [18]:
def my_print(message="my default message"):
    print (message)
my_print("hello") # prints 'hello'
my_print() # prints 'my default message'

hello
my default message


In [19]:
def subtract(a=0, b=0):
    return a - b

subtract(10, 5) # returns 5
subtract(0, 5) # returns -5
subtract(b=5) # same as previous

-5

#### Strings

In [20]:
single_quoted_string = 'data science'
double_quoted_string = "data science"

In [21]:
tab_string = "\t" # represents the tab character
len(tab_string) # is 1

1

In [22]:
not_tab_string = r"\t" # represents the characters '\' and 't'
len(not_tab_string) # is 2

2

In [23]:
multi_line_string = """This is the first line.
and this is the second line
and this is the third line"""
print (multi_line_string)

This is the first line.
and this is the second line
and this is the third line


#### Exceptions

In [24]:
try:
    print (0 / 0)
except ZeroDivisionError:
    print ("cannot divide by zero")

cannot divide by zero


#### Lists

In [25]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [ integer_list, heterogeneous_list, [] ]
list_length = len(integer_list) # equals 3
list_sum = sum(integer_list) # equals 6

In [26]:
x = range(10) # is the list [0, 1, ..., 9]
zero = x[0] # equals 0, lists are 0-indexed
one = x[1] # equals 1
nine = x[-1] # equals 9, 'Pythonic' for last element
eight = x[-2] # equals 8, 'Pythonic' for next-to-last element
x[0] = -1 # now x is [-1, 1, 2, 3, ..., 9]

TypeError: 'range' object does not support item assignment

In [None]:
x = range(10)

In [None]:
zero = x[0]

In [None]:
one = x[1]

In [None]:
nine = x[-1]

In [None]:
eight = x[-2]

In [None]:
x[0] = -1

In [None]:
first_three = x[:3] # [-1, 1, 2]
three_to_end = x[3:] # [3, 4, ..., 9]
one_to_four = x[1:5] # [1, 2, 3, 4]
last_three = x[-3:] # [7, 8, 9]
without_first_and_last = x[1:-1] # [1, 2, ..., 8]
copy_of_x = x[:] # [-1, 1, 2, ..., 9]

In [None]:
1 in [1, 2, 3] # True
0 in [1, 2, 3] # False

In [None]:
x = [1, 2, 3]
x.extend([4, 5, 6]) # x is now [1,2,3,4,5,6]

In [None]:
x = [1, 2, 3]
y = x + [4, 5, 6] # y is [1, 2, 3, 4, 5, 6]; x is unchanged

In [None]:
x = [1, 2, 3]
x.append(0) # x is now [1, 2, 3, 0]
y = x[-1] # equals 0
z = len(x) # equals 4

In [None]:
y = [1, 2] # now x is 1, y is 2

In [None]:
_, y = [1, 2] # now y == 2, didn't care about the first element

Tuples

In [None]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4
my_list[1] = 3 # my_list is now [1, 3]

try:
    my_tuple[1] = 3
except TypeError:
    print ("cannot modify a tuple")

In [None]:
def sum_and_product(x, y):
    return (x + y),(x * y)
sp = sum_and_product(2, 3) # equals (5, 6)
s, p = sum_and_product(5, 10) # s is 15, p is 50

In [None]:
x, y = 1, 2 # now x is 1, y is 2
x, y = y, x # Pythonic way to swap variables; now x is 2, y is 1

Dictionaries

In [None]:
empty_dict = {} # Pythonic
empty_dict2 = dict() # less Pythonic
grades = { "Joel" : 80, "Tim" : 95 } # dictionary literal

In [None]:
joels_grade = grades["Joel"] # equals 80

In [None]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print ("no grade for Kate!")

In [None]:
joel_has_grade = "Joel" in grades # True
kate_has_grade = "Kate" in grades # False

In [None]:
joels_grade = grades.get("Joel", 0) # equals 80
kates_grade = grades.get("Kate", 0) # equals 0
no_ones_grade = grades.get("No One") # default default is None

In [None]:
grades["Tim"] = 99 # replaces the old value
grades["Kate"] = 100 # adds a third entry
num_students = len(grades) # equals 3

In [None]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

In [None]:
tweet_keys = tweet.keys() # list of keys
tweet_values = tweet.values() # list of values
tweet_items = tweet.items() # list of (key, value) tuples
"user" in tweet_keys # True, but uses a slow list in
"user" in tweet # more Pythonic, uses faster dict in
"joelgrus" in tweet_values # True

In [None]:
defaultdict

In [None]:
word_counts = {}

for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

In [None]:
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

In [None]:
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [None]:
from collections import defaultdict


In [None]:

word_counts = defaultdict(int) # int() produces 0
for word in document:
    word_counts[word] += 1

In [None]:
dd_list = defaultdict(list) # list() produces an empty list
dd_list[2].append(1) # now dd_list contains {2: [1]}
dd_dict = defaultdict(dict) # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle" # { "Joel" : { "City" : Seattle"}}
dd_pair = defaultdict(lambda: [0, 0])
dd_pair[2][1] = 1 # now dd_pair contains {2: [0,1]}

Counter

In [None]:
from collections import Counter
c = Counter([0, 1, 2, 0]) # c is (basically) { 0 : 2, 1 : 1, 2 : 1 }

In [None]:
word_counts = Counter(document)

In [None]:
# print the 10 most common words and their counts
for word, count in word_counts.most_common(10):
    print (word_counts)

Sets

In [None]:
s = set()
s.add(1) # s is now { 1 }
s.add(2) # s is now { 1, 2 }
s.add(2) # s is still { 1, 2 }
x = len(s) # equals 2
y = 2 in s # equals True
z = 3 in s # equals False

In [None]:
stopwords_list = ["a","an","at"] 
hundreds_of_other_words = ["yet", "you"]
"zip" in stopwords_list # False, but have to check every element

In [None]:
stopwords_set = set(stopwords_list)
"zip" in stopwords_set # very fast to check

In [None]:
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list) # 6
item_set = set(item_list) # {1, 2, 3}
num_distinct_items = len(item_set) # 3
distinct_item_list = list(item_set) # [1, 2, 3]

Control Flow

In [None]:
if 1 > 2:
    message = "if only 1 were greater than two…"
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    message = "when all else fails use else (if you want to)"

In [None]:
parity = "even" if x % 2 == 0 else "odd"

In [None]:
x = 0
while x < 10:
    print (x, "is less than 10")
    x += 1

In [None]:
for x in range(10):
    print (x, "is less than 10")

In [None]:
for x in range(10):
    if x == 3:
        continue # go immediately to the next iteration
    if x == 5:
        break # quit the loop entirely
    print (x)

Truthiness

In [None]:
one_is_less_than_two = 1 < 2 # equals True
true_equals_false = True == False # equals False

In [None]:
x = None
print (x == None) # prints True, but is not Pythonic
print (x is None) # prints True, and is Pythonic

In [None]:
s = some_function_that_returns_a_string()

if s:
    first_char = s[0]
else:
    first_char = ""

In [None]:
first_char = s and s[0]

In [None]:
safe_x = x or 0

In [None]:
all([True, 1, { 3 }]) # True
all([True, 1, {}]) # False, {} is falsy
any([True, 1, {}]) # True, True is truthy
all([]) # True, no falsy elements in the list
any([]) # False, no truthy elements in the list

Sorting

In [None]:
x = [4,1,2,3]
y = sorted(x) # is [1,2,3,4], x is unchanged
x.sort() # now x is [1,2,3,4]

In [None]:
# sort the list by absolute value from largest to smallest
x = sorted([-4,1,-2,3], key=abs, reverse=True) # is [-4,3,-2,1]

# sort the words and counts from highest count to lowest
wc = sorted(word_counts.items(),
            key=lambda (word,count): count,
            reverse=True)

List Comprehensions

In [None]:
even_numbers = [x for x in range(5) if x % 2 == 0] # [0, 2, 4]
squares = [x * x for x in range(5)] # [0, 1, 4, 9, 16]
even_squares = [x * x for x in even_numbers] # [0, 4, 16]

In [None]:
square_dict = { x : x * x for x in range(5) } # { 0:0, 1:1, 2:4, 3:9, 4:16 }
square_set = { x * x for x in [1, -1] } # { 1 }

In [None]:
zeroes = [0 for _ in even_numbers] # has the same length as even_numbers

In [None]:
pairs = [(x, y)
    for x in range(10)
    for y in range(10)] # 100 pairs (0,0) (0,1) ... (9,8), (9,9)


In [None]:
increasing_pairs = [(x, y) # only pairs with x < y,
for x in range(10) # range(lo, hi) equals
for y in range(x + 1, 10)] # [lo, lo + 1, ..., hi - 1]

Generators and Iterators

In [None]:
def lazy_range(n):
    """a lazy version of range"""
    i = 0
    while i < n:
        yield i
        i += 1

In [None]:
for i in lazy_range(10):
    do_something_with(i)

In [None]:
def natural_numbers():
    """returns 1, 2, 3, ..."""
    n = 1
    while True:
        yield n
        n += 1

In [None]:
lazy_evens_below_20 = (i for i in lazy_range(20) if i % 2 == 0)

Randomness

In [None]:
import random
four_uniform_randoms = [random.random() for _ in range(4)]
# [0.8444218515250481, # random.random() produces numbers
# 0.7579544029403025, # uniformly between 0 and 1
# 0.420571580830845, # it's the random function we'll use
# 0.25891675029296335] # most often

In [None]:
random.seed(10) # set the seed to 10
print (random.random()) # 0.57140259469
random.seed(10) # reset the seed to 10
print (random.random()) # 0.57140259469 again

In [None]:
random.randrange(10) # choose randomly from range(10) = [0, 1, ..., 9]
random.randrange(3, 6) # choose randomly from range(3, 6) = [3, 4, 5]

In [None]:
up_to_ten = range(10)
random.shuffle(up_to_ten)
print up_to_ten
# [2, 5, 1, 9, 7, 3, 8, 6, 4, 0] (your results will probably be different)

In [None]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"]) # "Bob" for me

In [27]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6) # [16, 36, 10, 6, 25, 9]

NameError: name 'random' is not defined

In [33]:
four_with_replacement = [random.choice(range(10)) for _ in range(4)]

NameError: name 'random' is not defined

Regular Expressions

In [37]:
import re

print all([ # all of these are true, because
    not re.match("a", "cat"), # * 'cat' doesn't start with 'a'
    re.search("a", "cat"), # * 'cat' has an 'a' in it
    not re.search("c", "dog"), # * 'dog' doesn't have a 'c' in it
    3 == len(re.split("[ab]", "carbs")), # * split on a or b to ['c','r','s']
    "R-D-" == re.sub("[0-9]", "-", "R2D2") # * replace digits with dashes
    ]) # prints True

SyntaxError: invalid syntax (<ipython-input-37-096ecc75d81a>, line 3)

Object-Oriented Programming

In [42]:
# by convention, we give classes PascalCase names
class Set:
    # these are the member functions
    # every one takes a first parameter "self" (another convention)
    # that refers to the particular Set object being used
    
    def __init__(self, values=None):
        """This is the constructor.
        It gets called when you create a new Set.
        You would use it like
        s1 = Set() # empty set
        s2 = Set([1,2,2,3]) # initialize with values"""
        
        self.dict = {} # each instance of Set has its own dict property
        # which is what we'll use to track memberships
        
        if values is not None:
            for value in values:
                self.add(value)
        
    def __repr__(self):
        """this is the string representation of a Set object
        if you type it at the Python prompt or pass it to str()"""
        return "Set: " + str(self.dict.keys())
    
    # we'll represent membership by being a key in self.dict with value True
    def add(self, value):
        self.dict[value] = True
    
    # value is in the Set if it's a key in the dictionary
    def contains(self, value):
        return value in self.dict
    
    def remove(self, value):
        del self.dict[value]

In [45]:
s = Set([1,2,3])
s.add(4)
print (s.contains(4)) # True
s.remove(3)
print (s.contains(3)) # False

True
False


Functional Tools

In [46]:
def exp(base, power):
    return base ** power

In [47]:
def two_to_the(power):
    return exp(2, power)

In [49]:
from functools import partial
two_to_the = partial(exp, 2) # is now a function of one variable
print (two_to_the(3)) # 8

8


In [51]:
square_of = partial(exp, power=2)
print (square_of(3)) # 9

9


In [52]:
def double(x):
    return 2 * x

xs = [1, 2, 3, 4]
twice_xs = [double(x) for x in xs] # [2, 4, 6, 8]
twice_xs = map(double, xs) # same as above
list_doubler = partial(map, double) # *function* that doubles a list
twice_xs = list_doubler(xs) # again [2, 4, 6, 8]

In [53]:
def multiply(x, y): return x * y

products = map(multiply, [1, 2], [4, 5]) # [1 * 4, 2 * 5] = [4, 10]

In [54]:
def is_even(x):
    """True if x is even, False if x is odd"""
    return x % 2 == 0

In [55]:
x_evens = [x for x in xs if is_even(x)] # [2, 4]
x_evens = filter(is_even, xs) # same as above
list_evener = partial(filter, is_even) # *function* that filters a list
x_evens = list_evener(xs) # again [2, 4]

In [58]:
x_product = reduce(multiply, xs) # = 1 * 2 * 3 * 4 = 24
list_product = partial(reduce, multiply) # *function* that reduces a list
x_product = list_product(xs) # again = 24

NameError: name 'reduce' is not defined

enumerate

In [61]:
# not Pythonic
for i in range(len(documents)):
    document = documents[i]
    do_something(i, document)

# also not Pythonic
i = 0
for document in documents:
    do_something(i, document)
    i += 1

NameError: name 'documents' is not defined

for i, document in enumerate(documents):
    do_something(i, document)

In [63]:
for i in range(len(documents)): do_something(i) # not Pythonic
for i, _ in enumerate(documents): do_something(i) # Pythonic

NameError: name 'documents' is not defined

zip and Argument Unpacking

In [64]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
zip(list1, list2) # is [('a', 1), ('b', 2), ('c', 3)]

<zip at 0x24f705ba1c8>

In [65]:
pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)

In [66]:
zip(('a', 1), ('b', 2), ('c', 3))

<zip at 0x24f705baf08>

In [68]:
def add(a, b): return a + b


In [69]:
add(1, 2) # returns 3
add([1, 2]) # TypeError!
add(*[1, 2]) # returns 3

TypeError: add() missing 1 required positional argument: 'b'

# args and kwargs

In [1]:
def doubler(f):
    def g(x):
        return 2 * f(x)
    return g

In [3]:
def f1(x):
    return x + 1

g = doubler(f1)
print (g(3)) # 8 (== ( 3 + 1) * 2)
print (g(-1)) # 0 (== (-1 + 1) * 2)

8
0


In [5]:
def f2(x, y):
    return x + y
g = doubler(f2)
print g(1, 2) # TypeError: g() takes exactly 1 argument (2 given)

SyntaxError: invalid syntax (<ipython-input-5-02e53a7fcf7b>, line 4)

In [9]:
def magic(*args, **kwargs):
    print ("unnamed args:", args)
    print ("keyword args:", kwargs)
magic(1, 2, key="word", key2="word2")

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


In [11]:
def other_way_magic(x, y, z):
    return x + y + z
x_y_list = [1, 2]
z_dict = { "z" : 3 }
print (other_way_magic(*x_y_list, **z_dict)) # 6

6


In [15]:
def doubler_correct(f):
    """works no matter what kind of inputs f expects"""
    def g(*args, **kwargs):
        """whatever arguments g is supplied, pass them through to f"""
        return 2 * f(*args, **kwargs)
    return g

g = doubler_correct(f2)
print (g(1, 2)) # 6

6
