# Built-in Data Structures, Functions, and Files

### Tuple

In [1]:
# Multiplying a tuple by an integer concatenates that many copies of it
('foo', 'bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

In [2]:
tup = (4, 5, (6, 7))
# The target on the left mirrors the nesting of tup, so c and d receive 6 and 7
a, b, (c, d) = tup
c

6

In [3]:
a, b = 1, 2
# Swap the two names in a single statement, without a temporary variable
a, b = b, a
a

2

In [4]:
value = [1,2,3,4]
value1 = 3
# Show the identity (id) of each object alongside the current contents of value
print("{0}, {1}, {2}".format(id(value), id(value1), value))

4463361544, 4425522608, [1, 2, 3, 4]


In [5]:
# Swapping only rebinds the names: the two ids printed here trade places
# because each name now refers to the other, unchanged, object
value, value1 = value1, value
print("{0}, {1}, {2}".format(id(value), id(value1), value))

4425522608, 4463361544, 3


In [6]:
# A common use of variable unpacking is iterating over sequences of tuples or lists
seq=[(1,2,3),(4,5,6),(7,8,9)]

for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a, b, c))

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [7]:
# A comma-separated sequence of values creates a tuple even without parentheses
values=1,2,3,4,5
type(values)     # tuple

# *rest collects every element after the first two into a list
a, b, *rest = values
type(rest)       # list
# Only this final expression is displayed as the cell output
rest

[3, 4, 5]

* * *

### List

###### Concatenating and combining lists

In [8]:
# Concatenating list
[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [9]:
# Extend list
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

In [10]:
# Note that list concatenation by addition is a comparatively expensive operation 
# since a new list must be created and the objects copied over. Using extend to 
# append elements to an existing list, especially if you are building up a 
# large list, is usually preferable. Thus,
everything = []
list_of_lists = [[4], [None], ['foo'], [7, 8], [(2, 3)]] * 1000

def extendEverything(everything, list_of_lists):
    """Append the elements of every chunk in list_of_lists to everything.

    The list passed as everything is modified in place; nothing is returned.
    """
    for piece in list_of_lists:
        everything.extend(piece)
        

%timeit extendEverything(everything, list_of_lists)

714 µs ± 119 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [11]:
# is faster than the concatenative alternative below:
everything = []
list_of_lists = [[4], [None], ['foo'], [7, 8], [(2, 3)]] * 1000
def concatEverything(everything, list_of_lists):
    """Concatenate every chunk onto everything with + and return the result.

    Each + builds a brand-new list, so the list passed in as everything is
    never modified; the combined list is only available as the return value.
    """
    for chunk in list_of_lists:
        # Rebinds the local name to a freshly allocated list on every pass
        everything = everything + chunk
    return everything
        

%timeit extendEverything(everything, list_of_lists)

597 µs ± 46.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


###### Binary search and maintaining a sorted list

In [12]:
# The built-in bisect module implements binary search and insertion into a sorted list.
import bisect

In [13]:
c = [1,2,2,2,3,4,7]

In [14]:
# bisect returns the index at which 2 would be inserted to keep c sorted,
# which falls after the existing entries equal to 2
bisect.bisect(c, 2)

4

In [15]:
# The bisect module functions do not check whether the list is sorted,
# as doing so would be computationally expensive
c

[1, 2, 2, 2, 3, 4, 7]

###### Slicing

In [16]:
seq=[7,2,3,7,5,6,0,1]

In [17]:
# A step can also be used after a second colon to, say, take every other element
seq[::2]

[7, 3, 5, 0]

In [18]:
# A clever use of this is to pass -1, which has the useful effect of reversing a list or tuple
seq[::-1]

[1, 0, 6, 5, 7, 3, 2, 7]

----------

### Built-in Sequence Functions

###### enumerate

In [19]:
some_list = ['foo', 'bar', 'baz']
mapping = {}
# enumerate yields (index, value) pairs, so the index does not need to be tracked by hand
for i, v in enumerate(some_list):
    mapping[i] = v
    # Use mapping[v] = i instead to build the reverse (value -> index) lookup
mapping

{0: 'foo', 1: 'bar', 2: 'baz'}

###### zip “pairs” up the elements of a number of lists, tuples, or other sequences to create an iterator of tuples (wrap it in list() to materialize the pairs)

In [20]:
# NOTE(review): zipped is only assigned in the next cell, so on a fresh kernel
# this lookup reports that the object is not found
zipped?

Object `zipped` not found.


In [21]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [22]:
seq3 = [False, True]
# zip stops at the shortest input, so only two tuples are produced here
list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [23]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

0: foo, one
1: bar, two
2: baz, three


In [24]:
# Another way to think about this is
# converting a list of rows into a list of columns.
# The syntax, which looks a bit magical, is...
# NOTE(review): the last tuple looks like (last name, first name), the reverse of
# the other two, so 'Schilling' ends up in first_names — confirm the intended order
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]
# zip(*pitchers) passes each row as a separate argument, regrouping the items by position
first_names, last_names = zip(*pitchers)

In [25]:
first_names

('Nolan', 'Roger', 'Schilling')

In [26]:
last_names

('Ryan', 'Clemens', 'Curt')

---------

### Dictionary

###### Creating dicts from sequences

In [27]:
# mapping = {}
# for key, value in zip(key_list, value_list):
#     mapping[key] = value
mapping = dict(zip(range(5), reversed(range(5))))
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

###### Default values

In [28]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
# Group the words by their first letter
for word in words:
    letter = word[0] 
    # setdefault returns the list already stored under letter, inserting a new
    # empty list first when the key is missing
    by_letter.setdefault(letter, []).append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

-------

### Set

In [29]:
# Set is an unordered collection of unique elements. 
# You can think of them like dicts, but keys only, no values.
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [30]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [31]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [32]:
# |= is the in-place union; working on a copy leaves a itself unchanged
c = a.copy()
c |= b
c

{1, 2, 3, 4, 5, 6, 7, 8}

In [33]:
# &= is the in-place intersection; working on a copy leaves a itself unchanged
d = a.copy()
d &= b
d

{3, 4, 5}

-------

### List, Set, and Dict Comprehensions

###### List comprehensions

In [34]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

###### Dict comprehensions

In [35]:
loc_mapping = {val:index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

###### Set comprehensions

In [36]:
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

###### Nested list comprehensions

In [37]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'], 
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar'], 
            ['Peral', 'Anson', 'Daan', 'Johnny', 'Dave']]

In [38]:
names_of_interest = []

for names in all_data:
    # Keep only the names that contain at least one 'n'
    names_with_n = [name for name in names if name.count('n') >= 1]
    names_of_interest.extend(names_with_n)

names_of_interest

['John', 'Steven', 'Juan', 'Anson', 'Daan', 'Johnny']

In [39]:
# The for clauses are written in the same order as the equivalent nested loops
# (outer loop first), and the filter condition comes last
result = [name for names in all_data for name in names if name.count('n') >= 1]

result

['John', 'Steven', 'Juan', 'Anson', 'Daan', 'Johnny']

In [40]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]

flattened
##### Equal to 
#flattened = []
#for tup in some_tuples:
#    for x in tup:
#        flattened.append(x)


[1, 2, 3, 4, 5, 6, 7, 8, 9]

-------

### Functions Are Objects

In [41]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south   carolina##', 'West virginia?']

In [42]:
import re

def clean_strings(strings):
    """Return a cleaned copy of each string in strings.

    Every value is stripped of surrounding whitespace, has the characters
    '!', '#' and '?' removed, and is then converted to title case.
    """
    return [
        re.sub('[!#?]', '', raw.strip()).title()
        for raw in strings
    ]

In [43]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [44]:
#####################################################################
# An alternative approach that you may find useful is to make       #
# a list of the operations you want to apply to strings             #
#####################################################################
def remove_punctuation(value):
    """Return value with every '!', '#' and '?' character removed."""
    unwanted = '[!#?]'
    return re.sub(unwanted, '', value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings2(strings, ops):
    """Apply every callable in ops, in order, to each string in strings.

    Returns a new list holding the fully transformed values.
    """
    def run_pipeline(text):
        # Feed the output of each operation into the next one
        for op in ops:
            text = op(text)
        return text

    return [run_pipeline(item) for item in strings]

In [45]:
clean_strings2(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [46]:
# You can use functions as arguments to other functions like the built-in map function, 
# which applies a function to a sequence of some kind
for x in map(remove_punctuation, states):
    print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


-------

### Anonymous (Lambda) Functions

In [47]:
def short_function(x): 
    """Return x multiplied by 2."""
    return x*2
    
equiv_anon = lambda x: x * 2

In [48]:
equiv_anon(5)

10

In [49]:
def apply_to_list(some_list, f): 
    """Return a new list containing f(x) for every x in some_list, in order."""
    return list(map(f, some_list))

ints=[4,0,1,5,6] 
apply_to_list(ints, lambda x: x * 2)

[8, 0, 2, 10, 12]

In [50]:
# Suppose you wanted to sort a collection of strings by the number of distinct letters in each string
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

# A string is already iterable, so set(x) collects its distinct characters directly
strings.sort(key=lambda x: len(set(x)))
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

###### Currying: Partial Argument Application

In [51]:
# Currying is computer science jargon (named after the mathematician Haskell Curry) 
# that means deriving new functions from existing ones by partial argument application
def add_numbers(x, y): 
    """Return the result of x + y."""
    return x+y

add_five = lambda y: add_numbers(5, y)  # The second argument to add_numbers is said to be curried
add_five(3)

8

In [54]:
# The built-in functools module can simplify this process using the partial function:
from functools import partial 
# partial fixes 7 as the first positional argument (x) of add_numbers
add_seven = partial(add_numbers, 7)

add_seven(8)

15

------

### Generator

In [61]:
# iterating over a dict yields the dict keys:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for k in some_dict:
    print(k)

a
b
c


In [62]:
# When you write for key in some_dict,
# the Python interpreter first attempts to create an iterator out of some_dict:
dict_iterator = iter(some_dict)
dict_iterator

<dict_keyiterator at 0x1238fa7c8>

In [63]:
# An iterator is any object that will yield objects to 
# the Python interpreter when used in a context like a for loop.
list(dict_iterator)

['a', 'b', 'c']

A generator is a concise way to construct a new iterable object. Whereas normal functions execute and return a single result at a time, generators return a sequence of multiple results lazily, pausing after each one until the next one is requested.

In [74]:
def squares(n=10):
    """Generate the squares of the integers from 1 through n.

    The announcement below is printed when the first value is requested,
    not when squares() is called.
    """
    print('Generating squares from 1 to {0}'.format(n ** 2)) 
    yield from (i ** 2 for i in range(1, n + 1))

In [75]:
# When you actually call the generator, no code is immediately executed
gen = squares()
gen

<generator object squares at 0x12391be60>

In [73]:
# It is not until you request elements from the generator that it begins executing its code
for x in gen:
    print(x, end=' ')

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

###### Generator expressions
Another even more concise way to make a generator is by using a generator expression. This is a generator analogue to list, dict, and set comprehensions; to create one, enclose what would otherwise be a list comprehension within parentheses instead of brackets.

In [79]:
gen = (x ** 2 for x in range(100))
gen

<generator object <genexpr> at 0x12393d0a0>

In [81]:
# This is completely equivalent to the following more verbose generator:
def _make_gen():
    """Yield x**2 for each x in range(100), one value at a time."""
    for x in range(100):
        yield x**2 
gen = _make_gen()
gen

<generator object _make_gen at 0x12391b9e8>

###### itertools module

In [82]:
import itertools

In [87]:
first_letter = lambda x: x[0]

In [88]:
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

In [89]:
# groupby takes any sequence and a function,
# grouping consecutive elements in the sequence by return value of the function.
# The loop variable is called group so that it does not overwrite the names list,
# which keeps this cell safe to re-run
for letter, group in itertools.groupby(names, first_letter): 
    print(letter, list(group)) # group is an iterator over one run of consecutive names

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


### Error and Exception Handling

In [90]:
float('1.2345')

1.2345

In [91]:
float('something')

ValueError: could not convert string to float: 'something'

In [94]:
def attempt_float(x): 
    """Return float(x), or x itself when the conversion raises an exception.

    Only Exception subclasses are suppressed, so KeyboardInterrupt and
    SystemExit still propagate to the caller.
    """
    try:
        return float(x) 
    except Exception:
        return x

attempt_float('something')

'something'

In [98]:
# You might want to only suppress ValueError, 
# since a TypeError (the input was not a string or numeric value) 
# might indicate a legitimate bug in your program
def attempt_float2(x): 
    """Return float(x); hand x back unchanged if float raises ValueError.

    Any other exception, such as TypeError, propagates to the caller.
    """
    try:
        converted = float(x)
    except ValueError:
        return x
    else:
        return converted
# Then
attempt_float2((1, 2))

TypeError: float() argument must be a string or a number, not 'tuple'

In [99]:
# You can catch multiple exception types
def attempt_float3(x): 
    """Return float(x); hand x back unchanged on ValueError or TypeError."""
    try:
        converted = float(x)
    except (ValueError, TypeError):
        return x
    else:
        return converted

In [106]:
path = "./test"
f = open(path, 'w')
try:
    # NOTE(review): write_to_file is not defined anywhere in this notebook, so this
    # call raises NameError, which the except clause below does not catch
    write_to_file(f)
except (TypeError, ValueError) as e:
    # TypeError and ValueError have no strerror attribute (that belongs to OSError),
    # so format the exception itself
    print('Failed: {}'.format(e))
else:
    # Runs only when the try block raised nothing
    print("Succeeded")
finally: 
    # Runs on every path, so the file handle is always closed
    f.close()

NameError: name 'write_to_file' is not defined