# Built-in Data Structures, Functions, and Files

### Tuple

In [1]:
# Multiplying a tuple by an integer concatenates that many copies of it.
('foo', 'bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

In [2]:
# Nested tuples can be unpacked by mirroring their structure on the
# left-hand side of the assignment.
tup = (4, 5, (6, 7))
a, b, (c, d) = tup
c

6

In [3]:
# Tuple packing/unpacking swaps two names in one statement, with no
# temporary variable.
a, b = 1, 2
a, b = b, a
a

2

In [4]:
# Bind one name to a list and another to an int, then print the id of each
# object (and the current content of `value`) before swapping.
value = [1,2,3,4]
value1 = 3
print("{0}, {1}, {2}".format(id(value), id(value1), value))

4457697032, 4424666448, [1, 2, 3, 4]


In [5]:
# Swap the two names. The printed ids trade places, showing that the names
# were rebound to the existing objects rather than the objects being copied.
value, value1 = value1, value
print("{0}, {1}, {2}".format(id(value), id(value1), value))

4424666448, 4457697032, 3


In [6]:
# A common use of variable unpacking is iterating over sequences of tuples or lists
seq=[(1,2,3),(4,5,6),(7,8,9)]

# Each 3-tuple in seq is unpacked into a, b, c on every iteration.
for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a, b, c))

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [7]:
# Parentheses are optional: a comma-separated sequence of values is a tuple.
# The bare type(...) expressions below are not displayed, because only the
# last expression in a cell is shown; the trailing comments give their results.
values=1,2,3,4,5
type(values)     # tuple

# A starred target collects all remaining elements into a list.
a, b, *rest = values
type(rest)       # list
rest

[3, 4, 5]

### List

###### Concatenating and combining lists

In [8]:
# Concatenating two lists with + produces a new list holding the elements of both.
[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [9]:
# extend appends every element of its argument to an existing list, in place.
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

In [10]:
# Note that list concatenation by addition is a comparatively expensive operation
# since a new list must be created and the objects copied over. Using extend to
# append elements to an existing list, especially if you are building up a
# large list, is usually preferable. Thus,
everything = []
# Five small chunks repeated 1000 times: 5000 chunks to combine.
list_of_lists = [[4], [None], ['foo'], [7, 8], [(2, 3)]] * 1000

def extendEverything(everything, list_of_lists):
    """Append the elements of every chunk to ``everything`` in place.

    Args:
        everything: Object with an ``extend`` method (a list in this
            notebook); it is mutated.
        list_of_lists: Iterable of chunks; each chunk is passed to
            ``everything.extend`` in order.

    Returns:
        None. The result is visible through ``everything`` itself.
    """
    for sublist in list_of_lists:
        everything.extend(sublist)
        

%timeit extendEverything(everything, list_of_lists)

419 µs ± 29.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [11]:
# is faster than the concatenative alternative below:
# Rebuild the same inputs that the extend benchmark cell used.
everything = []
list_of_lists = [[4], [None], ['foo'], [7, 8], [(2, 3)]] * 1000
def concatEverything(everything, list_of_lists):
    """Combine every chunk with ``everything`` using ``+`` and return the result.

    Each ``everything + chunk`` builds a brand-new list and rebinds only the
    local name, so the caller's list is never modified. The combined list is
    therefore returned; without the return it would be silently discarded.

    Args:
        everything: Starting list; left unmodified.
        list_of_lists: Iterable of lists to concatenate, in order.

    Returns:
        A list containing the elements of ``everything`` followed by the
        elements of each chunk (``everything`` itself if there are no chunks).
    """
    for chunk in list_of_lists:
        # Creates a new list on every iteration; this is the cost being compared
        # against in-place extend.
        everything = everything + chunk
    return everything
        

%timeit extendEverything(everything, list_of_lists)

410 µs ± 25.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


###### Binary search and maintaining a sorted list

In [12]:
# The built-in bisect module implements binary search and insertion into a sorted list.
import bisect

In [13]:
# An already-sorted list to search with bisect.
c = [1,2,2,2,3,4,7]

In [14]:
# bisect.bisect returns the index at which 2 should be inserted to keep c
# sorted; the insertion point lies after the existing 2s.
bisect.bisect(c, 2)

4

In [15]:
# The bisect module functions do not check whether the list is sorted,
# as doing so would be computationally expensive
c

[1, 2, 2, 2, 3, 4, 7]

###### Slicing

In [16]:
# Sample list used to demonstrate slicing with a step.
seq=[7,2,3,7,5,6,0,1]

In [17]:
# A step can also be used after a second colon to, say, take every other element
# (start and stop are omitted, so the whole sequence is covered).
seq[::2]

[7, 3, 5, 0]

In [18]:
# A clever use of this is to pass -1, which has the useful effect of reversing a list or tuple
# (the slice is a new object; seq itself is not modified).
seq[::-1]

[1, 0, 6, 5, 7, 3, 2, 7]

### Built-in Sequence Functions

###### enumerate

In [19]:
# enumerate yields (index, value) pairs, avoiding a manually maintained counter.
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, v in enumerate(some_list):
    mapping[i] = v
    # Alternative: map each value to its position instead.
    # mapping[v] = i
mapping

{0: 'foo', 1: 'bar', 2: 'baz'}

###### zip “pairs” up the elements of a number of lists, tuples, or other sequences into tuples (it returns a lazy iterator, so wrap it in list() to see them)

In [20]:
zipped?

Object `zipped` not found.


In [21]:
# Pair up the elements of two sequences position by position; list() is
# used to materialize the zip object for display.
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [22]:
# zip accepts any number of sequences and stops at the shortest one, so only
# two tuples are produced here.
seq3 = [False, True]
list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [23]:
# Combine enumerate with zip to iterate over several sequences at once while
# tracking the index; the (a, b) target unpacks each zipped pair.
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

0: foo, one
1: bar, two
2: baz, three


In [24]:
# Another way to think about this is
# converting a list of rows into a list of columns.
# The syntax, which looks a bit magical, is...
# NOTE(review): the third tuple looks like it is in (last, first) order,
# unlike the first two, so 'Schilling' ends up in first_names. Confirm
# whether this is intentional.
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]
# zip(*pitchers) passes each row as a separate argument, so zip regroups the
# values by position.
first_names, last_names = zip(*pitchers)

In [25]:
# First element of every row in pitchers.
first_names

('Nolan', 'Roger', 'Schilling')

In [26]:
# Second element of every row in pitchers.
last_names

('Ryan', 'Clemens', 'Curt')

### Dictionary

###### Creating dicts from sequences

In [27]:
# dict() accepts a sequence of (key, value) pairs, so passing it a zip of
# keys and values replaces the explicit loop:
#     mapping = {}
#     for key, value in zip(key_list, value_list):
#         mapping[key] = value
mapping = dict(zip(range(5), reversed(range(5))))
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

###### Default values

In [28]:
# Group words by their first letter.
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0] 
    # setdefault returns the list stored under `letter`, inserting an empty
    # list first when the key is missing, so no explicit membership check is needed.
    by_letter.setdefault(letter, []).append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

### Set

In [31]:
# A set is an unordered collection of unique elements.
# You can think of them like dicts, but keys only, no values.
# Duplicates in the input list collapse to a single element.
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [32]:
# Two overlapping sets (created with set-literal braces) for the set operations.
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [33]:
# union returns a new set with the distinct elements occurring in either set.
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [34]:
# |= is the in-place form of union; it is applied to a copy so that `a` is
# left unchanged.
c = a.copy()
c |= b
c

{1, 2, 3, 4, 5, 6, 7, 8}

In [35]:
# &= is the in-place form of intersection; it keeps only the elements that
# are also in b. A copy is used so that `a` is left unchanged.
d = a.copy()
d &= b
d

{3, 4, 5}

### List, Set, and Dict Comprehensions

###### List comprehensions

In [36]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

# General form: [expr for val in collection if condition].
# Keep only strings longer than 2 characters and upper-case them.
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

###### Dict comprehensions

In [38]:
# Dict comprehension: map each string to its position in `strings`.
loc_mapping = {val:index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

###### Set comprehensions

In [39]:
# Set comprehension: the distinct lengths of the strings (duplicates such as
# the two 3-letter words collapse to one entry).
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

###### Nested list comprehensions

In [41]:
# A list of lists of names, used by the nested-comprehension examples.
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'], 
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar'], 
            ['Peral', 'Anson', 'Daan', 'Johnny', 'Dave']]

In [44]:
# Collect, across all sublists, the names containing at least one lowercase 'n'.
names_of_interest = []

for names in all_data:
    # NOTE(review): despite its name, `enough_es` filters on the letter 'n',
    # not 'e'; count is case-sensitive, so a capital 'N' does not match.
    enough_es = [name for name in names if name.count('n') >= 1]
    names_of_interest.extend(enough_es)

names_of_interest

['John', 'Steven', 'Juan', 'Anson', 'Daan', 'Johnny']

In [47]:
# The same filter as a single nested list comprehension: the `for` clauses
# are written in the same order as the equivalent nested loops, with the
# filter condition at the end.
result = [name for names in all_data for name in names if name.count('n') >= 1]

result

['John', 'Steven', 'Juan', 'Anson', 'Daan', 'Johnny']

In [48]:
# Flatten a list of tuples into a simple list of integers.
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]

flattened
# The comprehension above is equivalent to this nested for loop:
#flattened = []
#for tup in some_tuples:
#    for x in tup:
#        flattened.append(x)


[1, 2, 3, 4, 5, 6, 7, 8, 9]

### Functions Are Objects

In [49]:
# Sample strings with inconsistent whitespace, capitalization, and punctuation.
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south   carolina##', 'West virginia?']

In [51]:
import re

def clean_strings(strings):
    """Return a cleaned copy of each string in ``strings``.

    Every value has its leading/trailing whitespace stripped, then all
    '!', '#' and '?' characters removed, and is finally title-cased.
    Whitespace inside a string is left untouched.

    Args:
        strings: Iterable of str values to clean.

    Returns:
        A new list with one cleaned string per input, in the same order.
    """
    return [
        re.sub('[!#?]', '', raw.strip()).title()
        for raw in strings
    ]

In [52]:
# Apply the cleaning function to the sample strings; a new list is returned.
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [53]:
#####################################################################################################################
# An alternative approach that you may find useful is to make a list of the operations you want to apply to strings #
#####################################################################################################################
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character removed."""
    unwanted = re.compile('[!#?]')
    return unwanted.sub('', value)

# Functions are objects, so the cleaning steps can be stored in a list, in
# the order in which they should be applied.
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings2(strings, ops):
    """Apply each function in ``ops``, in order, to every string.

    Args:
        strings: Iterable of values to transform.
        ops: Iterable of one-argument callables; the output of each one is
            fed to the next.

    Returns:
        A new list with one transformed value per input, in the same order.
    """
    def run_pipeline(text):
        # Thread the value through every operation in sequence.
        for op in ops:
            text = op(text)
        return text

    return [run_pipeline(item) for item in strings]

In [54]:
# Run the list-of-operations version on the sample strings.
clean_strings2(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']