# Chapter 3. 

# Built-in Data Structures, Functions, and Files

## 3.1 Data Structures and Sequences

### Tuple

In [1]:
# A tuple is a fixed-length, immutable sequence of Python objects. The easiest way to 
# create one is with a comma-separated sequence of values:

In [2]:
tup = 1,2,3
tup

(1, 2, 3)

In [3]:
# Create a tuple of tuples

In [4]:
nested_tup = (1,2,3) , (4,5)
nested_tup

((1, 2, 3), (4, 5))

In [5]:
# You can convert any sequence or iterator to a tuple by invoking tuple:

In [6]:
print(tuple([1,2,3]), '    ', tuple('the text'))

(1, 2, 3)      ('t', 'h', 'e', ' ', 't', 'e', 'x', 't')


In [7]:
tup = tuple('the text')
tup[0]

't'

In [8]:
# While the objects stored in a tuple may be mutable themselves, once the tuple is created 
# it’s not possible to modify which object is stored in each slot:

In [9]:
tup = tuple([1,2,True,[3,4]])
# this -> tup[2] = False , will give an error

In [10]:
# If an object inside a tuple is mutable, such as a list, you can modify it in-place:

tup[3].append([5,6,7])
tup

(1, 2, True, [3, 4, [5, 6, 7]])

In [11]:
# Concatenate tuples using + operator to produce longer tuples:

(1,2,3) + ([1,2],True) + (5,6,'some text') + (1,)

(1, 2, 3, [1, 2], True, 5, 6, 'some text', 1)

In [12]:
# Multiplying a tuple by an integer, as with lists, has the effect of concatenating together
# that many copies of the tuple:

In [13]:
print(('text', 'message', 2)*4)

('text', 'message', 2, 'text', 'message', 2, 'text', 'message', 2, 'text', 'message', 2)


#### Unpacking Tuples

In [14]:
tup = (4,5,6)
a, b, c = tup
print(a,b,c)

4 5 6


In [15]:
tup = 4, 5, (6, 7)
a, b, (c, d) = tup
print(a, b, c, d)

4 5 6 7


In [16]:
# Tuple unpacking assigns several variables at once; it also makes swapping easy (b, a = a, b):

a, b = 1, 2
print(a,b)

1 2


In [17]:
# A common use of variable unpacking is iterating over sequences of tuples or lists:

In [18]:
seq = [(1,2,3), (4,5,6), (7,8,9)]

for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a,b,c))

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [19]:
a, b, c = [(1,2,3), (4,5,6), (7,8,9)]
a

(1, 2, 3)

In [20]:
# The special syntax '*rest' plucks the first few elements and collects the remainder into a list;
# the same syntax is used in function signatures to capture an arbitrarily long list of positional arguments:

In [21]:
values = 1,2,3,4,5,6,7,8,9
a, b, *rest = values
print(a, b, rest)

1 2 [3, 4, 5, 6, 7, 8, 9]


In [22]:
values = 1,2,3,4,5,6,7,8,9
a, b, *_ = values
print(a, b, _)

1 2 [3, 4, 5, 6, 7, 8, 9]


In [23]:
# Count:   counts the number of occurrences of a value:

a = (1,2,2,2,2,2,3,4,5,6)
a.count(2)

5

### List

In [24]:
# In contrast with tuples, lists are variable-length and their contents can be modified
# in-place. You can define them using square brackets [] or using the list type function:

In [25]:
a_list = [1,2,3,None]
tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list

['foo', 'bar', 'baz']

In [26]:
b_list[1] = 'pekaboo'
b_list

['foo', 'pekaboo', 'baz']

In [27]:
# The list function is frequently used in data processing as a way to materialize an
# iterator or generator expression:

In [28]:
gen = range(10)
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

#### Adding and removing elements

In [29]:
# Elements can be "appended" to the end of the list with the append method:

In [30]:
b_list.append('ggg')
b_list

['foo', 'pekaboo', 'baz', 'ggg']

In [31]:
# Using "insert" you can insert an element at a specific location in the list:

In [32]:
b_list.insert(1,'inserted')

In [33]:
b_list

['foo', 'inserted', 'pekaboo', 'baz', 'ggg']

In [34]:
# The inverse operation to insert is "pop", which removes and returns an element at a particular index:

In [35]:
b_list.pop(3)

'baz'

In [36]:
b_list

['foo', 'inserted', 'pekaboo', 'ggg']

In [37]:
# Elements can be removed by value with "remove", which locates the first such value and removes it from the list:

In [38]:
b_list.append('foo')
b_list

['foo', 'inserted', 'pekaboo', 'ggg', 'foo']

In [39]:
b_list.remove('foo')

In [40]:
b_list

['inserted', 'pekaboo', 'ggg', 'foo']

In [41]:
# Check if a list contains a value using the in keyword:

In [42]:
print('ggg' in b_list, ',', 'someting' not in b_list)

True , True


In [43]:
# Checking whether a list contains a value is a lot slower than doing so with dicts and
# sets (to be introduced shortly), as Python makes a linear scan across the values of the
# list, whereas it can check the others (based on hash tables) in constant time.

#### Concatenating and combining lists

In [44]:
# Similar to tuples, adding two lists together with + concatenates them:

In [45]:
[4, None, 'text'] + [7, 8, (2,3)]

[4, None, 'text', 7, 8, (2, 3)]

In [46]:
# Append multiple elements by using the 'extend' method:

In [47]:
x = [4, None, 'text']
x.extend([7, 8, (2,3)])
x

[4, None, 'text', 7, 8, (2, 3)]

In [48]:
# !!!! 'extend' vs '+': 'extend' is usually preferable to '+' when cost matters
# ('+' is expensive, since a new list must be created and the objects copied over.)

In [49]:
list_of_lists = [[1,2,3,4], [2,3,4,4],[None, 'hhh']]
everything = []
for chunk in list_of_lists:
    everything.extend(chunk)
    
everything

[1, 2, 3, 4, 2, 3, 4, 4, None, 'hhh']

In [50]:
everything = []
for chunk in list_of_lists: 
    everything = everything + chunk
everything

[1, 2, 3, 4, 2, 3, 4, 4, None, 'hhh']

#### Sorting

In [51]:
# You can sort a list in-place (without creating a new object) by calling its sort function:

In [52]:
a = [8,5,7,3,4,2,9,8,23,1,2,4]
a.sort()
a

[1, 2, 2, 3, 4, 4, 5, 7, 8, 8, 9, 23]

In [53]:
b = ['text', 'abc', 'ab', 'abbbaa', 'abcsd']
b.sort(key=len)
b

['ab', 'abc', 'text', 'abcsd', 'abbbaa']

#### Binary search and maintaining a sorted list

In [54]:
# bisect.bisect finds the location where an element should be inserted to keep it sorted

In [55]:
import bisect

c = [1,2,2,2,5,6,7,9,15]

bisect.bisect(c,14)

8

In [56]:
# bisect.insort actually inserts the element into that location:

In [57]:
bisect.insort(c,14)
c

[1, 2, 2, 2, 5, 6, 7, 9, 14, 15]

In [58]:
# Warning!!! 
# The bisect module functions do not check whether the list is sorted,
# as doing so would be computationally expensive. Thus, using
# them with an unsorted list will succeed without error but may lead
# to incorrect results.

#### Slicing

In [59]:
# Slice notation consists of start:stop passed to the indexing operator []:

In [60]:
seq = [1,2,3,4,5,6,7,8,9]
seq[1:5]

[2, 3, 4, 5]

In [61]:
# Slices can also be assigned to with a sequence:

In [62]:
seq[3:4] = [15,16]
seq

[1, 2, 3, 15, 16, 5, 6, 7, 8, 9]

In [63]:
# While the element at the start index is included, the stop index is not included, so
# that the number of elements in the result is stop - start.

In [64]:
# Either the start or stop can be omitted, in which case they default to the start of the
# sequence and the end of the sequence, respectively:

In [65]:
seq[:5]

[1, 2, 3, 15, 16]

In [66]:
seq[3:]

[15, 16, 5, 6, 7, 8, 9]

In [67]:
# Negative indices slice the sequence relative to the end:

In [68]:
seq[-4:]

[6, 7, 8, 9]

In [69]:
seq[-6:-2]

[16, 5, 6, 7]

In [70]:
seq[::2] # takes every second item in the list

[1, 3, 16, 6, 8]

In [71]:
seq[::-1] # takes every element starting from the end of the list

[9, 8, 7, 6, 5, 16, 15, 3, 2, 1]

### Built-in Sequence Functions

#### enumerate

In [72]:
# A lot of times when dealing with iterators, we also get a need to keep a count of iterations

In [73]:
# Enumerate() method adds a counter to an iterable and returns it in a form of enumerate object

In [74]:
# This enumerate object can then be used directly in for loops or be converted into a list of tuples using list() method.

In [75]:
some_list = ["eat","sleep","repeat", "repeat", "repeat"]

for ele in enumerate(some_list):
    print (ele)

(0, 'eat')
(1, 'sleep')
(2, 'repeat')
(3, 'repeat')
(4, 'repeat')


In [76]:
for count,ele in enumerate(some_list): 
    print (count,ele) 

0 eat
1 sleep
2 repeat
3 repeat
4 repeat


In [77]:
l1 = ["eat","sleep","repeat"] 
s1 = "geek"
  
# creating enumerate objects 
obj1 = enumerate(l1) 
obj2 = enumerate(s1) 
  
print ("Return type:",type(obj1))
print (list(enumerate(l1))) 
  
# changing start index to 2 from 0 
print (list(enumerate(s1,2))) 

Return type: <class 'enumerate'>
[(0, 'eat'), (1, 'sleep'), (2, 'repeat')]
[(2, 'g'), (3, 'e'), (4, 'e'), (5, 'k')]


In [78]:
some_list_1 = ['foo', 'bar', 'baz']
mapping = {}

for i, v in enumerate(some_list_1):
    mapping[v] = i
    
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

#### sorted

In [79]:
# The "sorted" function returns a new sorted list from the elements of any sequence:
# This sorting is the same as in lists:

In [80]:
sorted([5,8,2,3,7,9,1,2,3])

[1, 2, 2, 3, 3, 5, 7, 8, 9]

In [81]:
sorted('the text')

[' ', 'e', 'e', 'h', 't', 't', 't', 'x']

#### zip

In [82]:
# zip “pairs” up the elements of a number of lists, tuples, or other sequences to create a list of tuples:

In [83]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = list(zip(seq1,seq2))
zipped

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [84]:
# zip can take an arbitrary number of sequences, and the number of elements it produces
# is determined by the shortest sequence:

In [85]:
seq3 = [False, True]

list(zip(seq1,seq3))
list(zip(seq1,seq2,seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [86]:
# A very common use of zip is simultaneously iterating over multiple sequences, possibly
# also combined with enumerate:

In [87]:
for i, (a,b) in enumerate(zip(seq1,seq2)):
    print('{0}: {1}, {2}'.format(i,a,b))

0: foo, one
1: bar, two
2: baz, three


In [88]:
for i, (a,b,c) in enumerate(zip(seq1,seq2,seq3),50):
    print('{0}: {1}, {2}, {3}'.format(i,a,b,c))

50: foo, one, False
51: bar, two, True


In [89]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]

first_name, last_name = zip(*pitchers)
print(first_name, '    ', last_name)
list(first_name+last_name)

('Nolan', 'Roger', 'Schilling')      ('Ryan', 'Clemens', 'Curt')


['Nolan', 'Roger', 'Schilling', 'Ryan', 'Clemens', 'Curt']

#### reversed

In [90]:
# reversed iterates over the elements of a sequence in reverse order:

In [91]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

### dict

In [92]:
empty_dict = {}

d1 = {'a': 'some value', 'b': [1,2,3,4]}
d1

{'a': 'some value', 'b': [1, 2, 3, 4]}

In [93]:
# You can access, insert, or set elements using the same syntax as for accessing elements of a list or tuple:

d1['a new key'] = 'some string'
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 'a new key': 'some string'}

In [94]:
d1['b']

[1, 2, 3, 4]

In [95]:
# You can check if a dict contains a key using the same syntax used for checking whether a list or tuple contains a value:

In [96]:
'b' in d1

True

In [97]:
d1[7], d1[5] = 'an integer', 'some value'
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 'a new key': 'some string',
 7: 'an integer',
 5: 'some value'}

In [98]:
# You can delete values either using the del keyword or the pop method (which simultaneously 
# returns the value and deletes the key):

In [99]:
d1['dummy'] = 'another value'
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 'a new key': 'some string',
 7: 'an integer',
 5: 'some value',
 'dummy': 'another value'}

In [100]:
del d1['a new key']

In [101]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 5: 'some value',
 'dummy': 'another value'}

In [102]:
del d1[5]

In [103]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dummy': 'another value'}

In [104]:
ret = d1.pop('dummy')

In [105]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [106]:
ret

'another value'

In [107]:
# The keys and values methods give you iterators of the dict’s keys and values, respectively.
# Since Python 3.7 a dict preserves insertion order, and these methods output
# the keys and values in the same corresponding order:

In [108]:
print(list(d1.keys()), '    ', list(d1.values()))

['a', 'b', 7]      ['some value', [1, 2, 3, 4], 'an integer']


In [109]:
# You can 'merge' one dict into another using the update method:
# The update method changes dicts in-place, so any existing keys in the data passed to 
# update will have their old values discarded.

#### Creating dicts from sequences

In [110]:
# It’s common to occasionally end up with two sequences that you want to pair up
# element-wise in a dict. As a first cut, you might write code like this:

In [111]:
key_list = ['a', 'b', 'c', 'd']
value_list = [1,2,3,4]

mapping = {}
for key, value in zip(key_list,value_list):
    mapping[key] = value

mapping

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

In [112]:
p = zip(key_list,value_list)
list(p)

[('a', 1), ('b', 2), ('c', 3), ('d', 4)]

In [113]:
# Since a dict is essentially a collection of 2-tuples, the dict function accepts a list of 2-tuples:

In [114]:
another_dict = dict(zip(range(5),reversed(range(5))))
another_dict

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

#### Default values

In [115]:
some_dict = {'key1':1, 'key2':2, 'key3':3, 'key4':4}
default_value = 5
# Define the key being looked up explicitly instead of relying on a `key`
# variable that happens to be left over from an earlier cell.
key = 'key5'

# Look-before-you-leap lookup: use the stored value when the key is present,
# otherwise fall back to the default.
if key in some_dict:
    value = some_dict[key]
else:
    value = default_value

value

5

In [116]:
# The dict methods get and pop can take a default value to be returned, so that
# the above if-else block can be written simply as:

In [117]:
value = some_dict.get(key, default_value)
value

5

In [118]:
# You could imagine categorizing a list of words by their first letters as a dict of lists:

In [119]:
words = ['apple', 'bat', 'bar', 'baz', 'atom']

by_letter = {}

for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'baz']}

In [120]:
# OR::: we can write this way:
# The "setdefault" dict method is for precisely this purpose. The preceding for loop can be rewritten as:

In [121]:
wordss = ['apple', 'bat', 'bar', 'baz', 'atom']
by_letters = {}

for word in wordss:
    letter = word[0]
    by_letters.setdefault(letter,[]).append(word)
    
by_letters

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'baz']}

In [122]:
from collections import defaultdict

by_letterrs = defaultdict(list)

for word in words:
    letter = word[0]
    by_letterrs[letter].append(word)
   #by_letterrs[word[0]].append(word) 
by_letterrs

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'baz']})

#### valid dict key types

In [123]:
# 'Values' can be any Python object type:
# 'Keys' have to be immutable object types (int, float, string)
#  or tuples (all the objects in the tuple have to be immutable too)

# Technical term here is 'hashability'. You can check whether an object is hashable (can be used as a key in a dict) or not.

In [124]:
hash('string')

-6255104124141719765

In [125]:
hash((1,2,(1,3)))

-43633473679719345

In [126]:
# hash((1,2,[3,5]))

# fails because the tuple contains a list, and lists are mutable (unhashable) objects

In [127]:
# Lists also can be used as a key by converting it into a tuple:

d = {}

d[tuple([1,2,3,4])] = 5
d

{(1, 2, 3, 4): 5}

### set

In [128]:
# A set is an unordered collection of unique elements.
# You can think of them as dicts with keys only and no values.

# We can create set via 'set' function or '{}'.

print(set([1,2,3,4,5,6,7,8,9]), '     ', {1,2,3,4,5,6,7,8,9})

{1, 2, 3, 4, 5, 6, 7, 8, 9}       {1, 2, 3, 4, 5, 6, 7, 8, 9}


In [129]:
# UNION of the sets: by 'union' or '|'

set_a = {1,2,3,2,3}
set_b = {2,8,7,9,6,3}

print(set_a.union(set_b), '    ', set_a | set_b)

# The result is the set of distinct elements occurring in either set.

{1, 2, 3, 6, 7, 8, 9}      {1, 2, 3, 6, 7, 8, 9}


In [130]:
# INTERSECTION: This contains the elements occurring in both sets.
# By: 'intersection' and '&'

print(set_a.intersection(set_b), '    ', set_a & set_b)

{2, 3}      {2, 3}


In [131]:
# OTHER FUNCTIONS for SET:

set_a.add(5)
# set_a.clear() --- empties the set in place
set_a.remove(5)
set_a.pop() # Remove an arbitrary element from the set a.

set_a.update(set_b)
set_a |= set_b # Set the contents of a to be the union of the elements in a and b

set_a.intersection_update(set_b)
set_a &= set_b # Set the contents of a to be the intersection of the elements in a and b

set_a.difference(set_b)
set_a - set_b 

set_a.difference_update(set_b)
set_a -= set_b

set_a.symmetric_difference(set_b)
set_a ^ set_b # All of the elements in either a or b but not both

set_a.symmetric_difference_update(set_b)
set_a ^= set_b

set_a.issubset(set_b)   # True if the elements of a are all contained in b
set_a.issuperset(set_b) # True if the elements of b are all contained in a
set_a.isdisjoint(set_b) # True if a and b have no elements in common

True

### List, Set, and Dict Comprehensions

#### List comprehensions

In [132]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

#### Dict comprehensions

In [133]:
# dict_comp = {key-expr : value-expr for value in collection if condition}

#### Set comprehensions

In [134]:
# A set comprehension looks like the equivalent list comprehension except with curly braces instead of square brackets:

In [135]:
# set_comp = {expr for value in collection if condition}

In [136]:
# Suppose we wanted a set containing just the lengths of the strings contained
# in the collection; we could easily compute this using a set comprehension:

In [137]:
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [138]:
# OR: we can do this by using 'map' function:
set(map(len,strings))

{1, 2, 3, 4, 6}

In [139]:
# As a simple dict comprehension example, we could create a lookup map of these strings to their locations in the list:

In [140]:
loc_mapping = {value: index for index, value in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

#### Nested List Comprehensions

In [148]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'], ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

names_of_interest = []

for names in all_data:
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)
    
names_of_interest

['Steven']

In [149]:
# OOOR we could do like this

result = [name for names in all_data for name in names if name.count('e') >= 2]
result

['Steven']

In [150]:
# At first, nested list comprehensions are a bit hard to wrap your head around. The for
# parts of the list comprehension are arranged according to the order of nesting, and
# any filter condition is put at the end as before. Here is another example where we
# “flatten” a list of tuples of integers into a simple list of integers:

In [151]:
some_tuple = [(1,2,3), (4,5,6), (7,8,9)]

flattened = [j for x in some_tuple for j in x]
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [152]:
# Keep in mind that the order of the for expressions would be the same if you wrote a
# nested for loop instead of a list comprehension:

flattened = []

for i in some_tuple:
    for j in i:
        flattened.append(j)
        
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [153]:
# You can have arbitrarily many levels of nesting, though if you have more than two or
# three levels of nesting you should probably start to question whether this makes sense
# from a code readability standpoint. It’s important to distinguish the syntax just shown
# from a list comprehension inside a list comprehension, which is also perfectly valid:

In [154]:
[[j for j in x] for x in some_tuple]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

## 3.2 Functions

In [155]:
# Functions are declared with the def keyword and returned from with the return keyword:

In [156]:
def my_function(x, y, z=1.5):
    """Combine the sum ``x + y`` with the factor ``z``.

    When ``z`` is greater than 1 the sum is multiplied by ``z``; otherwise
    ``z`` is divided by the sum.
    """
    scale_up = z > 1
    total = x + y
    return z * total if scale_up else z / total
    
my_function(4,8,1.5)

18.0

In [157]:
# Each function can have positional arguments and keyword arguments. Keyword arguments
# are most commonly used to specify default values or optional arguments. In
# the preceding function, x and y are positional arguments while z is a keyword argument.
# This means that the function can be called in any of these ways:

my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)
my_function(10, 20)

45.0

In [158]:
# It is possible to use keywords for passing positional arguments as
# well. In the preceding example, we could also have written:

my_function(x=5, y=6, z=7)
my_function(y=6, x=5, z=7)

77

### Namespaces, Scope, and Local Functions

In [None]:
# Functions can access variables in two different scopes: global and local. An alternative
# and more descriptive name describing a variable scope in Python is a namespace. Any
# variables that are assigned within a function by default are assigned to the local
# namespace. The local namespace is created when the function is called and immediately
# populated by the function’s arguments. After the function is finished, the local
# namespace is destroyed (with some exceptions that are outside the purview of this
# chapter). Consider the following function:

In [170]:
def func():
    """Fill a local list ``a`` with the integers 0 through 4.

    The list lives only in the function's local namespace and nothing is
    returned.
    """
    a = []
    a.extend(range(5))

        
func()
# NOTE: the list `a` built inside func() is local to that call and is gone once
# it returns; this print looks up a module-level `a` instead (and raises
# NameError if no such variable has been defined).
print(a)

[]


In [166]:
# When func() is called, the empty list a is created, five elements are appended, and
# then a is destroyed when the function exits. Suppose instead we had declared a as
# follows:

a = []


def func():
    """Append the integers 0 through 4 to the module-level list ``a``."""
    a.extend(range(5))


func()
print(a)

In [None]:
# Assigning variables outside of the function’s scope is possible, but those variables
# must be declared as global via the global keyword:

In [169]:
a = None

def bind_a_variable():
    """Rebind the module-level name ``a`` to a new empty list."""
    global a
    a = list()

bind_a_variable()

print(a)

[]


In [171]:
# Warning!!! I generally discourage use of the global keyword. Typically global
# variables are used to store some kind of state in a system. If you
# find yourself using a lot of them, it may indicate a need for object-oriented
# programming (using classes).

### Returning Multiple Values

In [172]:
def f():
    """Return the three values 5, 6 and 7 packed into one tuple."""
    a, b, c = 5, 6, 7
    return (a, b, c)

a, b, c = f()

In [173]:
return_value = f()

In [None]:
# In this case, return_value would be a 3-tuple with the three returned variables. A
# potentially attractive alternative to returning multiple values like before might be to
# return a dict instead:

In [174]:
def f():
    """Return the values 5, 6 and 7 in a dict keyed by 'a', 'b' and 'c'."""
    a, b, c = 5, 6, 7
    named_values = {'a' : a, 'b' : b, 'c' : c}
    return named_values

### Functions Are Objects

In [1]:
# Suppose we were doing some data cleaning and
# needed to apply a bunch of transformations to the following list of strings:

states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']

In [3]:
# Lots of things need to happen to make this list of strings uniform and
# ready for analysis: stripping whitespace, removing punctuation symbols, and standardizing
# on proper capitalization. One way to do this is to use built-in string methods
# along with the re standard library module for regular expressions:

import re

def clean_strings(strings):
    """Return cleaned copies of the given strings.

    Each value has its surrounding whitespace stripped, the punctuation
    characters ``!``, ``#`` and ``?`` removed, and is then converted to
    title case.

    Parameters
    ----------
    strings : iterable of str
        The raw strings to clean.

    Returns
    -------
    list of str
        The cleaned strings, in the same order as the input.
    """
    result = []
    # Iterate over the `strings` argument so the function cleans whatever
    # the caller passes in.
    for value in strings:
        value = value.strip()
        value = re.sub('[!#?]', '', value)
        value = value.title()
        result.append(value)
    return result

clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [8]:
# An alternative approach that you may find useful is to make a list of the operations
# you want to apply to a particular set of strings:

def remove_punctuation(value):
    """Return ``value`` with every ``!``, ``#`` and ``?`` character removed."""
    without_marks = re.sub('[!#?]', '', value)
    return without_marks

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Apply every callable in ``ops``, in order, to each string.

    Returns a new list holding the fully transformed value for each item of
    ``strings``.
    """
    def _run_pipeline(text):
        # Feed the output of each operation into the next one.
        for op in ops:
            text = op(text)
        return text

    return [_run_pipeline(item) for item in strings]

clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [10]:
for x in map(remove_punctuation, states):
    print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


### Anonymous (Lambda) Functions

In [2]:
# Python has support for so-called anonymous or lambda functions, which are a way of
# writing functions consisting of a single statement, the result of which is the return
# value. They are defined with the lambda keyword, which has no meaning other than
# “we are declaring an anonymous function”:

def short_function(x):
    """Return ``x`` multiplied by 2."""
    doubled = x*2
    return doubled

equiv_anon = lambda x: x*2
equiv_anon(5)

10

In [10]:
# Examples from the internet:
# Lambda can take multiple arguments and only one expression:

# Ex 1.
remainder = lambda num: num % 2
print(remainder(5))

1


In [11]:
# You should notice that the lambda function in the above script has not been assigned any name. 
# It simply returns a function object which is assigned to the identifier remainder. 
# However, despite being anonymous, it was possible for us to call it in the same way that we call a normal function. 

# Ex 2.
# Similar to Ex 1.
def remainder(num):
    """Return what is left over after dividing ``num`` by 2."""
    left_over = num % 2
    return left_over
remainder(50)

0

In [12]:
# Ex 3.
product = lambda x, y: x * y
print(product(2, 3))

6


In [17]:
# Why Use Lambda Function:

# 1. The use of anonymous function inside another function
def testfunc(num):
    return lambda x: x**num
result1 = testfunc(5)
print(result1(1))

1


In [18]:
# Filter Function:

# filter(object, iterable): The object here should be a lambda function which returns a boolean value. 
# The object will be called for every item in the iterable to do the evaluation. 
# The result is either a True or a False for every item. Note that the function can only take one iterable as the input.

some_list = [1,2,3,4,5,6,7,8,9]
filtered_list = list(filter(lambda num: (num > 7), some_list))
print(filtered_list)

[8, 9]


In [22]:
# The map() Function:

# map(object, iterable_1, iterable_2, ...): 
some_list1 = [1,2,3,3,3,3,3,5,6,7]
some_list2 = [2,3,5,6,8,7,9,8,10,2]
mapped_list = list(map(lambda num: (num % 2) , some_list))
print(mapped_list)

[1, 0, 1, 0, 1, 0, 1, 0, 1]


In [23]:
# Lambda functions are especially
# convenient in data analysis because, as you’ll see, there are many cases where data
# transformation functions will take functions as arguments. It’s often less typing (and
# clearer) to pass a lambda function as opposed to writing a full-out function declaration
# or even assigning the lambda function to a local variable.

In [25]:
def apply_to_list(some_list, f):
    """Return a new list with ``f`` applied to every element of ``some_list``."""
    return list(map(f, some_list))

the_list = [4,1,0,10,8,12,0,9]
apply_to_list(the_list, lambda x: x*2)

# We could write this way as well:
# [x * 2 for x in the_list]

[8, 2, 0, 20, 16, 24, 0, 18]

In [32]:
# Suppose you wanted to sort a collection of strings by the number of distinct letters in each string:

strings = ['ababa', 'aaa', 'abg', 'abcdd']
# The sort key is the number of distinct characters in each string: set(x)
# drops the repeated letters and len() counts what is left.
strings.sort(key=lambda x: len(set(x)))

### Currying: Partial Argument Application

In [37]:
# Suppose we had a trivial function that adds two numbers together:
def adding(x, y):
    """Return the sum of ``x`` and ``y``."""
    total = x + y
    return total

# Using this function, we could derive a new function of one variable, add_five, that
# adds 5 to its argument:

add_five = lambda y: adding(5,y)

# The second argument to adding is said to be curried. There’s nothing very fancy
# here, as all we’ve really done is define a new function that calls an existing function.
# The built-in functools module can simplify this process using the "partial" function:

from functools import partial
add_five = partial(adding, 5)
add_five(15)

20

### Generators

In [43]:
# Having a consistent way to iterate over sequences, like objects in a list or lines in a
# file, is an important Python feature. This is accomplished by means of the iterator
# protocol, a generic way to make objects iterable. For example, iterating over a dict
# yields the dict keys:

some_dict = {'a':1, 'b':2, 'c':3}
for key in some_dict:
    print(key)

a
b
c


In [45]:
# When you write for key in some_dict, the Python interpreter first attempts to create
# an iterator out of some_dict:

dict_iterator = iter(some_dict)
list(dict_iterator)

['a', 'b', 'c']

### Errors and Exception Handling

In [47]:
# As an example, Python’s float function is capable of casting a string
# to a floating-point number, but fails with ValueError on improper inputs:

In [48]:
float('1.568')

1.568

In [49]:
# float('something') this will return an error

# Suppose we wanted a version of float that fails gracefully, returning the input argument.
# We can do this by writing a function that encloses the call to float in a try/
# except block:

def attempt(x):
    """Convert ``x`` to a float, returning ``x`` unchanged if that fails.

    Parameters
    ----------
    x : object
        The value to convert.

    Returns
    -------
    float or object
        ``float(x)`` when the conversion succeeds, otherwise ``x`` itself.
    """
    try:
        return float(x)
    # Catch any ordinary error, but not KeyboardInterrupt / SystemExit,
    # which a bare ``except:`` would also silently swallow.
    except Exception:
        return x
    
attempt('something')

# The code in the except part of the block will only be executed if float(x) raises an
# exception:

'something'

In [50]:
attempt('1.235')

1.235

In [52]:
# You can catch multiple exception types by writing a tuple of exception types instead
def attempt(x):
    """Convert ``x`` to a float; on ValueError or TypeError return ``x`` as is."""
    try:
        number = float(x)
    except (ValueError, TypeError):
        return x
    return number
    
attempt('something')

'something'

In [53]:
# In some cases, you may not want to suppress an exception, but you want some code
# to be executed regardless of whether the code in the try block succeeds or not. To do
# this, use 'finally':

In [55]:
# f = open(path, 'w')
# 
# try:
#     write_to_file(f)
# finally:
#     f.close()

In [None]:
# Here, the file handle f will always get closed. Similarly, you can have code that executes
# only if the try: block succeeds using else:

In [None]:
# f = open(path, 'w')
# 
# try:
#     write_to_file(f)
# except:
#     print('fail')
# else:
#     print('Succeeded')
# finally:
#     f.close()

## 3.3 Files and the Operating System

In [None]:
# Read from the Book page 80 (98)