# 3. Built-in Data Structures, Functions, and Files

## 3.1 Data Structures and Sequences

### Tuple

In [2]:
tup = 4, 5, 6
tup

(4, 5, 6)

In [3]:
# When you’re defining tuples in more complicated expressions, it’s often necessary to enclose the values in parentheses, as in this example of creating a tuple of tuples:

nested_tup = (4,5,6),( 7,8)
nested_tup

((4, 5, 6), (7, 8))

In [4]:
tuple([4,0,2])

(4, 0, 2)

In [10]:
tup = tuple('string')
tup, tup[0]


(('s', 't', 'r', 'i', 'n', 'g'), 's')

In [11]:
# While the objects stored in a tuple may be mutable themselves, once the tuple is created 
# it’s not possible to modify which object is stored in each slot:

tup = tuple(['foo', [1, 2], True])
tup[2] = False

TypeError: 'tuple' object does not support item assignment

In [12]:
# You can concatenate tuples using the + operator to produce longer tuples:
(4, None, 'foo') + (6, 0) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [18]:
print(type(('bar',)), type(('bar')))

<class 'tuple'> <class 'str'>


In [13]:
(4, None, 'foo') + (6, 0) + ('bar')

TypeError: can only concatenate tuple (not "str") to tuple

In [20]:
# If an object inside a tuple is mutable, such as a list, you can modify it in-place:

tup = tuple(['foo', [1, 2], True])
tup[1].append(3)
tup

('foo', [1, 2, 3], True)

In [16]:
# You can concatenate tuples using the + operator to produce longer tuples:

(4, None, 'foo',) + (6, 0,) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [21]:
# Multiplying a tuple by an integer, as with lists, has the effect of concatenating together
# that many copies of the tuple:

('foo', 'bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

#### Unpacking tuples

In [22]:
tup = (4, 5, 6)
a, b, c = tup
print(a, b, c)

4 5 6


In [23]:
# Even sequences with nested tuples can be unpacked:

tup = 4, 5, (6, 7)
a, b, (c, d) = tup
print(a, b, c, d)

4 5 6 7


In [24]:
# Using this functionality you can easily swap variable names, a task which in many languages might look like:
# tmp = a; a = b; b = tmp

# But, in Python, the swap can be done like this:

a, b = 1, 2
a, b = b, a
print(a, b)

2 1


In [25]:
# A common use of variable unpacking is iterating over sequences of tuples or lists:

seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a, b, c))

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [29]:
# The Python language recently acquired some more advanced tuple unpacking to help
# with situations where you may want to “pluck” a few elements from the beginning of
# a  tuple.  This  uses  the  special  syntax  *rest,  which  is  also  used  in  function  signatures
# to capture an arbitrarily long list of positional arguments:

values = 1, 2, 3, 4, 5
a, b, *rest = values
rest

[3, 4, 5]

In [30]:
# This  rest  bit  is  sometimes  something  you  want  to  discard;  there  is  nothing  special
# about the rest name. As a matter of convention, many Python programmers will use
# the underscore (_) for unwanted variables:

values = 1, 2, 3, 4, 5
a, b, *_ = values
a, b

(1, 2)

#### Tuple methods

In [31]:
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

4

### List

In [33]:
a_list = [2, 3, 7, None]
a_list

[2, 3, 7, None]

In [41]:
tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list


['foo', 'bar', 'baz']

In [42]:
b_list[1] = 'peekaboo'
b_list

['foo', 'peekaboo', 'baz']

In [37]:
# The  list  function  is  frequently  used  in  data  processing  as  a  way  to  materialize  an
# iterator or generator expression:

gen = range(10)
gen
list(gen)


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

#### Adding and removing elements

In [43]:
# Elements can be appended to the end of the list with the append method:

b_list.append('dwarf')
b_list

['foo', 'peekaboo', 'baz', 'dwarf']

In [44]:
# Using insert you can insert an element at a specific location in the list:

b_list.insert(1, 'red')
b_list

['foo', 'red', 'peekaboo', 'baz', 'dwarf']

In [45]:
# The inverse operation to insert is pop, which removes and returns an element at a particular index:

b_list.pop(2)
b_list

['foo', 'red', 'baz', 'dwarf']

In [50]:
# Elements can be removed by value with remove, which locates the first such value and removes it from the list:

b_list = ['foo', 'bar', 'baz']
b_list.append('foo')
b_list.remove('foo')
b_list

['bar', 'baz', 'foo']

In [51]:
# Check if a list contains a value using the in keyword:

'dwarf' in b_list

False

In [52]:
'dwarf' not in b_list

True

#### Concatenating and combining lists

In [53]:
# Similar to tuples, adding two lists together with + concatenates them:

[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [56]:
# If you have a list already defined, you can append multiple elements to it using the extend method:

x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

In [55]:
# Note that list concatenation by addition is a comparatively expensive operation since a new list must be created and the objects copied over. Using extend to append elements to an existing list, especially if you are building up a large list, is usually preferable. Thus,
#
#
# everything = []
# for chunk in list_of_lists:
#     everything.extend(chunk)
# 
# is faster than the concatenative alternative:
# 
# everything = []
# for chunk in list_of_lists:
#     everything = everything + chunk


[4, None, 'foo', 7, 8, (2, 3)]

#### Sorting

In [58]:
# You can sort a list in-place (without creating a new object) by calling its sort method:

a = [7, 2, 5, 1, 3]
a.sort()
a

[1, 2, 3, 5, 7]

In [59]:
# we could sort a collection of strings by their lengths:

b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

['He', 'saw', 'six', 'small', 'foxes']

#### Binary search and maintaining a sorted list
The built-in bisect module implements binary search and insertion into a sorted list.
bisect.bisect finds the location where an element should be inserted to keep it sorted, while bisect.insort actually inserts the element into that location:

In [76]:
   import bisect 

   c = [1, 2, 2, 2, 3, 4, 7]
   bisect.bisect(c, 2)

4

In [77]:
bisect.bisect(c, 5)

6

In [78]:
bisect.insort(c, 6)
c

[1, 2, 2, 2, 3, 4, 6, 7]

#### Slicing

In [79]:
 seq = [7, 2, 3, 7, 5, 6, 0, 1]
 seq[1:5]

[2, 3, 7, 5]

In [80]:
seq[-6:-2]

[3, 7, 5, 6]

In [81]:
# A step can also be used after a second colon to, say, take every other element:

seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[::2]

[7, 3, 5, 0]

In [82]:
# A clever use of this is to pass -1, which has the useful effect of reversing a list or tuple:

seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[::-1]

[1, 0, 6, 5, 7, 3, 2, 7]

In [83]:

seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[::-2]

[1, 6, 7, 2]

### Built-in Sequence Functions

#### enumerate
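It’s common when iterating over a sequence to want to keep track of the index of the current item. A do-it-yourself approach would maintain a counter by hand; the built-in enumerate function does this for you by returning a sequence of (index, value) tuples: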

In [84]:
some_list = ['foo', 'bar', 'baz']
mapping = {}

for i, v in enumerate(some_list):
    mapping[i] = v

mapping

{0: 'foo', 1: 'bar', 2: 'baz'}

#### sorted
The sorted function returns a new sorted list from the elements of any sequence:

In [85]:
sorted([7, 1, 2, 6, 0, 3, 2])

[0, 1, 2, 2, 3, 6, 7]

In [86]:
sorted('horse race')

[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']

#### zip
zip “pairs” up the elements of a number of lists, tuples, or other sequences. In Python 3 it returns a lazy iterator of tuples, which can be materialized with list:

In [88]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
zipped, list(zipped) 

(<zip at 0x23d391fe740>, [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])

In [93]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']

for i, j in zip(seq1, seq2):
    print(i, j)

foo one
bar two
baz three


In [94]:
# zip can take an arbitrary number of sequences, and the number of elements it produces is determined by the shortest sequence:

seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
seq3 = [False, True]

list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [96]:
# A very common use of zip is simultaneously iterating over multiple sequences, 
# possibly also combined with enumerate:

seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']

for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

0: foo, one
1: bar, two
2: baz, three


In [104]:
# Given  a  “zipped”  sequence,  zip  can  be  applied  in  a  clever  way  to  “unzip”  the
# sequence.  Another  way  to  think  about  this  is  converting  a  list  of  rows  into  a  list  of
# columns. The syntax, which looks a bit magical, is:

pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]
first_name, last_name = zip(*pitchers)
first_name, last_name

(('Nolan', 'Roger', 'Schilling'), ('Ryan', 'Clemens', 'Curt'))

#### reversed
reversed iterates over the elements of a sequence in reverse order:

In [106]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

### dict
dict is likely the most important built-in Python data structure. A more common name for it is hash map or associative array. It is a flexibly sized collection of key-value pairs, where key and value are Python objects. One approach for creating one is to use curly braces {} and colons to separate keys and values:

In [107]:
empty_dict = {}
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
d1

{'a': 'some value', 'b': [1, 2, 3, 4]}

In [108]:
# You can access, insert, or set elements using the same syntax as for accessing elements of a list or tuple:

d1[7] = 'an integer'
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [109]:
d1['b']

[1, 2, 3, 4]

In [110]:
# You can check if a dict contains a key using the same syntax used for checking whether a list or tuple contains a value:

'b' in d1

True

In [118]:
# You can delete values either using the del keyword or the pop method (which simultaneously returns the value and deletes the key):

d1[5] = 'some value'
print(d1)

d1['dummy'] = 'another value'
print(d1)

del d1[5]
print(d1)

ret = d1.pop('dummy')
print(ret)

print(d1)


{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer', 5: 'some value'}
{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer', 5: 'some value', 'dummy': 'another value'}
{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer', 'dummy': 'another value'}
another value
{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}


In [120]:
# The keys and values methods give you iterators of the dict’s keys and values, respectively. Since Python 3.7 a dict preserves insertion order, and these methods output the keys and values in that same order:

list(d1.keys())

['a', 'b', 7]

In [121]:
list(d1.values())

['some value', [1, 2, 3, 4], 'an integer']

In [122]:
# You can merge one dict into another using the update method:

d1.update({'b' : 'foo', 'c' : 12})

d1


{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

#### Creating dicts from sequences
It’s common to occasionally end up with two sequences that you want to pair up element-wise in a dict. As a first cut, you might write code like this:

mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value

In [125]:
 mapping = dict(zip(range(5), reversed(range(5))))
 mapping


{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

#### Default values

It’s very common to have logic like:

if key in some_dict:
    value = some_dict[key]
else:
    value = default_value


The dict methods get and pop can take a default value to be returned, so that the above if-else block can be written simply as:

 value = some_dict.get(key, default_value)


In [126]:
words = ['apple', 'bat', 'bar', 'atom', 'book']

by_letter = {}

for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)

by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [129]:
# The setdefault dict method is for precisely this purpose.
# The preceding for loop can be rewritten as:
# 
# 
# for word in words:    
#     letter = word[0]    
#     by_letter.setdefault(letter, []).append(word)

words = ['apple', 'bat', 'bar', 'atom', 'book']

by_letter = {}
for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)

by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

#### Valid dict key types

While the values of a dict can be any Python object, the keys generally have to be immutable objects like scalar types (int, float, string) or tuples (all the objects in the tuple need to be immutable, too). The technical term here is hashability. You can check whether an object is hashable (can be used as a key in a dict) with the hash function:

In [130]:
hash('string')

-8178162221448115541

In [131]:
hash((1,2,(2,3)))

-9209053662355515447

In [132]:
hash((1,2,[2,3]))

TypeError: unhashable type: 'list'

In [133]:
# To use a list as a key, one option is to convert it to a tuple, which can be hashed as long as its elements also can:

d = {}

d[tuple([1,2,3])] = 5
d

{(1, 2, 3): 5}

### set

A set is an unordered collection of unique elements. You can think of them like dicts, but keys only, no values. A set can be created in two ways: via the set function or via a set literal with curly braces:

In [134]:
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [135]:
{2, 2, 2, 1, 3, 3}

{1, 2, 3}

In [137]:
# Sets  support  mathematical  set  operations  like  union,  intersection,  difference,  and
# symmetric difference. Consider these two example sets:

a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

print(a.union(b))
print(a|b)

{1, 2, 3, 4, 5, 6, 7, 8}
{1, 2, 3, 4, 5, 6, 7, 8}


In [138]:
print(a.intersection(b))
print(a & b)

{3, 4, 5}
{3, 4, 5}


In [139]:
# All of the logical set operations have in-place counterparts, which enable you to replace the contents of the set on the left side of the operation with the result. For very large sets, this may be more efficient:

c = a.copy()

In [142]:
c |= b
c

{1, 2, 3, 4, 5, 6, 7, 8}

In [143]:
d = a.copy()

d &= b
d

{3, 4, 5}

In [145]:
# Like dicts, set elements generally must be immutable. To have list-like elements, you must convert them to tuples:

my_data = [1, 2, 3, 4]
my_set = {tuple(my_data)}
my_set

{(1, 2, 3, 4)}

In [146]:
# You can also check if a set is a subset of (is contained in) or a superset of (contains all
# elements of ) another set:

a_set = {1, 2, 3, 4, 5}

{1, 2, 3}.issubset(a_set)

True

In [147]:
a_set.issuperset({1, 2, 3})

True

In [148]:
# Sets are equal if and only if their contents are equal:

{1, 2, 3} == {3, 2, 1}

True

### List, Set, and Dict Comprehensions (이해)

List comprehensions are one of the most-loved Python language features. They allow you to concisely form a new list by filtering the elements of a collection, transforming the elements passing the filter in one concise expression. They take the basic form:

[expr for val in collection if condition]

In [149]:
# The filter condition can be omitted, leaving only the expression. For example, given a list of strings, we could filter out strings with length 2 or less and also convert them to uppercase like this:

strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

[ x.upper() for x in strings if len(x) > 2 ]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [None]:
# Set  and  dict  comprehensions  are  a  natural  extension,  producing  sets  and  dicts  in  an
# idiomatically similar way instead of lists. A dict comprehension looks like this:
# 
# dict_comp = {key-expr : value-expr for value in collection if condition}

In [None]:
# A  set  comprehension  looks  like  the  equivalent  list  comprehension  except  with  curly
# braces instead of square brackets:
# 
# set_comp = {expr for value in collection if condition}

In [151]:
# Like list comprehensions, set and dict comprehensions are mostly conveniences, but they similarly can make code both easier to write and read. Consider the list of strings from before. Suppose we wanted a set containing just the lengths of the strings contained in the collection; we could easily compute this using a set comprehension:

strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

unique_lengths = { len(x) for x in strings }
unique_lengths

{1, 2, 3, 4, 6}

In [154]:
# We could also express this more functionally using the map function, introduced shortly:

set(map(len, strings))

{1, 2, 3, 4, 6}

In [157]:
# As  a  simple  dict  comprehension  example,  we  could  create  a  lookup  map  of  these
# strings to their locations in the list:

strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

loc_mapping = { val : index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

#### Nested list comprehensions

In [160]:
# Suppose we have a list of lists containing some English and Spanish names:

all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

names_of_interest = []
for names in all_data:
    enough_es = [ name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)

names_of_interest


['Steven']

In [161]:
# You  can  actually  wrap  this  whole  operation  up  in  a  single  nested  list  comprehension,
# which will look like:

all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

[ name for names in all_data for name in names if name.count('e') >= 2 ]

['Steven']

In [164]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

flattened = [ x for tup in some_tuples for x in tup ]
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [170]:
# You can have arbitrarily many levels of nesting, though if you have more than two or three levels of nesting you should probably start to question whether this makes sense from a code readability standpoint. It’s important to distinguish the syntax just shown from a list comprehension inside a list comprehension, which is also perfectly valid:

[ [x for x in tup] for tup in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [169]:
[ x for tup in some_tuples for x in tup ]

[1, 2, 3, 4, 5, 6, 7, 8, 9]

## 3.2 Functions

In [174]:
# Functions are declared with the def keyword and returned from with the return keyword:

def my_function(x, y, z=1.5):
    """Combine ``x + y`` with ``z``, multiplying or dividing depending on ``z``.

    Returns ``z * (x + y)`` when ``z`` is greater than 1, and
    ``z / (x + y)`` otherwise.
    """
    # Guard clause for the "small z" case; everything else is scaled up.
    if not z > 1:
        return z / (x + y)
    return z * (x + y)

my_function(5, 6, z=0.7)  
my_function(3.14, 7, 3.5)      
my_function(10, 20)

45.0

### Namespaces, Scope, and Local Functions

Functions can access variables in two different scopes: global and local. An alternative and more descriptive name describing a variable scope in Python is a namespace. Any variables that are assigned within a function by default are assigned to the local namespace. The local namespace is created when the function is called and immediately populated by the function’s arguments. After the function is finished, the local namespace is destroyed (with some exceptions that are outside the purview of this chapter). Consider the following function:

def func():
    """Build a local list ``a`` holding 0..4; it is discarded when the call returns."""
    a = []
    for i in range(5):
        a.append(i)


When  func()  is  called,  the  empty  list  a  is  created,  five  elements  are  appended,  and
then  a  is  destroyed  when  the  function  exits.  Suppose  instead  we  had  declared  a  as
follows:

a = []
def func():
    """Append the integers 0 through 4 to the module-level list ``a``."""
    a.extend(range(5))


In [175]:
# When  func()  is  called,  the  empty  list  a  is  created,  five  elements  are  appended,  and
# then  a  is  destroyed  when  the  function  exits.  Suppose  instead  we  had  declared  a  as
# follows:

a = []

def func():
    """Add 0, 1, 2, 3, 4 (in that order) to the end of the global list ``a``.

    ``a`` is only mutated, never rebound, so no ``global`` declaration is needed.
    """
    a.extend(range(5))


[]

In [184]:
# Assigning variables outside of the function’s scope is possible, but those variables must be declared as global via the global keyword:

a = None

def bind_a_variable():
    """Assign ``['b']`` to the name ``a`` inside the function body."""
    # The ``global a`` declaration below is commented out, so the assignment
    # binds a new *local* ``a`` and the module-level ``a`` is left untouched.
    # Uncomment it to make the assignment rebind the global name instead.
    # global a
    a = ['b']

bind_a_variable()
print(a)  # prints None: only the function-local name was bound


None


In [183]:
b = []

def func():
    """Extend the module-level list ``b`` with the integers 0 through 4."""
    b.extend(range(5))

func()
print(b)

[0, 1, 2, 3, 4]


In [185]:
a = []

def bind_a_variable():
    """Mutate (rather than rebind) the global list ``a`` by adding 2 at its end."""
    a.extend([2])

bind_a_variable()
print(a)


[2]


### Returning Multiple Values

In [186]:
def f():
    """Return the three values 5, 6 and 7 packed into a single tuple."""
    first, second, third = 5, 6, 7
    return first, second, third
a, b, c = f()
print(a, b, c)

5 6 7


###  Functions Are Objects

In [187]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south   carolina##', 'West virginia?']

import re

def clean_strings(strings):
    """Return a cleaned copy of ``strings``.

    Each entry has surrounding whitespace stripped, the characters
    ``!``, ``#`` and ``?`` removed, and is then title-cased. Whitespace
    inside a string is left as it is.
    """
    return [re.sub('[!#?]', '', raw.strip()).title() for raw in strings]

clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [188]:
# An alternative approach that you may find useful is to make a list of the operations you want to apply to a particular set of strings:

def remove_punctuation(value):
    """Return ``value`` with every ``!``, ``#`` and ``?`` character removed."""
    return re.sub(pattern='[!#?]', repl='', string=value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Apply every callable in ``ops``, in order, to each string.

    Returns a new list with one transformed value per input string.
    """
    def _run_pipeline(text):
        # Feed the value through each operation in turn.
        for op in ops:
            text = op(text)
        return text

    return [_run_pipeline(item) for item in strings]

clean_strings(states, clean_ops)    

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [195]:
# You can use functions as arguments to other functions like the built-in map function,
# which applies a function to a sequence of some kind:

states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south   carolina##', 'West virginia?']

def remove_punctuation(value):
    """Strip the punctuation characters ``!``, ``#`` and ``?`` from ``value``."""
    cleaned = re.compile('[!#?]').sub('', value)
    return cleaned

for x in map(remove_punctuation, states):
    print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


In [193]:
states

['   Alabama ',
 'Georgia!',
 'Georgia',
 'georgia',
 'FlOrIda',
 'south   carolina##',
 'West virginia?']

### Anonymous (Lambda) Functions

Python has support for so-called anonymous or lambda functions, which are a way of writing functions consisting of a single statement, the result of which is the return value. They are defined with the lambda keyword, which has no meaning other than “we are declaring an anonymous function”:

In [196]:
def short_function(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

equiv_anon = lambda x: x * 2

In [198]:
def apply_to_list(some_list, f):
    """Call ``f`` on every element of ``some_list`` and return the results as a new list."""
    return list(map(f, some_list))

ints = [4,0,1,5,6]
apply_to_list(ints, lambda x: x * 2)    

[8, 0, 2, 10, 12]

In [200]:
# You could also have written [x * 2 for x in ints], but here we were able to succinctly pass a custom operator to the apply_to_list function.

# The loop variable is `x` so that the built-in type `int` is not shadowed
# inside the comprehension.
[x * 2 for x in ints]

[8, 0, 2, 10, 12]

In [207]:
# Here we could pass a lambda function to the list’s sort method:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

strings.sort(key=len)
print(strings)

# Sort by the number of distinct characters in each string, fewest first.
# set() accepts a string directly, so no intermediate list is needed; the
# sort is stable, so ties keep the length-sorted order produced above.
strings.sort(key=lambda x: len(set(x)))
print(strings)


['foo', 'bar', 'card', 'aaaa', 'abab']
['aaaa', 'foo', 'abab', 'bar', 'card']


In [217]:
a = list('abab')
b = set(list(a))
c = len(set(list(b)))
print(a, b, c)

['a', 'b', 'a', 'b'] {'a', 'b'} 2


### Currying: Partial Argument Application

Currying is computer science jargon (named after the mathematician Haskell Curry) that means deriving new functions from existing ones by partial argument application. For example, suppose we had a trivial function that adds two numbers together:

    def add_numbers(x, y):
        return x + y

Using  this  function,  we  could  derive  a  new  function  of  one  variable,  add_five,  that
adds 5 to its argument:    

    add_five = lambda y: add_numbers(5, y)

In [220]:
from functools import partial

def add_numbers(x, y):
    """Return the sum ``x + y``."""
    total = x + y
    return total

add_five = partial(add_numbers, 5)
add_five 

functools.partial(<function add_numbers at 0x0000023D395B7310>, 5)

### Generators

Having a consistent way to iterate over sequences, like objects in a list or lines in a
file, is an important Python feature. This is accomplished by means of the iterator
protocol, a generic way to make objects iterable.

In [1]:
some_dict = {'a': 1, 'b': 2, 'c': 3}

for key in some_dict:
    print(key)


a
b
c


In [4]:
def squares(n=10):
    """Generator yielding the squares 1**2, 2**2, ..., n**2.

    The announcement line is printed when iteration first starts, not
    when ``squares`` is called, because the body of a generator only
    runs once a value is requested.
    """
    print('Generating squares from 1 to {0}'.format(n ** 2))
    yield from (i ** 2 for i in range(1, n + 1))

gen = squares()
for x in gen:
    print(x, end=' ')

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

#### Generator expressions

Another even more concise way to make a generator is by using a generator expression.
This is a generator analogue to list, dict, and set comprehensions; to create one,
enclose what would otherwise be a list comprehension within parentheses instead of
brackets:

In [6]:
gen = (x ** 2 for x in range(100))
gen

<generator object <genexpr> at 0x0000019CBDD924A0>

In [7]:
# This is completely equivalent to the following more verbose generator:

def _make_gen():
    for x in range(100):
        yield x ** 2
gen = _make_gen()
gen

<generator object _make_gen at 0x0000019CBD36A900>

In [8]:
# Generator expressions can be used instead of list comprehensions as function arguments in many cases:

sum(x ** 2 for x in range(100))

328350

In [9]:
dict((i, i **2) for i in range(5))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

#### itertools module

The standard library itertools module has a collection of generators for many common
data algorithms. For example, groupby takes any sequence and a function,
grouping consecutive elements in the sequence by return value of the function. Here’s
an example:


In [None]:
import itertools

first_letter = lambda x: x[0]

names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

# groupby only groups *consecutive* items, so 'Albert' forms its own 'A' group.
# The loop variable is `group`, which leaves the input list `names` intact
# after the loop instead of rebinding it to the last group iterator.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group)) # group is a lazy iterator

### Errors and Exception Handling

In [13]:
def attempt_float(x):
    """Convert ``x`` to ``float`` if possible, otherwise return ``x`` unchanged.

    Only the errors ``float()`` raises for unconvertible input are
    suppressed: ``ValueError`` (malformed string), ``TypeError``
    (unsupported type) and ``OverflowError`` (integer too large for a
    float). Anything else, such as ``KeyboardInterrupt``, propagates to
    the caller instead of being silently swallowed.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return x
    
res = attempt_float(15)
print(res)

res = attempt_float('test')
print(res)

15.0
test


## 3.3 Files and the Operating System

Most of this book uses high-level tools like pandas.read_csv to read data files from
disk into Python data structures. However, it’s important to understand the basics of
how to work with files in Python. Fortunately, it’s very simple, which is one reason
why Python is so popular for text and file munging.

In [2]:
# The lines come out of the file with the end-of-line (EOL) markers intact, so you’ll
# often see code to get an EOL-free list of lines in a file like:

path = 'examples/segismundo.txt'
f = open(path)

# Iterate over the handle opened above; calling open(path) again here would
# create a second file object that is never closed.
result = [ x.rstrip() for x in f ]
print(result)

# When you use open to create file objects, it is important to explicitly close the file
# when you are finished with it. Closing the file releases its resources back to the
# operating system:

f.close()

['Sueña el rico en su riqueza,', 'que más cuidados le ofrece;', '', 'sueña el pobre que padece', 'su miseria y su pobreza;', '', 'sueña el que a medrar empieza,', 'sueña el que afana y pretende,', 'sueña el que agravia y ofende,', '', 'y en el mundo, en conclusión,', 'todos sueñan lo que son,', 'aunque ninguno lo entiende.']


In [3]:
# One of the ways to make it easier to clean up open files is to use the with statement:
with open(path) as f: 
    lines = [x.rstrip() for x in f]

print(lines)


['Sueña el rico en su riqueza,', 'que más cuidados le ofrece;', '', 'sueña el pobre que padece', 'su miseria y su pobreza;', '', 'sueña el que a medrar empieza,', 'sueña el que afana y pretende,', 'sueña el que agravia y ofende,', '', 'y en el mundo, en conclusión,', 'todos sueñan lo que son,', 'aunque ninguno lo entiende.']


In [4]:
f = open(path)
f.read(10)

'Sueña el r'

In [5]:
f2 = open(path, 'rb') # Binary mode
f2.read(10)

b'Sue\xc3\xb1a el '

In [6]:
# The read method advances the file handle’s position by the number of bytes read.
# tell gives you the current position:

print(f.tell())
f2.tell()


11


10

In [7]:
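# Even though we read 10 characters from the file, the position is 11 because it took
# that many bytes to decode 10 characters using the default encoding. You can check
# the interpreter's default string encoding in the sys module (note that open() itself
# defaults to the locale's preferred encoding, see locale.getpreferredencoding()):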

import sys

sys.getdefaultencoding()


'utf-8'

In [11]:
# seek changes the file position to the indicated byte in the file:

print(f.seek(3))

f.read(1)

f.close()
f2.close()


ValueError: I/O operation on closed file.

In [10]:
with open(path) as f: 
    lines = [x.rstrip() for x in f]

print(lines)


['Sueña el rico en su riqueza,', 'que más cuidados le ofrece;', '', 'sueña el pobre que padece', 'su miseria y su pobreza;', '', 'sueña el que a medrar empieza,', 'sueña el que afana y pretende,', 'sueña el que agravia y ofende,', '', 'y en el mundo, en conclusión,', 'todos sueñan lo que son,', 'aunque ninguno lo entiende.']


In [12]:
# To write text to a file, you can use the file’s write or writelines methods. For example,
# we could create a version of the file at path (examples/segismundo.txt) with no blank lines like so:

with open('examples/tmp.txt', 'w') as handle, open(path) as source:
    # Both files are managed by the with statement, so the source handle is
    # closed as well. A line of length 1 is just the newline, i.e. a blank line.
    handle.writelines(x for x in source if len(x) > 1)

In [13]:
with open('examples/tmp.txt') as f:
    lines = f.readlines()

lines    

['Sueña el rico en su riqueza,\n',
 'que más cuidados le ofrece;\n',
 'sueña el pobre que padece\n',
 'su miseria y su pobreza;\n',
 'sueña el que a medrar empieza,\n',
 'sueña el que afana y pretende,\n',
 'sueña el que agravia y ofende,\n',
 'y en el mundo, en conclusión,\n',
 'todos sueñan lo que son,\n',
 'aunque ninguno lo entiende.\n']

### Bytes and Unicode with Files

In [14]:
with open(path) as f:
    chars = f.read(10)

chars

'Sueña el r'

In [15]:
with open(path, 'rb') as f:
    data = f.read(10)

data

b'Sue\xc3\xb1a el '

In [16]:
data.decode('utf8')

'Sueña el '

In [19]:
# Cutting the bytes in the middle of a multi-byte UTF-8 character (ñ is the two
# bytes \xc3\xb1) makes decoding fail, so catch and show the error instead of
# aborting the cell before the next statement runs:
try:
    data[:4].decode('utf8')
except UnicodeDecodeError as err:
    print(err)

data[:5].decode('utf8')

'Sueñ'

In [20]:
# Text mode, combined with the encoding option of open, provides a convenient way
# to convert from one Unicode encoding to another:

import os

path = 'examples/segismundo.txt'
sink_path = 'examples/sink.txt'

# Mode 'xt' creates the file for text writing and raises FileExistsError if it
# already exists, so the sink file is removed at the end to keep this cell
# re-runnable.
with open(path) as source, open(sink_path, 'xt', encoding='iso-8859-1') as sink:
    sink.write(source.read())

with open(sink_path, encoding='iso-8859-1') as f:
    print(f.read(10))

os.remove(sink_path)


Sueña el r
