In [1]:
key_list = ['k0', 'k1', 'k2', 'k3', 'k4']
value_list = list(range(5))

In [2]:
value_list

[0, 1, 2, 3, 4]

In [3]:
zip(key_list, value_list)

<zip at 0x7f8e9a17df08>

In [4]:
# Build the dict the long way: zip pairs each key with the value at the same
# position, and the pairs are inserted one at a time.
mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value
print(mapping)

{'k0': 0, 'k1': 1, 'k2': 2, 'k3': 3, 'k4': 4}


In [5]:
mapping = dict(zip(key_list, value_list))
print(mapping)

{'k0': 0, 'k1': 1, 'k2': 2, 'k3': 3, 'k4': 4}


In [6]:
value = mapping.get('k5', 0)
print(value)

0


In [7]:
words = ['apple', 'banana', 'bat', 'bar', 'art', 'book']

# Group the words by their first character. setdefault returns the list
# already stored under the letter, or stores and returns a new empty list
# when the letter has not been seen yet.
by_letter = {}
for word in words:
    letter = word[0]
    bucket = by_letter.setdefault(letter, [])
    bucket.append(word)

print(by_letter)

{'a': ['apple', 'art'], 'b': ['banana', 'bat', 'bar', 'book']}


In [8]:
from collections import defaultdict

by_letter = defaultdict(list)
print(by_letter)

for word in words:
    by_letter[word[0]].append(word)

print(by_letter)

defaultdict(<class 'list'>, {})
defaultdict(<class 'list'>, {'a': ['apple', 'art'], 'b': ['banana', 'bat', 'bar', 'book']})


In [9]:
hash('str')

8564087897873643899

In [10]:
hash((1, 2, (2, 6)))

1097644470112866575

In [11]:
hash((1, 2, [2, 3]))

TypeError: unhashable type: 'list'

In [12]:
d = {}
d[tuple([1, 2, 3])] = 5
d

{(1, 2, 3): 5}

In [13]:
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [14]:
{2, 2, 2, 1, 3, 3}

{1, 2, 3}

In [15]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [16]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [17]:
a | b

{1, 2, 3, 4, 5, 6, 7, 8}

In [18]:
a.intersection(b)

{3, 4, 5}

In [19]:
a & b

{3, 4, 5}

In [20]:
a.add(6)
a

{1, 2, 3, 4, 5, 6}

In [21]:
a.remove(6)
a

{1, 2, 3, 4, 5}

In [22]:
x = a.pop()
x

1

In [23]:
a

{2, 3, 4, 5}

In [24]:
a.pop()
a

{3, 4, 5}

In [25]:
a.add(1)
a

{1, 3, 4, 5}

In [26]:
a.add(2)
a

{1, 2, 3, 4, 5}

In [27]:
a - b

{1, 2}

In [28]:
a ^ b

{1, 2, 6, 7, 8}

In [29]:
a & b

{3, 4, 5}

In [30]:
a.issubset(b)

False

In [31]:
a.isdisjoint(b)

False

In [32]:
a.issuperset(b)

False

In [33]:
my_data = [1, 2, 3, 4]
my_set = {tuple(my_data)}

In [34]:
my_set

{(1, 2, 3, 4)}

In [35]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

In [36]:
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [37]:
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [38]:
set(map(len, strings))

{1, 2, 3, 4, 6}

In [39]:
loc_mapping = {val: index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

In [40]:
all_data = [
    ['John', 'Emily', 'Michael', 'Mary', 'Steven'], 
    ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

In [41]:
names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)
print(names_of_interest)

['Steven']


In [42]:
result = [name for names in all_data for name in names if name.count('e') >= 2]
result

['Steven']

In [43]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup if x % 2 == 0]
flattened

[2, 4, 6, 8]

In [44]:
[[x for x in tup] for tup in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [45]:
[x for tup in some_tuples for x in tup]

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [46]:
states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south carolina##', 'West virginia?']

In [47]:
import re


def clean_strings(strings):
    """Return a list with a cleaned copy of every string in ``strings``.

    Each value has its surrounding whitespace stripped, then every '!',
    '#' and '?' character removed, and is finally title-cased.
    """
    return [re.sub('[!#?]', '', value.strip()).title() for value in strings]

In [48]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [49]:
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character removed."""
    return re.sub('[!#?]', '', value)


# Cleaning steps, applied to each string in this order.
clean_ops = [str.strip, remove_punctuation, str.title]


def clean_strings(strings, ops=None):
    """Apply every function in ``ops``, in order, to each string.

    Parameters
    ----------
    strings : iterable of str
        The values to clean.
    ops : iterable of callables, optional
        Each callable takes a string and returns a string. When omitted,
        ``clean_ops`` is used, so the one-argument call form accepted by
        the earlier definition of this function keeps working after this
        cell has been run.

    Returns
    -------
    list of str
        One cleaned value per input string, in input order.
    """
    # Materialise ops once: a one-shot iterator would otherwise be used up
    # by the first string and leave the remaining strings untouched.
    ops = clean_ops if ops is None else tuple(ops)
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result


In [50]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [51]:
for x in map(remove_punctuation, states):
    print(x)

 Alabama 
Georgia
Georgia
georgia
FlOrIda
south carolina
West virginia


In [52]:
no_punc_strings = map(remove_punctuation, states)

In [53]:
no_punc_strings

<map at 0x7f8e996ef828>

In [54]:
no_punc_strings = list(no_punc_strings)

In [55]:
no_space_strings = list(map(str.strip, no_punc_strings))
no_space_strings

['Alabama',
 'Georgia',
 'Georgia',
 'georgia',
 'FlOrIda',
 'south carolina',
 'West virginia']

In [56]:
[x.title() for x in no_space_strings]

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [57]:
def add_number(x, y):
    """Return ``x + y``."""
    return x + y

# Fix the first argument of add_number to 5, leaving a callable that takes
# only the second argument.
add_five = lambda y: add_number(5, y)
add_five(6)

11

In [58]:
from functools import partial

In [59]:
add_five1 = partial(add_number, 5)
add_five1(6)

11

In [60]:
mapping

{'k0': 0, 'k1': 1, 'k2': 2, 'k3': 3, 'k4': 4}

In [61]:
for key in mapping:
    print(key)

k0
k1
k2
k3
k4


In [62]:
dict_iterator = iter(mapping)
dict_iterator

<dict_keyiterator at 0x7f8e9a184ae8>

In [63]:
list(dict_iterator)

['k0', 'k1', 'k2', 'k3', 'k4']

In [64]:
def squares(n=10):
    """Yield the squares of the integers from 1 through ``n``.

    This is a generator function: calling it only creates the generator.
    Nothing in the body runs, including the message printed below, until
    the first value is requested.
    """
    print('Generating squares from 1 to {0}'.format(n ** 2))
    for i in range(1, n + 1):
        yield i ** 2

In [65]:
gen = squares()
gen

<generator object squares at 0x7f8e99725410>

In [66]:
for x in gen:
    print(x, end=' ')

Generating squres from 1 to 100
1 4 9 16 25 36 49 64 81 100 

In [67]:
gen = (x ** 2 for x in range(100))
gen

<generator object <genexpr> at 0x7f8e9a166f68>

In [68]:
sum(x ** 2 for x in range(100))

328350

In [69]:
sum(x for x in range(101))

5050

In [70]:
dict(('key' + str(i), i ** 2) for i in range(5))

{'key0': 0, 'key1': 1, 'key2': 4, 'key3': 9, 'key4': 16}

In [71]:
import itertools

In [72]:
first_letter = lambda x: x[0]

In [73]:
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

In [74]:
# groupby only merges consecutive items that share a key, so a key can
# appear more than once when the input is not sorted by it.
# Each group gets its own name: reusing `names` as the loop variable would
# replace the input list with a group iterator and break any re-run of
# this cell.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group))

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


In [75]:
path = '/home/sha/data/pydata-book/examples/segismundo.txt'

In [76]:
# The file is UTF-8 text; pass the encoding explicitly instead of relying
# on the platform's default text encoding.
f = open(path, encoding='utf-8')

In [77]:
# Iterate over the handle `f` opened above (it is closed explicitly further
# down). Calling open(path) again here would create a second file handle
# that nothing ever closes.
lines = [x.rstrip() for x in f]

In [78]:
lines

['Sueña el rico en su riqueza,',
 'que más cuidados le ofrece;',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueña el que a medrar empieza,',
 'sueña el que afana y pretende,',
 'sueña el que agravia y ofende,',
 '',
 'y en el mundo, en conclusión,',
 'todos sueñan lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [79]:
f.close()

In [80]:
# The with block closes the file when it exits. The encoding is passed
# explicitly so decoding does not depend on the platform default.
with open(path, encoding='utf-8') as f:
    lines = [x.rstrip() for x in f]
lines

['Sueña el rico en su riqueza,',
 'que más cuidados le ofrece;',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueña el que a medrar empieza,',
 'sueña el que afana y pretende,',
 'sueña el que agravia y ofende,',
 '',
 'y en el mundo, en conclusión,',
 'todos sueñan lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [81]:
# Text mode decodes bytes to str; pass the file's UTF-8 encoding explicitly
# so the character-based reads below do not depend on the platform default.
f = open(path, encoding='utf-8')

In [82]:
f.read(10)

'Sueña el r'

In [83]:
f2 = open(path, 'rb')

In [84]:
f2.read(10)

b'Sue\xc3\xb1a el '

In [85]:
f.tell()

11

In [86]:
f2.tell()

10

In [87]:
import sys
sys.getdefaultencoding()

'utf-8'

In [88]:
f.seek(3)

3

In [89]:
f.read(1)

'ñ'

In [90]:
f.close()
f2.close()

In [91]:
with open(path, 'rb') as f:
    data = f.read(10)
data

b'Sue\xc3\xb1a el '

In [92]:
data.decode('utf8')

'Sueña el '

In [93]:
sink_path = 'sink.txt'

In [94]:
# Transcode the UTF-8 source file to ISO-8859-1. The source encoding is
# passed explicitly so the result does not depend on the platform default.
# Mode 'x' creates the sink and raises FileExistsError if it already
# exists, so delete the file at sink_path before re-running this cell.
with open(path, encoding='utf-8') as source, \
        open(sink_path, 'xt', encoding='iso-8859-1') as sink:
    sink.write(source.read())

In [95]:
with open(sink_path, encoding='iso-8859-1') as f:
    print(f.read(10))

Sueña el r
