In [4]:
import numpy as np
from numpy.random import randn
import datetime

In [2]:
data = {i:randn() for i in range(7)}
data

{0: -0.4319142794443533,
 1: -0.6758282554053954,
 2: 0.4794615654308246,
 3: -0.22042684373176544,
 4: 0.04755962718232234,
 5: 1.2492909090739956,
 6: -0.4489122796942085}

In [3]:
%quickref

In [6]:
def __repr__(self):
    return 'Message: %s' % self.msg

In [6]:
d1 = datetime.datetime(2017,1,1,10,30)
d1

datetime.datetime(2017, 1, 1, 10, 30)

In [8]:
d1.strftime('%Y%m%d%H%M%S')

'20170101103000'

In [9]:
d2 = datetime.datetime.strptime('20170101113000', '%Y%m%d%H%M%S')

In [11]:
delta = d2 - d1
delta

datetime.timedelta(0, 3600)

In [12]:
d3 = d1 + delta
d3

datetime.datetime(2017, 1, 1, 11, 30)

In [14]:
d3.replace(hour=0)

datetime.datetime(2017, 1, 1, 0, 30)

In [15]:
[1,2] + [3,4]

[1, 2, 3, 4]

In [19]:
a = [1,2]
a.extend([3,4])
print(a)

[1, 2, 3, 4]


In [20]:
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

['He', 'saw', 'six', 'small', 'foxes']

In [21]:
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, v in enumerate(some_list):
    mapping[v] = i
mapping

{'bar': 1, 'baz': 2, 'foo': 0}

In [22]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [23]:
seq3 = [False, True]
list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [24]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
            ('Schilling', 'Curt')]
first_names, last_names = zip(*pitchers)
first_names
last_names

('Ryan', 'Clemens', 'Curt')

In [25]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [26]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [27]:
from collections import defaultdict

In [29]:
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)
by_letter    
    

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

In [30]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]
result = [name for names in all_data for name in names
          if name.count('e') >= 2]
result

['Steven']

In [31]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']

In [32]:
import re

def clean_strings(strings):
    result = []
    for value in strings:
        value = value.strip()
        value = re.sub('[!#?]', '', value)
        value = value.title()
        result.append(value)
    return result

In [33]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [34]:
def remove_punctuation(value):
    return re.sub('[!#?]', '', value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result

In [35]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [36]:
str.title

<method 'title' of 'str' objects>

In [37]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

In [38]:
strings.sort(key=lambda x: len(set(list(x))))
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

In [39]:
gen = (x ** 2 for x in range(100))
gen

<generator object <genexpr> at 0x000000000591A518>

In [40]:
dict((i, i **2) for i in range(5))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [41]:
sum(x ** 2 for x in range(100))

328350

In [42]:
import itertools
first_letter = lambda x: x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
for letter, names in itertools.groupby(names, first_letter):
    print(letter, list(names)) # names is a generator

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


In [43]:
import sys
sys.getdefaultencoding()

'utf-8'

In [44]:
path = 'examples/segismundo.txt'

In [46]:
f = open(path)

lines = [x.rstrip() for x in open(path)]
lines


['Sue帽a el rico en su riqueza,',
 'que m谩s cuidados le ofrece;',
 '',
 'sue帽a el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sue帽a el que a medrar empieza,',
 'sue帽a el que afana y pretende,',
 'sue帽a el que agravia y ofende,',
 '',
 'y en el mundo, en conclusi贸n,',
 'todos sue帽an lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [47]:
f.close()

In [None]:


with open(path) as f:
    lines = [x.rstrip() for x in f]
    
    
with open('tmp.txt', 'w') as handle:
    handle.writelines(x for x in open(path) if len(x) > 1)
with open('tmp.txt') as f:
    lines = f.readlines()
lines

import os
os.remove('tmp.txt')

In [48]:
with open(path) as f:
    chars = f.read(10)
chars

'Sue帽a el r'

In [50]:
with open(path, 'rb') as f:
    data = f.read(10)
data

b'Sue\xc3\xb1a el '

In [51]:
data.decode('utf8')

'Sueña el '

In [53]:
data[:3].decode('utf8')

'Sue'

In [54]:
data[:4].decode('utf8')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 3: unexpected end of data

In [55]:
sink_path = 'sink.txt'
with open(path) as source:
    with open(sink_path, 'xt', encoding='iso-8859-1') as sink:
        sink.write(source.read())
with open(sink_path, encoding='iso-8859-1') as f:
    print(f.read(10))

UnicodeEncodeError: 'latin-1' codec can't encode character '\u5e3d' in position 3: ordinal not in range(256)

In [57]:
import os
os.remove(sink_path)

In [58]:
f = open(path)
f.read(10)

'Sue帽a el r'

In [59]:
sys.getdefaultencoding()

'utf-8'

In [60]:
f.close()

In [61]:
f = open(path, 'rb')
f.read(10)

b'Sue\xc3\xb1a el '

In [62]:
f.close()

In [63]:
f = open(path, encoding='utf-8')
f.read(10)

'Sueña el r'

In [64]:
f.close()