In [1]:
# 4.1 Object references: a list stores references to objects, not copies of them.
empty = list()
nested = [empty] * 3        # three references to the very same list object
nested[0].append("good")    # a change made through one slot is visible through every alias
nested

[['good'], ['good'], ['good']]

In [2]:
# id() gives the identity of the object held in slot 0 of `nested`.
id(nested[0])

1801219796744

In [3]:
# Slot 1 reports the same identity as slot 0: both slots hold the same list object.
id(nested[1])

1801219796744

In [5]:
# Rebinding slot 1 to a brand-new list replaces that one reference only;
# the list still referenced by slots 0 and 2 is not modified.
nested[1] = ["monty"]
nested[1]

['monty']

In [7]:
# `is` compares identity, not equality: slots 0 and 2 still hold one and the same list.
nested[0] is nested[2]

True

In [8]:
sent = ['No', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '.']
# all() over a generator expression: True only when every word has more than
# four characters (short words such as 'No' make it False here).
all(len(word) > 4 for word in sent)

False

In [9]:
# any() is True as soon as one word has more than four characters.
any(len(word) > 4 for word in sent)

True

In [10]:
# 4.2 Sequences
# The commas build the tuple; the enclosing parentheses are optional here.
t = 'walk', 'fem', 3
t

('walk', 'fem', 3)

In [11]:
# list() copies the tuple's items into a new list.
list(t)

['walk', 'fem', 3]

In [13]:
words = ['I', 'turned', 'off', 'the', 'spectroroute']
# Tuple assignment: the right-hand side is evaluated in full before any slot is
# rebound, so the three items rotate without a temporary variable.
words[2], words[3], words[4] = words[3], words[4], words[2]
words

['I', 'turned', 'the', 'spectroroute', 'off']

In [16]:
tags = ['noun', 'verb', 'prep', 'det', 'noun']
# zip() pairs items purely by position. `words` here is whatever the kernel
# currently holds; in the recorded run it was the reordered list, so 'the' is
# paired with 'prep' and 'off' with 'noun'.
zipped = list(zip(words, tags))
zipped

[('I', 'noun'),
 ('turned', 'verb'),
 ('the', 'prep'),
 ('spectroroute', 'det'),
 ('off', 'noun')]

In [17]:
# enumerate() pairs each item with its index, starting at 0.
list(enumerate(words))

[(0, 'I'), (1, 'turned'), (2, 'the'), (3, 'spectroroute'), (4, 'off')]

In [18]:
import nltk

# Split the NPS Chat word sequence 90% / 10% by position.
text = nltk.corpus.nps_chat.words()
cut = int(0.9 * len(text))  # index of the first test item (rounded down)
training_data, test_data = text[:cut], text[cut:]
# Sanity check: the two slices concatenate back to the original sequence.
text == training_data + test_data

True

In [19]:
# Ratio of training items to test items produced by the split.
len(training_data) / len(test_data)

9.0

In [20]:
words = 'I turned off the spectroroute'.split()
# Decorate each word with its length and sort the pairs: tuples compare by
# length first and alphabetically on ties.
wordlens = sorted((len(word), word) for word in words)
# Undecorate: drop the lengths and keep the words in their sorted order.
' '.join(word for _length, word in wordlens)

'I off the turned spectroroute'

In [21]:
from nltk.tokenize import word_tokenize

# NOTE: rebinds `text` from the corpus word sequence used earlier to a plain string.
text = '''"When I use a word," Humpty Dumpty said in rather a scornful tone,
"it means just what I choose it to mean - neither more nor less."'''

# Tokenize the quotation, then lowercase every token.
[w.lower() for w in word_tokenize(text)]

['``',
 'when',
 'i',
 'use',
 'a',
 'word',
 ',',
 "''",
 'humpty',
 'dumpty',
 'said',
 'in',
 'rather',
 'a',
 'scornful',
 'tone',
 ',',
 "''",
 'it',
 'means',
 'just',
 'what',
 'i',
 'choose',
 'it',
 'to',
 'mean',
 '-',
 'neither',
 'more',
 'nor',
 'less',
 '.',
 "''"]

In [23]:
# Generator expression: max() consumes the lowercased tokens one at a time, so
# no intermediate list of them is built. Strings compare lexicographically,
# which is why the result is the alphabetically last token.
max(w.lower() for w in word_tokenize(text))

'word'

In [25]:
# 4.3 Question of style

# Two passes over the Brown corpus words: first find the maximum word length,
# then collect every word that has exactly that length.
text = nltk.corpus.brown.words()
maxlen = max(len(word) for word in text)
[word for word in text if len(word) == maxlen]

['nnuolapertar-it-vuh-karti-birifw-']

In [26]:
# 4.4 Functions
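# LGB rule of name resolution: local scope first, then global, then built-in
# (Python 3 also checks enclosing function scopes between local and global: LEGB)
# defensive programming: assert isinstance(word, str)  # `basestring` exists only in Python 2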

In [31]:
# 4.5 Doing more with Functions
# pass functions as parameters

sent = ['Take', 'care', 'of', 'the', 'sense', ',', 'and', 'the', 'sounds', 'will', 'take', 'care', 'of', 'themselves', '.']


def extract_property(prop, words=None):
    """Apply ``prop`` to every word and return the results as a list.

    Args:
        prop: A one-argument callable applied to each word.
        words: Iterable of words to process. When omitted (``None``), the
            notebook-level ``sent`` is used, looked up at call time, so
            existing one-argument calls keep working.

    Returns:
        A list holding ``prop(word)`` for each word, in the original order.
    """
    # Compare against None rather than truthiness so an explicitly passed
    # empty sequence is honoured instead of silently falling back to `sent`.
    if words is None:
        words = sent
    return [prop(word) for word in words]


extract_property(lambda w: w[-1])

['e', 'e', 'f', 'e', 'e', ',', 'd', 'e', 's', 'l', 'e', 'e', 'f', 's', '.']

TypeError: sorted expected 1 arguments, got 2

In [35]:
# Generator version of search: matches are produced lazily, one per request,
# instead of being collected into a list first.
def search2(substring, words):
    """Yield, in order, each item of ``words`` that contains ``substring``."""
    yield from (candidate for candidate in words if substring in candidate)

# Consume the generator, printing each match followed by a space instead of a newline.
for item in search2('zz', nltk.corpus.brown.words()):
    print(item, end=" ")

Grizzlies' fizzled Rizzuto huzzahs dazzler jazz Pezza Pezza Pezza embezzling embezzlement pizza jazz Ozzie nozzle drizzly puzzle puzzle dazzling Sizzling guzzle puzzles dazzling jazz jazz Jazz jazz Jazz jazz jazz Jazz jazz jazz jazz Jazz jazz dizzy jazz Jazz puzzler jazz jazzmen jazz jazz Jazz Jazz Jazz jazz Jazz jazz jazz jazz Jazz jazz jazz jazz jazz jazz jazz jazz jazz jazz Jazz Jazz jazz jazz nozzles nozzle puzzle buzz puzzle blizzard blizzard sizzling puzzled puzzle puzzle muzzle muzzle muezzin blizzard Neo-Jazz jazz muzzle piazzas puzzles puzzles embezzle buzzed snazzy buzzes puzzled puzzled muzzle whizzing jazz Belshazzar Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie's Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie blizzard blizzards blizzard blizzard fuzzy Lazzeri Piazza piazza palazzi Piazza Piazza Palazzo Palazzo Palazzo Piazza Piazza Palazzo palazzo palazzo Palazzo Palazzo Piazza piazza piazza piazza Piazza Piazza Palazzo palazzo Piazza piazz

In [37]:
# Recursive permutation generator
def permutations(seq):
    """Lazily yield every ordering of ``seq``.

    ``seq`` must support ``len``, slicing and ``+`` (e.g. list, tuple, str);
    each result is built from slices of ``seq`` and so has the same type.
    A sequence of length 0 or 1 is yielded unchanged as its only ordering.
    """
    if len(seq) < 2:
        yield seq
        return
    head, tail = seq[:1], seq[1:]
    # Insert the first item into every gap of each ordering of the remaining items.
    for rest in permutations(tail):
        for gap in range(len(rest) + 1):
            yield rest[:gap] + head + rest[gap:]

# Materialize the generator to display all 3! = 6 orderings.
list(permutations(["police", "fish", "buffalo"]))

[['police', 'fish', 'buffalo'],
 ['fish', 'police', 'buffalo'],
 ['fish', 'buffalo', 'police'],
 ['police', 'buffalo', 'fish'],
 ['buffalo', 'police', 'fish'],
 ['buffalo', 'fish', 'police']]

In [None]:
# 4.6 Program Development