diff --git a/book/ch01.rst b/book/ch01.rst
index 7b78993e..c5e90af1 100644
--- a/book/ch01.rst
+++ b/book/ch01.rst
@@ -1052,7 +1052,7 @@ Here are all words from the chat corpus that are longer than seven characters,
 that occur more than seven times:

     >>> fdist5 = FreqDist(text5)
-    >>> sorted([w for w in set(text5) if len(w) > 7 and fdist5[w] > 7])
+    >>> sorted(w for w in set(text5) if len(w) > 7 and fdist5[w] > 7)
     ['#14-19teens', '#talkcity_adults', '((((((((((', '........', 'Question',
     'actually', 'anything', 'computer', 'cute.-ass', 'everyone', 'football',
     'innocent', 'listening', 'remember', 'seriously', 'something', 'together',
@@ -1281,13 +1281,13 @@ words containing `gnt`:lx:; words having an initial capital; and words
 consisting entirely of digits.

-    >>> sorted([w for w in set(text1) if w.endswith('ableness')])
+    >>> sorted(w for w in set(text1) if w.endswith('ableness'))
     ['comfortableness', 'honourableness', 'immutableness', 'indispensableness', ...]
-    >>> sorted([term for term in set(text4) if 'gnt' in term])
+    >>> sorted(term for term in set(text4) if 'gnt' in term)
     ['Sovereignty', 'sovereignties', 'sovereignty']
-    >>> sorted([item for item in set(text6) if item.istitle()])
+    >>> sorted(item for item in set(text6) if item.istitle())
     ['A', 'Aaaaaaaaah', 'Aaaaaaaah', 'Aaaaaah', 'Aaaah', 'Aaaaugh', 'Aaagh', ...]
-    >>> sorted([item for item in set(sent7) if item.isdigit()])
+    >>> sorted(item for item in set(sent7) if item.isdigit())
     ['29', '61']
     >>>
@@ -1303,10 +1303,10 @@ then we can combine them to form a new condition using conjunction and disjuncti
 Next, try to make up some conditions of your own.

 .. doctest-ignore::
-    >>> sorted([w for w in set(text7) if '-' in w and 'index' in w])
-    >>> sorted([wd for wd in set(text3) if wd.istitle() and len(wd) > 10])
-    >>> sorted([w for w in set(sent7) if not w.islower()])
-    >>> sorted([t for t in set(text2) if 'cie' in t or 'cei' in t])
+    >>> sorted(w for w in set(text7) if '-' in w and 'index' in w)
+    >>> sorted(wd for wd in set(text3) if wd.istitle() and len(wd) > 10)
+    >>> sorted(w for w in set(sent7) if not w.islower())
+    >>> sorted(t for t in set(text2) if 'cie' in t or 'cei' in t)

 Operating on Every Element
 --------------------------
@@ -1341,7 +1341,7 @@ Let's return to the question of vocabulary size, and apply the same idiom here:
     260819
     >>> len(set(text1))
     19317
-    >>> len(set([word.lower() for word in text1]))
+    >>> len(set(word.lower() for word in text1))
     17231
     >>>
@@ -1350,7 +1350,7 @@ in capitalization, we've wiped 2,000 off the vocabulary count! We can go a step
 and eliminate numbers and punctuation from the vocabulary count
 by filtering out any non-alphabetic items:

-    >>> len(set([word.lower() for word in text1 if word.isalpha()]))
+    >>> len(set(word.lower() for word in text1 if word.isalpha()))
     16948
     >>>
@@ -1484,7 +1484,7 @@ then we loop over each item and print it. Notice the comma at the end of the
 print statement, which tells Python to produce its output on a single line.

-    >>> tricky = sorted([w for w in set(text2) if 'cie' in w or 'cei' in w])
+    >>> tricky = sorted(w for w in set(text2) if 'cie' in w or 'cei' in w)
     >>> for word in tricky:
     ...     print(word, end=' ')
     ancient ceiling conceit conceited conceive conscience
@@ -1814,7 +1814,7 @@ Summary
 * We obtain the vocabulary of a text ``t`` using ``sorted(set(t))``.
 * We operate on each item of a text using ``[f(x) for x in text]``.
 * To derive the vocabulary, collapsing case distinctions and ignoring punctuation,
-  we can write ``set([w.lower() for w in text if w.isalpha()])``.
+  we can write ``set(w.lower() for w in text if w.isalpha())``.
 * We process each word in a text using a ``for`` statement, such as
   ``for w in t:`` or ``for word in text:``. This must be followed by the colon
   character and an indented block of code, to be executed each time through the loop.
@@ -1978,8 +1978,8 @@ Exercises
    Which one will give a larger value? Will this be the case for other texts?

    .. doctest-ignore::
-       >>> sorted(set([w.lower() for w in text1]))
-       >>> sorted([w.lower() for w in set(text1)])
+       >>> sorted(set(w.lower() for w in text1))
+       >>> sorted(w.lower() for w in set(text1))

 #. |soso| What is the difference between the following two tests:
    ``w.isupper()`` and ``not w.islower()``?
@@ -2011,7 +2011,7 @@ Exercises
    a) Print all words beginning with `sh`:lx:
    b) Print all words longer than four characters

-#. |soso| What does the following Python code do? ``sum([len(w) for w in text1])``
+#. |soso| What does the following Python code do? ``sum(len(w) for w in text1)``
    Can you use it to work out the average word length of a text?

 #. |soso| Define a function called ``vocab_size(text)`` that has a single
diff --git a/book/ch02.rst b/book/ch02.rst
index 576f00f1..d4d57bd5 100755
--- a/book/ch02.rst
+++ b/book/ch02.rst
@@ -125,7 +125,7 @@ sure that the numbers are all integers, using ``int()``.
     ...     num_chars = len(gutenberg.raw(fileid)) # [_raw-access]
     ...     num_words = len(gutenberg.words(fileid))
     ...     num_sents = len(gutenberg.sents(fileid))
-    ...     num_vocab = len(set([w.lower() for w in gutenberg.words(fileid)]))
+    ...     num_vocab = len(set(w.lower() for w in gutenberg.words(fileid)))
     ...     print(int(num_chars/num_words), int(num_words/num_sents), int(num_words/num_vocab), fileid)
     ...
     4 24 26 austen-emma.txt
@@ -170,7 +170,7 @@ a list of words:
     >>> macbeth_sentences[1116]
     ['Double', ',', 'double', ',', 'toile', 'and', 'trouble', ';', 'Fire', 'burne', ',',
     'and', 'Cauldron', 'bubble']
-    >>> longest_len = max([len(s) for s in macbeth_sentences])
+    >>> longest_len = max(len(s) for s in macbeth_sentences)
     >>> [s for s in macbeth_sentences if len(s) == longest_len]
     [['Doubtfull', 'it', 'stood', ',', 'As', 'two', 'spent', 'Swimmers', ',', 'that',
     'doe', 'cling', 'together', ',', 'And', 'choake', 'their', 'Art', ':', 'The',
@@ -276,7 +276,7 @@ is to produce the counts for a particular genre. Remember to

     >>> from nltk.corpus import brown
     >>> news_text = brown.words(categories='news')
-    >>> fdist = nltk.FreqDist([w.lower() for w in news_text])
+    >>> fdist = nltk.FreqDist(w.lower() for w in news_text)
     >>> modals = ['can', 'could', 'may', 'might', 'must', 'will']
     >>> for m in modals:
     ...     print(m + ':', fdist[m], end=' ')
@@ -1643,7 +1643,7 @@ the (immediate) `hyponyms`:dt:.
     >>> types_of_motorcar = motorcar.hyponyms()
     >>> types_of_motorcar[0]
     Synset('ambulance.n.01')
-    >>> sorted([lemma.name() for synset in types_of_motorcar for lemma in synset.lemmas()])
+    >>> sorted(lemma.name() for synset in types_of_motorcar for lemma in synset.lemmas())
     ['Model_T', 'S.U.V.', 'SUV', 'Stanley_Steamer', 'ambulance', 'beach_waggon',
     'beach_wagon', 'bus', 'cab', 'compact', 'compact_car', 'convertible',
     'coupe', 'cruiser', 'electric', 'electric_automobile', 'electric_car',
diff --git a/book/ch04.rst b/book/ch04.rst
index 3854e135..43c8e258 100644
--- a/book/ch04.rst
+++ b/book/ch04.rst
@@ -1431,7 +1431,7 @@ Here's a pair of equivalent examples which count the number of vowels in each wo

     >>> list(map(lambda w: len(filter(lambda c: c.lower() in "aeiou", w)), sent))
     [2, 2, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 1, 3, 0]
-    >>> [len([c for c in w if c.lower() in "aeiou"]) for w in sent]
+    >>> [sum(1 for c in w if c.lower() in "aeiou") for w in sent]
     [2, 2, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 1, 3, 0]

 The solutions based on list comprehensions are usually more readable than the
diff --git a/book/ch06.rst b/book/ch06.rst
index e2229350..3b6fbe74 100644
--- a/book/ch06.rst
+++ b/book/ch06.rst
@@ -1474,7 +1474,7 @@ of a list of labels.
     def entropy(labels):
         freqdist = nltk.FreqDist(labels)
         probs = [freqdist.freq(l) for l in freqdist]
-        return -sum([p * math.log(p,2) for p in probs])
+        return -sum(p * math.log(p,2) for p in probs)

     >>> print(entropy(['male', 'male', 'male', 'male'])) # doctest: +SKIP
     0.0
diff --git a/book/ch08.rst b/book/ch08.rst
index 7ac7184e..d354c010 100644
--- a/book/ch08.rst
+++ b/book/ch08.rst
@@ -951,7 +951,7 @@ As expected, there is a ``V`` in cell (1, 2).
         return wfst

     def display(wfst, tokens):
-        print('\nWFST ' + ' '.join([("%-4d" % i) for i in range(1, len(wfst))]))
+        print('\nWFST ' + ' '.join(("%-4d" % i) for i in range(1, len(wfst))))
         for i in range(len(wfst)-1):
             print("%d " % i, end=" ")
             for j in range(1, len(wfst)):
diff --git a/book/ch10.rst b/book/ch10.rst
index 1038b6cd..f734f6c8 100644
--- a/book/ch10.rst
+++ b/book/ch10.rst
@@ -2353,8 +2353,8 @@ list of discourse of referents and a list of |DRS| conditions::
 The easiest way to build a ``DRS`` object in |NLTK| is by parsing a string
 representation parse-drs_.

-    >>> dexpr = nltk.sem.DrtExpression.fromstring
-    >>> drs1 = dexpr('([x, y], [angus(x), dog(y), own(x, y)])') # [_parse-drs]
+    >>> read_dexpr = nltk.sem.DrtExpression.fromstring
+    >>> drs1 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])') # [_parse-drs]
     >>> print(drs1)
     ([x,y],[angus(x), dog(y), own(x,y)])

@@ -2389,7 +2389,7 @@ is a single |DRS| containing the merged discourse referents and the conditions
 from both arguments. |DRS|\ -concatenation automatically |alpha|\ -converts
 bound variables to avoid name-clashes.

-    >>> drs2 = dexpr('([x], [walk(x)]) + ([y], [run(y)])')
+    >>> drs2 = read_dexpr('([x], [walk(x)]) + ([y], [run(y)])')
     >>> print(drs2)
     (([x],[walk(x)]) + ([y],[run(y)]))
     >>> print(drs2.simplify())
@@ -2402,7 +2402,7 @@ discourse referents, and the sole condition is made up of two sub-\ |DRS|\ s,
 connected by an implication. Again, we can use ``fol()`` to get a handle on the
 truth conditions.

-    >>> drs3 = dexpr('([], [(([x], [dog(x)]) -> ([y],[ankle(y), bite(x, y)]))])')
+    >>> drs3 = read_dexpr('([], [(([x], [dog(x)]) -> ([y],[ankle(y), bite(x, y)]))])')
     >>> print(drs3.fol())
     all x.(dog(x) -> exists y.(ankle(y) & bite(x,y)))

@@ -2419,8 +2419,8 @@ condition of the form ``x = [...]``, where ``[...]`` is a list of possible
 antecedents.

-    >>> drs4 = dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
-    >>> drs5 = dexpr('([u, z], [PRO(u), irene(z), bite(u, z)])')
+    >>> drs4 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
+    >>> drs5 = read_dexpr('([u, z], [PRO(u), irene(z), bite(u, z)])')
     >>> drs6 = drs4 + drs5
     >>> print(drs6.simplify())
     ([u,x,y,z],[angus(x), dog(y), own(x,y), PRO(u), irene(z), bite(u,z)])