
Commit

run (parts of) pyupgrade to fix invalid escape sequences (#2747)
asottile committed Jul 20, 2021
1 parent de0b51d commit 447ae50
Showing 47 changed files with 71 additions and 122 deletions.
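
Every hunk below follows the same mechanical pattern: pyupgrade strips the redundant parentheses that earlier 2to3-style conversions left around print arguments, generator expressions, and string literals, and deletes stray blank lines at the top of the test fixtures. The invalid-escape fixes named in the commit title take the same one-line shape; a minimal sketch of both rewrites on hypothetical input (not lines from this commit):

    print(("hello %s" % name))    # before: 2to3 artifact, extra parens around the sole argument
    print("hello %s" % name)      # after: the expression is passed directly

    pattern = "\w+"               # before: \w is an invalid escape sequence in Python 3
    pattern = r"\w+"              # after: a raw string leaves the regex unchanged
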
2 changes: 1 addition & 1 deletion nltk/app/chartparser_app.py
@@ -2551,7 +2551,7 @@ def app():
     for rule in grammar.productions():
         print((" ", repr(rule) + ","))
     print(")")
-    print(("tokens = %r" % tokens))
+    print("tokens = %r" % tokens)
     print('Calling "ChartParserApp(grammar, tokens)"...')
     ChartParserApp(grammar, tokens).mainloop()

14 changes: 6 additions & 8 deletions nltk/app/chunkparser_app.py
@@ -1053,14 +1053,12 @@ def show_help(self, tab):
         text = text.replace(
             "<<TAGSET>>",
             "\n".join(
-                (
-                    "\t%s\t%s" % item
-                    for item in sorted(
-                        list(self.tagset.items()),
-                        key=lambda t_w: re.match(r"\w+", t_w[0])
-                        and (0, t_w[0])
-                        or (1, t_w[0]),
-                    )
+                "\t%s\t%s" % item
+                for item in sorted(
+                    list(self.tagset.items()),
+                    key=lambda t_w: re.match(r"\w+", t_w[0])
+                    and (0, t_w[0])
+                    or (1, t_w[0]),
                 )
             ),
         )
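
A note on this and the later generator-expression hunks: when a generator expression is the sole argument of a call, the call's own parentheses already delimit it, so the inner pair is redundant; an explicit pair is only required when the call takes further arguments. A hypothetical illustration, not code from this commit:

    sum(x * x for x in range(10))        # fine: the genexp is the only argument
    sorted((x for x in data), key=len)   # a second argument makes the inner parens mandatory
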
14 changes: 5 additions & 9 deletions nltk/app/wordnet_app.py
@@ -588,10 +588,8 @@ def _collect_all_synsets(word, pos, synset_relations=dict()):
     part of speech.
     """
     return "<ul>%s\n</ul>\n" % "".join(
-        (
-            _collect_one_synset(word, synset, synset_relations)
-            for synset in wn.synsets(word, pos)
-        )
+        _collect_one_synset(word, synset, synset_relations)
+        for synset in wn.synsets(word, pos)
     )


@@ -647,11 +645,9 @@ def make_synset_html(db_name, disp_name, rels):
     html = (
         "<ul>"
         + "\n".join(
-            (
-                "<li>%s</li>" % make_synset_html(*rel_data)
-                for rel_data in get_relations_data(word, synset)
-                if rel_data[2] != []
-            )
+            "<li>%s</li>" % make_synset_html(*rel_data)
+            for rel_data in get_relations_data(word, synset)
+            if rel_data[2] != []
         )
         + "</ul>"
     )
2 changes: 1 addition & 1 deletion nltk/chunk/regexp.py
@@ -1337,7 +1337,7 @@ def demo_eval(chunkparser, text):
 
     print("/" + ("=" * 75) + "\\")
     print("Scoring", chunkparser)
-    print(("-" * 77))
+    print("-" * 77)
     print("Precision: %5.1f%%" % (chunkscore.precision() * 100), " " * 4, end=" ")
     print("Recall: %5.1f%%" % (chunkscore.recall() * 100), " " * 6, end=" ")
12 changes: 4 additions & 8 deletions nltk/classify/decisiontree.py
@@ -268,10 +268,8 @@ def best_stump(feature_names, labeled_featuresets, verbose=False):
             best_stump = stump
     if verbose:
         print(
-            (
-                "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
-                    len(labeled_featuresets), best_stump._fname, best_error
-                )
+            "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
+                len(labeled_featuresets), best_stump._fname, best_error
             )
         )
     return best_stump
@@ -322,10 +320,8 @@ def best_binary_stump(
         else:
             descr = "(default)"
         print(
-            (
-                "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
-                    len(labeled_featuresets), descr, best_error
-                )
+            "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
+                len(labeled_featuresets), descr, best_error
             )
         )
     return best_stump
2 changes: 1 addition & 1 deletion nltk/classify/maxent.py
@@ -1396,7 +1396,7 @@ def calculate_deltas(
         deltas -= (ffreq_empirical - sum1) / -sum2
 
         # We can stop once we converge.
-        n_error = numpy.sum(abs((ffreq_empirical - sum1))) / numpy.sum(abs(deltas))
+        n_error = numpy.sum(abs(ffreq_empirical - sum1)) / numpy.sum(abs(deltas))
         if n_error < NEWTON_CONVERGE:
             return deltas
 
6 changes: 2 additions & 4 deletions nltk/classify/naivebayes.py
@@ -147,10 +147,8 @@ def labelprob(l):
                 cpdist[l1, fname].prob(fval) / cpdist[l0, fname].prob(fval)
             )
             print(
-                (
-                    "%24s = %-14r %6s : %-6s = %s : 1.0"
-                    % (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio)
-                )
+                "%24s = %-14r %6s : %-6s = %s : 1.0"
+                % (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio)
             )
 
     def most_informative_features(self, n=100):
2 changes: 1 addition & 1 deletion nltk/classify/senna.py
@@ -132,7 +132,7 @@ def tag_sents(self, sentences):
         _senna_cmd.extend(["-" + op for op in self.operations])
 
         # Serialize the actual sentences to a temporary string
-        _input = "\n".join((" ".join(x) for x in sentences)) + "\n"
+        _input = "\n".join(" ".join(x) for x in sentences) + "\n"
         if isinstance(_input, str) and encoding:
             _input = _input.encode(encoding)
 
2 changes: 1 addition & 1 deletion nltk/classify/weka.py
@@ -52,7 +52,7 @@ def config_weka(classpath=None):
         version = _check_weka_version(_weka_classpath)
         if version:
             print(
-                ("[Found Weka: %s (version %s)]" % (_weka_classpath, version))
+                "[Found Weka: %s (version %s)]" % (_weka_classpath, version)
             )
         else:
             print("[Found Weka: %s]" % _weka_classpath)
2 changes: 1 addition & 1 deletion nltk/data.py
@@ -1294,7 +1294,7 @@ def tell(self):
         bytes_read = (orig_filepos - len(self.bytebuffer)) - self._rewind_checkpoint
         buf_size = sum(len(line) for line in self.linebuffer)
         est_bytes = int(
-            (bytes_read * self._rewind_numchars / (self._rewind_numchars + buf_size))
+            bytes_read * self._rewind_numchars / (self._rewind_numchars + buf_size)
         )
 
         self.stream.seek(self._rewind_checkpoint)
6 changes: 3 additions & 3 deletions nltk/downloader.py
@@ -1245,8 +1245,8 @@ def _show_config(self):
         print()
         print("Data Server:")
         print(" - URL: <%s>" % self._ds.url)
-        print((" - %d Package Collections Available" % len(self._ds.collections())))
-        print((" - %d Individual Packages Available" % len(self._ds.packages())))
+        print(" - %d Package Collections Available" % len(self._ds.collections()))
+        print(" - %d Individual Packages Available" % len(self._ds.packages()))
         print()
         print("Local Machine:")
         print(" - Data directory: %s" % self._ds.download_dir)
@@ -1268,7 +1268,7 @@ def _simple_interactive_config(self):
                 elif os.path.isdir(new_dl_dir):
                     self._ds.download_dir = new_dl_dir
                 else:
-                    print(("Directory %r not found! Create it first." % new_dl_dir))
+                    print("Directory %r not found! Create it first." % new_dl_dir)
             elif user_input == "u":
                 new_url = input(" New URL> ").strip()
                 if new_url in ("", "x", "q", "X", "Q"):
2 changes: 1 addition & 1 deletion nltk/grammar.py
@@ -754,7 +754,7 @@ def chomsky_normal_form(self, new_token_padding="@$@", flexible=False):
             return self
         if self.productions(empty=True):
             raise ValueError(
-                ("Grammar has Empty rules. " "Cannot deal with them at the moment")
+                "Grammar has Empty rules. " "Cannot deal with them at the moment"
             )
 
         # check for mixed rules
6 changes: 3 additions & 3 deletions nltk/metrics/agreement.py
@@ -142,11 +142,11 @@ def agr(self, cA, cB, i, data=None):
         # cfedermann: we don't know what combination of coder/item will come
         # first in x; to avoid StopIteration problems due to assuming an order
         # cA,cB, we allow either for k1 and then look up the missing as k2.
-        k1 = next((x for x in data if x["coder"] in (cA, cB) and x["item"] == i))
+        k1 = next(x for x in data if x["coder"] in (cA, cB) and x["item"] == i)
         if k1["coder"] == cA:
-            k2 = next((x for x in data if x["coder"] == cB and x["item"] == i))
+            k2 = next(x for x in data if x["coder"] == cB and x["item"] == i)
         else:
-            k2 = next((x for x in data if x["coder"] == cA and x["item"] == i))
+            k2 = next(x for x in data if x["coder"] == cA and x["item"] == i)
 
         ret = 1.0 - float(self.distance(k1["labels"], k2["labels"]))
         log.debug("Observed agreement between %s and %s on %s: %f", cA, cB, i, ret)
6 changes: 3 additions & 3 deletions nltk/metrics/distance.py
@@ -250,11 +250,11 @@ def presence(label):
 
 def fractional_presence(label):
     return (
-        lambda x, y: abs(((1.0 / len(x)) - (1.0 / len(y))))
+        lambda x, y: abs((1.0 / len(x)) - (1.0 / len(y)))
         * (label in x and label in y)
         or 0.0 * (label not in x and label not in y)
-        or abs((1.0 / len(x))) * (label in x and label not in y)
-        or ((1.0 / len(y))) * (label not in x and label in y)
+        or abs(1.0 / len(x)) * (label in x and label not in y)
+        or (1.0 / len(y)) * (label not in x and label in y)
     )


6 changes: 2 additions & 4 deletions nltk/misc/sort.py
@@ -167,10 +167,8 @@ def demo():
         count_quick = quick(a)
 
         print(
-            (
-                ("size=%5d: selection=%8d, bubble=%8d, " "merge=%6d, quick=%6d")
-                % (size, count_selection, count_bubble, count_merge, count_quick)
-            )
+            ("size=%5d: selection=%8d, bubble=%8d, " "merge=%6d, quick=%6d")
+            % (size, count_selection, count_bubble, count_merge, count_quick)
         )


12 changes: 5 additions & 7 deletions nltk/parse/nonprojectivedependencyparser.py
@@ -150,13 +150,11 @@ def score(self, graph):
         for head_node in graph.nodes.values():
             for child_node in graph.nodes.values():
                 edges.append(
-                    (
-                        dict(
-                            a=head_node["word"],
-                            b=head_node["tag"],
-                            c=child_node["word"],
-                            d=child_node["tag"],
-                        )
+                    dict(
+                        a=head_node["word"],
+                        b=head_node["tag"],
+                        c=child_node["word"],
+                        d=child_node["tag"],
                     )
                 )

8 changes: 3 additions & 5 deletions nltk/parse/viterbi.py
@@ -121,7 +121,7 @@ def parse(self, tokens):
         # Initialize the constituents dictionary with the words from
         # the text.
         if self._trace:
-            print(("Inserting tokens into the most likely" + " constituents table..."))
+            print("Inserting tokens into the most likely" + " constituents table...")
         for index in range(len(tokens)):
             token = tokens[index]
             constituents[index, index + 1, token] = token
@@ -133,10 +133,8 @@ def parse(self, tokens):
         for length in range(1, len(tokens) + 1):
             if self._trace:
                 print(
-                    (
-                        "Finding the most likely constituents"
-                        + " spanning %d text elements..." % length
-                    )
+                    "Finding the most likely constituents"
+                    + " spanning %d text elements..." % length
                 )
             for start in range(len(tokens) - length + 1):
                 span = (start, start + length)
6 changes: 2 additions & 4 deletions nltk/probability.py
@@ -2491,10 +2491,8 @@ def demo(numsamples=6, numoutcomes=500):
 
     # Print the results in a formatted table.
     print(
-        (
-            "%d samples (1-%d); %d outcomes were sampled for each FreqDist"
-            % (numsamples, numsamples, numoutcomes)
-        )
+        "%d samples (1-%d); %d outcomes were sampled for each FreqDist"
+        % (numsamples, numsamples, numoutcomes)
     )
     print("=" * 9 * (len(pdists) + 2))
     FORMATSTR = " FreqDist " + "%8s " * (len(pdists) - 1) + "| Actual"
2 changes: 1 addition & 1 deletion nltk/sem/evaluate.py
@@ -95,7 +95,7 @@ def set2rel(s):
         if isinstance(elem, str):
             new.add((elem,))
         elif isinstance(elem, int):
-            new.add((str(elem)))
+            new.add(str(elem))
         else:
             new.add(elem)
     return new
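
The hunk above is worth a gloss: (elem,) keeps its parentheses because the trailing comma makes it a one-element tuple, while (str(elem)) is plain grouping, identical to str(elem), so the pair can be dropped without changing behavior. A hypothetical illustration, not code from this commit:

    t = (elem,)       # one-element tuple: the comma, not the parens, creates the tuple
    s = (str(elem))   # grouping only: s is just the string
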
1 change: 0 additions & 1 deletion nltk/stem/rslp.py
@@ -1,4 +1,3 @@
-
 # Natural Language Toolkit: RSLP Stemmer
 #
 # Copyright (C) 2001-2021 NLTK Project
6 changes: 2 additions & 4 deletions nltk/stem/snowball.py
@@ -5915,10 +5915,8 @@ def demo():
 
         if language not in SnowballStemmer.languages:
             print(
-                (
-                    "\nOops, there is no stemmer for this language. "
-                    + "Please try again.\n"
-                )
+                "\nOops, there is no stemmer for this language. "
+                + "Please try again.\n"
             )
             continue
 
2 changes: 1 addition & 1 deletion nltk/tag/brill_trainer.py
@@ -279,7 +279,7 @@ def train(self, train_sents, max_rules=200, min_score=2, min_acc=None):
             print("Finding initial useful rules...")
         self._init_mappings(test_sents, train_sents)
         if self._trace:
-            print((" Found {} useful rules.".format(len(self._rule_scores))))
+            print(" Found {} useful rules.".format(len(self._rule_scores)))
 
         # Let the user know what we're up to.
         if self._trace > 2:
4 changes: 2 additions & 2 deletions nltk/tag/senna.py
@@ -105,8 +105,8 @@ def bio_to_chunks(self, tagged_sent, chunk_type):
         for idx, word_pos in enumerate(tagged_sent):
             word, pos = word_pos
             if "-" + chunk_type in pos:  # Append the word to the current_chunk.
-                current_chunk.append((word))
-                current_chunk_position.append((idx))
+                current_chunk.append(word)
+                current_chunk_position.append(idx)
             else:
                 if current_chunk:  # Flush the full chunk when out of an NP.
                     _chunk_str = " ".join(current_chunk)
2 changes: 1 addition & 1 deletion nltk/tag/stanford.py
@@ -102,7 +102,7 @@ def tag_sents(self, sentences):
 
         # Write the actual sentences to the temporary input file
        _input_fh = os.fdopen(_input_fh, "wb")
-        _input = "\n".join((" ".join(x) for x in sentences))
+        _input = "\n".join(" ".join(x) for x in sentences)
         if isinstance(_input, str) and encoding:
             _input = _input.encode(encoding)
         _input_fh.write(_input)
6 changes: 2 additions & 4 deletions nltk/tbl/feature.py
@@ -220,10 +220,8 @@ def intersects(self, other):
         """
 
         return bool(
-            (
-                self.__class__ is other.__class__
-                and set(self.positions) & set(other.positions)
-            )
+            self.__class__ is other.__class__
+            and set(self.positions) & set(other.positions)
         )
 
     # Rich comparisons for Features. With @functools.total_ordering (Python 2.7+),
1 change: 0 additions & 1 deletion nltk/test/childes_fixt.py
@@ -1,4 +1,3 @@
-
 def setup_module():
     import pytest
     import nltk.data
2 changes: 0 additions & 2 deletions nltk/test/classify_fixt.py
@@ -1,5 +1,3 @@
-
-
 # most of classify.doctest requires numpy
 def setup_module():
     import pytest
2 changes: 0 additions & 2 deletions nltk/test/discourse_fixt.py
@@ -1,5 +1,3 @@
-
-
 # FIXME: the entire discourse.doctest is skipped if Prover9/Mace4 is
 # not installed, but there are pure-python parts that don't need Prover9.
 def setup_module():
2 changes: 0 additions & 2 deletions nltk/test/gensim_fixt.py
@@ -1,5 +1,3 @@
-
-
 def setup_module():
     import pytest
     pytest.importorskip("gensim")
2 changes: 0 additions & 2 deletions nltk/test/gluesemantics_malt_fixt.py
@@ -1,5 +1,3 @@
-
-
 def setup_module():
     import pytest
     from nltk.parse.malt import MaltParser
2 changes: 0 additions & 2 deletions nltk/test/inference_fixt.py
@@ -1,5 +1,3 @@
-
-
 def setup_module():
     import pytest
     from nltk.inference.mace import Mace
2 changes: 0 additions & 2 deletions nltk/test/nonmonotonic_fixt.py
@@ -1,5 +1,3 @@
-
-
 def setup_module():
     import pytest
     from nltk.inference.mace import Mace
1 change: 0 additions & 1 deletion nltk/test/portuguese_en_fixt.py
@@ -1,4 +1,3 @@
-
 def setup_module():
     import pytest
 
2 changes: 0 additions & 2 deletions nltk/test/probability_fixt.py
@@ -1,5 +1,3 @@
-
-
 # probability.doctest uses HMM which requires numpy;
 # skip probability.doctest if numpy is not available
 
