diff --git a/packaging/torchtext/meta.yaml b/packaging/torchtext/meta.yaml index 13ff1a925f..9d7502200d 100644 --- a/packaging/torchtext/meta.yaml +++ b/packaging/torchtext/meta.yaml @@ -23,7 +23,6 @@ requirements: - python - requests - tqdm - - torchdata {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} build: diff --git a/setup.py b/setup.py index 92ba40d262..d8afa7a550 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ def run(self): description="Text utilities and datasets for PyTorch", long_description=read("README.rst"), license="BSD", - install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", "torchdata"], + install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"], python_requires=">=3.7", classifiers=[ "Programming Language :: Python :: 3.7", diff --git a/torchtext/_download_hooks.py b/torchtext/_download_hooks.py index 505320efae..d740827c48 100644 --- a/torchtext/_download_hooks.py +++ b/torchtext/_download_hooks.py @@ -4,9 +4,12 @@ # This is to allow monkey-patching in fbcode from torch.hub import load_state_dict_from_url # noqa -from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401 +from torchtext._internal.module_utils import is_module_available from tqdm import tqdm +if is_module_available("torchdata"): + from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401 + def _stream_response(r, chunk_size=16 * 1024): total_size = int(r.headers.get("Content-length", 0)) diff --git a/torchtext/datasets/ag_news.py b/torchtext/datasets/ag_news.py index 5f1c7741f6..f07b3ae354 100644 --- a/torchtext/datasets/ag_news.py +++ b/torchtext/datasets/ag_news.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv", "test": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv", diff --git a/torchtext/datasets/amazonreviewfull.py b/torchtext/datasets/amazonreviewfull.py index 06e688279a..eb527a8046 100644 --- a/torchtext/datasets/amazonreviewfull.py +++ b/torchtext/datasets/amazonreviewfull.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA" MD5 = "57d28bd5d930e772930baddf36641c7c" diff --git a/torchtext/datasets/amazonreviewpolarity.py b/torchtext/datasets/amazonreviewpolarity.py index 9616dc1d9e..f4a47da008 100644 --- a/torchtext/datasets/amazonreviewpolarity.py +++ b/torchtext/datasets/amazonreviewpolarity.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM" MD5 = "fe39f8b653cada45afd5792e0f0e8f9b" diff --git a/torchtext/datasets/cc100.py b/torchtext/datasets/cc100.py index 4ce2e92dd8..755f277224 100644 --- a/torchtext/datasets/cc100.py +++ b/torchtext/datasets/cc100.py @@ -1,12 +1,15 @@ import os.path from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader +from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "http://data.statmt.org/cc-100/%s.txt.xz" VALID_CODES = { diff --git a/torchtext/datasets/cnndm.py b/torchtext/datasets/cnndm.py index 36b0347678..db65680d17 100644 --- a/torchtext/datasets/cnndm.py +++ b/torchtext/datasets/cnndm.py @@ -3,18 +3,20 @@ from functools import partial from typing import Union, Set, Tuple -from torchdata.datapipes.iter import ( - FileOpener, - IterableWrapper, - OnlineReader, - GDriveReader, -) from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import ( + FileOpener, + IterableWrapper, + OnlineReader, + GDriveReader, + ) + DATASET_NAME = "CNNDM" SPLIT_LIST = { diff --git a/torchtext/datasets/cola.py b/torchtext/datasets/cola.py index 214c435d03..a56c61572b 100644 --- a/torchtext/datasets/cola.py +++ b/torchtext/datasets/cola.py @@ -3,11 +3,13 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "https://nyu-mll.github.io/CoLA/cola_public_1.1.zip" MD5 = "9f6d88c3558ec424cd9d66ea03589aba" diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py index acbd9cbd0c..25b60e4cb7 100644 --- a/torchtext/datasets/conll2000chunking.py +++ b/torchtext/datasets/conll2000chunking.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz", diff --git a/torchtext/datasets/dbpedia.py b/torchtext/datasets/dbpedia.py index be86f1a98c..3afc414462 100644 --- a/torchtext/datasets/dbpedia.py +++ b/torchtext/datasets/dbpedia.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k" MD5 = "dca7b1ae12b1091090db52aa7ec5ca64" diff --git a/torchtext/datasets/enwik9.py b/torchtext/datasets/enwik9.py index cbd5e647a7..744cf22d8b 100644 --- a/torchtext/datasets/enwik9.py +++ b/torchtext/datasets/enwik9.py @@ -1,11 +1,13 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "http://mattmahoney.net/dc/enwik9.zip" MD5 = "3e773f8a1577fda2e27f871ca17f31fd" diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 09fba57b04..d9962342b4 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -3,12 +3,14 @@ from pathlib import Path from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory from torchtext.data.datasets_utils import _wrap_split_argument +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz" MD5 = "7c2ac02c03563afcf9b574c7e56c153a" diff --git a/torchtext/datasets/iwslt2016.py b/torchtext/datasets/iwslt2016.py index dd4b806e8c..1bc1386fa6 100644 --- a/torchtext/datasets/iwslt2016.py +++ b/torchtext/datasets/iwslt2016.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -11,6 +9,10 @@ _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8" _PATH = "2016-01.tgz" diff --git a/torchtext/datasets/iwslt2017.py b/torchtext/datasets/iwslt2017.py index 4767218bd7..1691e0c89c 100644 --- a/torchtext/datasets/iwslt2017.py +++ b/torchtext/datasets/iwslt2017.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -11,6 +9,10 @@ _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp" _PATH = "2017-01-trnmted.tgz" MD5 = "aca701032b1c4411afc4d9fa367796ba" diff --git a/torchtext/datasets/mnli.py b/torchtext/datasets/mnli.py index f4335c5ccf..ff27b18d6d 100644 --- a/torchtext/datasets/mnli.py +++ b/torchtext/datasets/mnli.py @@ -3,17 +3,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zip" diff --git a/torchtext/datasets/mrpc.py b/torchtext/datasets/mrpc.py index e9abea1721..d958865079 100644 --- a/torchtext/datasets/mrpc.py +++ b/torchtext/datasets/mrpc.py @@ -3,13 +3,15 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper + URL = { "train": "https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt", diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index 81f007a678..ce974e9471 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -2,15 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz", "valid": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz", diff --git a/torchtext/datasets/penntreebank.py b/torchtext/datasets/penntreebank.py index 1e0d9f295f..f6f8cc703c 100644 --- a/torchtext/datasets/penntreebank.py +++ b/torchtext/datasets/penntreebank.py @@ -2,15 +2,16 @@ from functools import partial from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", diff --git a/torchtext/datasets/qnli.py b/torchtext/datasets/qnli.py index aa71eeb208..47fd14ffd6 100644 --- a/torchtext/datasets/qnli.py +++ b/torchtext/datasets/qnli.py @@ -3,17 +3,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "https://dl.fbaipublicfiles.com/glue/data/QNLIv2.zip" diff --git a/torchtext/datasets/qqp.py b/torchtext/datasets/qqp.py index 013a6a82a8..6ef8e18e97 100644 --- a/torchtext/datasets/qqp.py +++ b/torchtext/datasets/qqp.py @@ -1,11 +1,13 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv" MD5 = "b6d5672bd9dc1e66ab2bb020ebeafb8d" diff --git a/torchtext/datasets/rte.py b/torchtext/datasets/rte.py index 9b3925ccda..88cd29d09b 100644 --- a/torchtext/datasets/rte.py +++ b/torchtext/datasets/rte.py @@ -3,17 +3,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "https://dl.fbaipublicfiles.com/glue/data/RTE.zip" diff --git a/torchtext/datasets/sogounews.py b/torchtext/datasets/sogounews.py index 80c7c9af9a..a160c5b1a1 100644 --- a/torchtext/datasets/sogounews.py +++ b/torchtext/datasets/sogounews.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE" MD5 = "0c1700ba70b73f964dd8de569d3fd03e" diff --git a/torchtext/datasets/squad1.py b/torchtext/datasets/squad1.py index 5c83bcdec2..ef110da662 100644 --- a/torchtext/datasets/squad1.py +++ b/torchtext/datasets/squad1.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json", diff --git a/torchtext/datasets/squad2.py b/torchtext/datasets/squad2.py index 48ef86556c..11b3fdd5fc 100644 --- a/torchtext/datasets/squad2.py +++ b/torchtext/datasets/squad2.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = { "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json", diff --git a/torchtext/datasets/sst2.py b/torchtext/datasets/sst2.py index 132b22d68d..40c269ac7e 100644 --- a/torchtext/datasets/sst2.py +++ b/torchtext/datasets/sst2.py @@ -2,17 +2,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "https://dl.fbaipublicfiles.com/glue/data/SST-2.zip" diff --git a/torchtext/datasets/stsb.py b/torchtext/datasets/stsb.py index 4689f2bcec..153391d5da 100644 --- a/torchtext/datasets/stsb.py +++ b/torchtext/datasets/stsb.py @@ -2,17 +2,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz" diff --git a/torchtext/datasets/udpos.py b/torchtext/datasets/udpos.py index 3c7b76b124..a7bf1b2184 100644 --- a/torchtext/datasets/udpos.py +++ b/torchtext/datasets/udpos.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip" MD5 = "bdcac7c52d934656bae1699541424545" diff --git a/torchtext/datasets/wikitext103.py b/torchtext/datasets/wikitext103.py index 0914d708e9..fad94bf2fe 100644 --- a/torchtext/datasets/wikitext103.py +++ b/torchtext/datasets/wikitext103.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip" MD5 = "9ddaacaf6af0710eda8c456decff7832" diff --git a/torchtext/datasets/wikitext2.py b/torchtext/datasets/wikitext2.py index ec686b94cd..e7ad4a85f9 100644 --- a/torchtext/datasets/wikitext2.py +++ b/torchtext/datasets/wikitext2.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import HttpReader + URL = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip" MD5 = "542ccefacc6c27f945fb54453812b3cd" diff --git a/torchtext/datasets/wnli.py b/torchtext/datasets/wnli.py index c864275899..5c0226e8c7 100644 --- a/torchtext/datasets/wnli.py +++ b/torchtext/datasets/wnli.py @@ -2,17 +2,19 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - -# we import HttpReader from _download_hooks so we can swap out public URLs -# with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, _wrap_split_argument, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + + # we import HttpReader from _download_hooks so we can swap out public URLs + # with interal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + URL = "https://dl.fbaipublicfiles.com/glue/data/WNLI.zip" diff --git a/torchtext/datasets/yahooanswers.py b/torchtext/datasets/yahooanswers.py index 9fad10ff1d..1721adb2bf 100644 --- a/torchtext/datasets/yahooanswers.py +++ b/torchtext/datasets/yahooanswers.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU" MD5 = "f3f9899b997a42beb24157e62e3eea8d" diff --git a/torchtext/datasets/yelpreviewfull.py b/torchtext/datasets/yelpreviewfull.py index 1272dae45c..a6e355d9d7 100644 --- a/torchtext/datasets/yelpreviewfull.py +++ b/torchtext/datasets/yelpreviewfull.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0" MD5 = "f7ddfafed1033f68ec72b9267863af6c" diff --git a/torchtext/datasets/yelpreviewpolarity.py b/torchtext/datasets/yelpreviewpolarity.py index 90e1e31e59..82c04e6efc 100644 --- a/torchtext/datasets/yelpreviewpolarity.py +++ b/torchtext/datasets/yelpreviewpolarity.py @@ -2,14 +2,16 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, _create_dataset_directory, ) +if is_module_available("torchdata"): + from torchdata.datapipes.iter import FileOpener, IterableWrapper + from torchtext._download_hooks import GDriveReader + URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg" MD5 = "620c8ae4bd5a150b730f1ba9a7c6a4d3"