Skip to content

Commit

Permalink
Revert "Add Torchdata as a requirement and remove conditional imports…
Browse files Browse the repository at this point in the history
… of Torchdata (#1961) (#1962)"

This reverts commit 771b5a2.
  • Loading branch information
atalman committed Dec 9, 2022
1 parent a075bcc commit e1e969d
Show file tree
Hide file tree
Showing 34 changed files with 151 additions and 88 deletions.
1 change: 0 additions & 1 deletion packaging/torchtext/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ requirements:
- python
- requests
- tqdm
- torchdata
{{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}

build:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def run(self):
description="Text utilities and datasets for PyTorch",
long_description=read("README.rst"),
license="BSD",
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", "torchdata"],
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"],
python_requires=">=3.7",
classifiers=[
"Programming Language :: Python :: 3.7",
Expand Down
5 changes: 4 additions & 1 deletion torchtext/_download_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

# This is to allow monkey-patching in fbcode
from torch.hub import load_state_dict_from_url # noqa
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
from torchtext._internal.module_utils import is_module_available
from tqdm import tqdm

if is_module_available("torchdata"):
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401


def _stream_response(r, chunk_size=16 * 1024):
total_size = int(r.headers.get("Content-length", 0))
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/ag_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = {
"train": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv",
"test": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv",
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/amazonreviewfull.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader

URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA"

MD5 = "57d28bd5d930e772930baddf36641c7c"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/amazonreviewpolarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader

URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM"

MD5 = "fe39f8b653cada45afd5792e0f0e8f9b"
Expand Down
7 changes: 5 additions & 2 deletions torchtext/datasets/cc100.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import os.path
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = "http://data.statmt.org/cc-100/%s.txt.xz"

VALID_CODES = {
Expand Down
14 changes: 8 additions & 6 deletions torchtext/datasets/cnndm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
from functools import partial
from typing import Union, Set, Tuple

from torchdata.datapipes.iter import (
FileOpener,
IterableWrapper,
OnlineReader,
GDriveReader,
)
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import (
FileOpener,
IterableWrapper,
OnlineReader,
GDriveReader,
)

DATASET_NAME = "CNNDM"

SPLIT_LIST = {
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/cola.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = "https://nyu-mll.github.io/CoLA/cola_public_1.1.zip"

MD5 = "9f6d88c3558ec424cd9d66ea03589aba"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/conll2000chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = {
"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz",
"test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz",
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/dbpedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader

URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k"

MD5 = "dca7b1ae12b1091090db52aa7ec5ca64"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/enwik9.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import _create_dataset_directory

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = "http://mattmahoney.net/dc/enwik9.zip"

MD5 = "3e773f8a1577fda2e27f871ca17f31fd"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from pathlib import Path
from typing import Tuple, Union

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import _create_dataset_directory
from torchtext.data.datasets_utils import _wrap_split_argument

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

MD5 = "7c2ac02c03563afcf9b574c7e56c153a"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/iwslt2016.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_clean_files,
Expand All @@ -11,6 +9,10 @@
_wrap_split_argument,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader

URL = "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8"

_PATH = "2016-01.tgz"
Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/iwslt2017.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_clean_files,
Expand All @@ -11,6 +9,10 @@
_wrap_split_argument,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader

URL = "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp"
_PATH = "2017-01-trnmted.tgz"
MD5 = "aca701032b1c4411afc4d9fa367796ba"
Expand Down
12 changes: 7 additions & 5 deletions torchtext/datasets/mnli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper

# we import HttpReader from _download_hooks so we can swap out public URLs
# with internal URLs when the dataset is used within Facebook
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_create_dataset_directory,
_wrap_split_argument,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper

# we import HttpReader from _download_hooks so we can swap out public URLs
# with internal URLs when the dataset is used within Facebook
from torchtext._download_hooks import HttpReader


URL = "https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zip"

Expand Down
4 changes: 3 additions & 1 deletion torchtext/datasets/mrpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper


URL = {
"train": "https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt",
Expand Down
7 changes: 4 additions & 3 deletions torchtext/datasets/multi30k.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
from functools import partial
from typing import Union, Tuple

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader # noqa
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = {
"train": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz",
"valid": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz",
Expand Down
7 changes: 4 additions & 3 deletions torchtext/datasets/penntreebank.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
from functools import partial
from typing import Tuple, Union

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import GDriveReader # noqa
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_wrap_split_argument,
_create_dataset_directory,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = {
"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt",
"test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt",
Expand Down
12 changes: 7 additions & 5 deletions torchtext/datasets/qnli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper

# we import HttpReader from _download_hooks so we can swap out public URLs
# with internal URLs when the dataset is used within Facebook
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import (
_create_dataset_directory,
_wrap_split_argument,
)

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper

# we import HttpReader from _download_hooks so we can swap out public URLs
# with internal URLs when the dataset is used within Facebook
from torchtext._download_hooks import HttpReader


URL = "https://dl.fbaipublicfiles.com/glue/data/QNLIv2.zip"

Expand Down
6 changes: 4 additions & 2 deletions torchtext/datasets/qqp.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
from functools import partial

from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader
from torchtext._internal.module_utils import is_module_available
from torchtext.data.datasets_utils import _create_dataset_directory

if is_module_available("torchdata"):
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

URL = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"

MD5 = "b6d5672bd9dc1e66ab2bb020ebeafb8d"
Expand Down

0 comments on commit e1e969d

Please sign in to comment.