### Sentiment analysis on movie reviews using Python

In [2]:
import torch
from torchtext.datasets import IMDB
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# NOTE(review): the ImportError recorded at the end of this file is raised from
# torch.utils.data.datapipes, i.e. while importing; it likely points to an
# incompatible torch / torchdata / torchtext installation rather than to the
# statements below -- confirm the installed versions.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# torchtext's built-in "basic_english" tokenizer is shared by every step below
tokenizer = get_tokenizer("basic_english")

# IMDb movie reviews: one stream per split
train_iter = IMDB(split="train")
test_iter = IMDB(split="test")

# Tokenize the reviews (one token list per review) for vocabulary building
def tokenize_and_numericalize(iterator):
    """Yield the token list of every review in ``iterator``.

    Despite its name (kept so existing callers continue to work) this
    generator only tokenizes; mapping tokens to indices happens later, once
    the vocabulary exists.

    Args:
        iterator: iterable of ``(label, review)`` pairs, which is the order in
            which torchtext's IMDB dataset yields its samples.

    Yields:
        list[str]: the tokens of one review.  ``build_vocab_from_iterator``
        expects exactly this shape (an iterable of token lists); yielding
        ``(tokens, label)`` tuples makes it try to count a list as a token.
    """
    for _label, review in iterator:
        yield tokenizer(review)

# Build vocabulary
vocab = build_vocab_from_iterator(tokenize_and_numericalize(train_iter), specials=["<unk>"])
# Without a default index, looking up a token that never occurred in the
# training split raises; route such tokens to "<unk>" instead.
vocab.set_default_index(vocab["<unk>"])


# Define text transformation functions
def text_transform(x):
    """Tokenize the raw review string ``x`` and return its vocabulary indices as a list."""
    return [vocab[token] for token in tokenizer(x)]


def label_transform(x):
    """Map an IMDB label to a binary target: 1 for positive, 0 otherwise.

    Accepts both label encodings torchtext has used for IMDB: the strings
    ``'pos'`` / ``'neg'`` and the integers ``2`` (positive) / ``1`` (negative).
    A plain ``int(x)`` fails on the strings and yields 1/2 rather than 0/1 on
    the integers.
    """
    return 1 if x in ("pos", 2) else 0

# Convert text data to tensors
def collate_batch(batch):
    """Collate a list of IMDB samples into padded text and label tensors.

    Args:
        batch: list of ``(label, review)`` pairs, which is the order in which
            torchtext's IMDB dataset yields its samples.

    Returns:
        tuple: ``(texts, labels)``.  ``texts`` is an int64 tensor of token
        indices, padded to the longest review in the batch with the batch
        dimension first.  ``labels`` is a float64 tensor with one entry per
        sample.  Both tensors are moved to ``device``.
    """
    labels, texts = [], []
    for label, review in batch:
        labels.append(label_transform(label))
        texts.append(torch.tensor(text_transform(review), dtype=torch.int64))

    # NOTE(review): float64 is kept from the original; loss functions usually
    # want targets in the model's dtype (typically float32) -- confirm.
    labels = torch.tensor(labels, dtype=torch.float64)
    # NOTE(review): pad_sequence pads with 0 by default, which presumably is
    # also the index of "<unk>" (the only special token) -- consider a
    # dedicated padding token.
    texts = torch.nn.utils.rnn.pad_sequence(texts, batch_first=True)
    # Return both tensors on the same device (previously only labels were moved).
    return texts.to(device), labels.to(device)

# Batched loaders over both splits; only the training loader asks for shuffling
train_loader = torch.utils.data.DataLoader(
    train_iter,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_batch,
)
test_loader = torch.utils.data.DataLoader(
    test_iter,
    batch_size=32,
    shuffle=False,
    collate_fn=collate_batch,
)

# Example usage: walk the training batches and report their tensor shapes
for batch_idx, (data, target) in enumerate(train_loader):
    # Make sure both tensors live on the selected device before using them
    data = data.to(device)
    target = target.to(device)
    print(f"Batch {batch_idx + 1}: Data shape: {data.shape}, Target shape: {target.shape}")


ImportError: cannot import name 'DILL_AVAILABLE' from 'torch.utils.data.datapipes.utils.common' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/utils/data/datapipes/utils/common.py)