In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from tqdm import tqdm
from transformers import BertTokenizer
from gensim.models import KeyedVectors
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
import gensim.downloader as api

# Load AG News Dataset
dataset = load_dataset('ag_news')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize(text):
    return tokenizer(text, padding='max_length', truncation=True, return_tensors='pt', max_length=128)

# Load Word2Vec Embeddings
word2vec = api.load('word2vec-google-news-300')
def get_word2vec_embedding(tokens):
    embeddings = []
    for token in tokens:
        if token in word2vec:
            embeddings.append(word2vec[token])
    if len(embeddings) == 0:
        return np.zeros(word2vec.vector_size)
    return np.mean(embeddings, axis=0)

class AGNewsDataset(data.Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        tokens = tokenizer.tokenize(self.texts[idx])
        embedding = get_word2vec_embedding(tokens)
        label = self.labels[idx]
        return torch.tensor(embedding, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

train_texts = [item['text'] for item in dataset['train']]
train_labels = [item['label'] for item in dataset['train']]
test_texts = [item['text'] for item in dataset['test']]
test_labels = [item['label'] for item in dataset['test']]

train_dataset = AGNewsDataset(train_texts, train_labels)
test_dataset = AGNewsDataset(test_texts, test_labels)

train_loader = data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=64, shuffle=False)

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 100, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(2)
        self.fc1 = nn.Linear(100 * (word2vec.vector_size // 2), 4)  # 4 classes in AG News

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x

model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and Evaluation
num_epochs = 10
best_accuracy = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for inputs, labels in tqdm(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    train_accuracy = 100 * correct / total
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')
    
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        torch.save(model.state_dict(), 'best_model.pt')

# Load the best model
model.load_state_dict(torch.load('best_model.pt'))

# Generate Classification Report
y_true = []
y_pred = []
model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(labels.tolist())
        y_pred.extend(predicted.tolist())

print(classification_report(y_true, y_pred, target_names=dataset['test'].features['label'].names))


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:19<00:00, 13.41it/s]


Epoch [1/10], Train Loss: 912.2557, Train Accuracy: 83.22%, Test Accuracy: 87.80%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:17<00:00, 13.66it/s]


Epoch [2/10], Train Loss: 675.8879, Train Accuracy: 87.84%, Test Accuracy: 88.41%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:16<00:00, 13.76it/s]


Epoch [3/10], Train Loss: 643.8324, Train Accuracy: 88.39%, Test Accuracy: 88.46%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:16<00:00, 13.69it/s]


Epoch [4/10], Train Loss: 623.2161, Train Accuracy: 88.72%, Test Accuracy: 88.54%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:18<00:00, 13.58it/s]


Epoch [5/10], Train Loss: 610.8825, Train Accuracy: 88.91%, Test Accuracy: 88.43%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:18<00:00, 13.56it/s]


Epoch [6/10], Train Loss: 600.3323, Train Accuracy: 89.05%, Test Accuracy: 88.00%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:16<00:00, 13.71it/s]


Epoch [7/10], Train Loss: 588.4040, Train Accuracy: 89.24%, Test Accuracy: 88.33%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:19<00:00, 13.42it/s]


Epoch [8/10], Train Loss: 578.3513, Train Accuracy: 89.44%, Test Accuracy: 88.46%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:18<00:00, 13.59it/s]


Epoch [9/10], Train Loss: 573.1492, Train Accuracy: 89.40%, Test Accuracy: 88.91%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:16<00:00, 13.72it/s]


Epoch [10/10], Train Loss: 566.3935, Train Accuracy: 89.59%, Test Accuracy: 88.67%
              precision    recall  f1-score   support

       World       0.93      0.87      0.90      1900
      Sports       0.94      0.97      0.96      1900
    Business       0.83      0.86      0.84      1900
    Sci/Tech       0.86      0.86      0.86      1900

    accuracy                           0.89      7600
   macro avg       0.89      0.89      0.89      7600
weighted avg       0.89      0.89      0.89      7600



## OLD CODE

In [1]:
import gensim.downloader as api
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer
import numpy as np

# Load the Word2Vec model
word2vec_model = api.load('word2vec-google-news-300')

# Load the AG News dataset
dataset = load_dataset("ag_news")

# Load tokenizer from HuggingFace
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Convert to PyTorch tensors
class AGNewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return encoding['input_ids'].squeeze(), encoding['attention_mask'].squeeze(), torch.tensor(label, dtype=torch.long)

# Tokenize and prepare datasets
def prepare_dataset(dataset, tokenizer, max_length):
    texts = [example['text'] for example in dataset]
    labels = [example['label'] for example in dataset]
    return AGNewsDataset(texts, labels, tokenizer, max_length)

# Set max length for padding
max_length = 128

# Create datasets
train_dataset = prepare_dataset(dataset['train'], tokenizer, max_length)
test_dataset = prepare_dataset(dataset['test'], tokenizer, max_length)

# Split training set into training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

class CNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, embedding_matrix):
        super(CNNModel, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.conv1 = nn.Conv2d(1, 100, (3, embedding_dim))
        self.conv2 = nn.Conv2d(1, 100, (4, embedding_dim))
        self.conv3 = nn.Conv2d(1, 100, (5, embedding_dim))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(300, output_dim)
    
    def forward(self, x):
        x = self.embedding(x).unsqueeze(1)  # Add channel dimension
        x1 = torch.relu(self.conv1(x)).squeeze(3)
        x1 = torch.max_pool1d(x1, x1.size(2)).squeeze(2)
        x2 = torch.relu(self.conv2(x)).squeeze(3)
        x2 = torch.max_pool1d(x2, x2.size(2)).squeeze(2)
        x3 = torch.relu(self.conv3(x)).squeeze(3)
        x3 = torch.max_pool1d(x3, x3.size(2)).squeeze(2)
        x = torch.cat((x1, x2, x3), 1)
        x = self.dropout(x)
        return self.fc(x)

# Training Parameters
BATCH_SIZE = 64
EPOCHS = 10
OUTPUT_DIM = 4
LR = 0.001

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report
import contextlib
import sys

# Assuming the necessary variables are defined: word2vec_model, CNNModel, device, OUTPUT_DIM, LR, EPOCHS, train_loader, val_loader, test_loader

class Logger:
    def __init__(self, filename):
        self.terminal = sys.stdout
        self.log = open(filename, 'w')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()

    def flush(self):
        self.terminal.flush()
        self.log.flush()

sys.stdout = Logger('output_log.txt')
sys.stderr = Logger('output_log.txt')

# Initialize embedding matrix
def build_embedding_matrix(word2vec_model, embedding_dim):
    embedding_matrix = np.zeros((len(word2vec_model), embedding_dim))
    for i, word in enumerate(word2vec_model.index_to_key):
        embedding_matrix[i] = word2vec_model[word]
    return torch.tensor(embedding_matrix, dtype=torch.float32)

# Build the embedding matrix
embedding_dim = 300  # Word2Vec uses 300-dimensional vectors
embedding_matrix = build_embedding_matrix(word2vec_model, embedding_dim)

# Model, loss function, and optimizer
model = CNNModel(len(word2vec_model), embedding_dim, OUTPUT_DIM, embedding_matrix).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    for texts, _, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)

        outputs = model(texts)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    val_labels = []
    val_preds = []
    with torch.no_grad():
        for texts, _, labels in tqdm(val_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts)
            _, preds = torch.max(outputs, 1)
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())

    val_accuracy = accuracy_score(val_labels, val_preds)
    print(f'Epoch {epoch + 1}/{EPOCHS}, Validation Accuracy: {val_accuracy:.4f}')

# Final evaluation on test set
model.eval()
test_labels = []
test_preds = []
with torch.no_grad():
    for texts, _, labels in tqdm(test_loader, desc='Testing', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)
        outputs = model(texts)
        _, preds = torch.max(outputs, 1)
        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

overall_accuracy = accuracy_score(test_labels, test_preds)
class_report = classification_report(test_labels, test_preds, target_names=['World', 'Sports', 'Business', 'Sci/Tech'])

print(f'Test Accuracy: {overall_accuracy:.4f}')
print('Classification Report:')
print(class_report)


Epoch 1/10:   0%|          | 0/1500 [00:26<?, ?batch/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.35 GiB (GPU 0; 11.92 GiB total capacity; 10.08 GiB already allocated; 1.69 GiB free; 10.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [2]:
import numpy as np
# Initialize embedding matrix
def build_embedding_matrix(word2vec_model, embedding_dim):
    embedding_matrix = np.zeros((len(word2vec_model), embedding_dim))
    for i, word in enumerate(word2vec_model.index_to_key):
        embedding_matrix[i] = word2vec_model[word]
    return torch.tensor(embedding_matrix, dtype=torch.float32)

# Build the embedding matrix
embedding_dim = 300  # Word2Vec uses 300-dimensional vectors
embedding_matrix = build_embedding_matrix(word2vec_model, embedding_dim)

# Model, loss function, and optimizer
model = CNNModel(len(word2vec_model), embedding_dim, OUTPUT_DIM, embedding_matrix).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    for texts, _, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)
        
        outputs = model(texts)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    model.eval()
    val_labels = []
    val_preds = []
    with torch.no_grad():
        for texts, _, labels in tqdm(val_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts)
            _, preds = torch.max(outputs, 1)
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())
    
    val_accuracy = accuracy_score(val_labels, val_preds)
    print(f'Epoch {epoch + 1}/{EPOCHS}, Validation Accuracy: {val_accuracy:.4f}')

# Final evaluation on test set
model.eval()
test_labels = []
test_preds = []
with torch.no_grad():
    for texts, _, labels in tqdm(test_loader, desc='Testing', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)
        outputs = model(texts)
        _, preds = torch.max(outputs, 1)
        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

overall_accuracy = accuracy_score(test_labels, test_preds)
class_report = classification_report(test_labels, test_preds, target_names=['World', 'Sports', 'Business', 'Sci/Tech'])

print(f'Test Accuracy: {overall_accuracy:.4f}')
print('Classification Report:')
print(class_report)


Epoch 1/10:   0%|                                                                                                                               | 0/1500 [00:25<?, ?batch/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.35 GiB (GPU 0; 11.92 GiB total capacity; 3.38 GiB already allocated; 1.59 GiB free; 3.40 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
import gensim.downloader as api
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
from tqdm import tqdm

# Download NLTK data
nltk.download('punkt')

# Download the Word2Vec model
word2vec_model = api.load('word2vec-google-news-300')

# Load the AG News dataset
dataset = load_dataset("ag_news")

# NLTK Tokenizer Function
def nltk_tokenizer(text):
    return word_tokenize(text.lower())

# Convert to PyTorch tensors
class AGNewsDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_length):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        tokenized_text = [self.vocab.get(token, self.vocab['<UNK>']) for token in text]
        if len(tokenized_text) < self.max_length:
            tokenized_text += [self.vocab['<PAD>']] * (self.max_length - len(tokenized_text))
        else:
            tokenized_text = tokenized_text[:self.max_length]
        return torch.tensor(tokenized_text, dtype=torch.long), torch.tensor(label, dtype=torch.long)

# Build vocabulary
def build_vocab(dataset, tokenizer):
    vocab = {'<PAD>': 0, '<UNK>': 1}
    for example in dataset:
        tokens = tokenizer(example['text'])
        for token in tokens:
            if token not in vocab:
                vocab[token] = len(vocab)
    return vocab

# Tokenize and build vocab
train_texts = [nltk_tokenizer(example['text']) for example in dataset['train']]
train_labels = [example['label'] for example in dataset['train']]
vocab = build_vocab(dataset['train'], nltk_tokenizer)

# Set max length for padding
max_length = 128

# Create dataset
full_dataset = AGNewsDataset(train_texts, train_labels, vocab, max_length)

# Split training set into training and validation sets
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Prepare test dataset
test_texts = [nltk_tokenizer(example['text']) for example in dataset['test']]
test_labels = [example['label'] for example in dataset['test']]
test_dataset = AGNewsDataset(test_texts, test_labels, vocab, max_length)

# Initialize embedding matrix
def build_embedding_matrix(vocab, word2vec_model, embedding_dim):
    embedding_matrix = np.zeros((len(vocab), embedding_dim))
    for word, idx in vocab.items():
        if word in word2vec_model:
            embedding_matrix[idx] = word2vec_model[word]
        else:
            embedding_matrix[idx] = np.random.normal(size=(embedding_dim,))
    return torch.tensor(embedding_matrix, dtype=torch.float32)

# Build the embedding matrix
embedding_dim = 300  # Word2Vec uses 300-dimensional vectors
embedding_matrix = build_embedding_matrix(vocab, word2vec_model, embedding_dim)

class CNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, embedding_matrix):
        super(CNNModel, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.conv1 = nn.Conv2d(1, 100, (3, embedding_dim))
        self.conv2 = nn.Conv2d(1, 100, (4, embedding_dim))
        self.conv3 = nn.Conv2d(1, 100, (5, embedding_dim))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(300, output_dim)
    
    def forward(self, x):
        x = self.embedding(x).unsqueeze(1)  # Add channel dimension
        x1 = torch.relu(self.conv1(x)).squeeze(3)
        x1 = torch.max_pool1d(x1, x1.size(2)).squeeze(2)
        x2 = torch.relu(self.conv2(x)).squeeze(3)
        x2 = torch.max_pool1d(x2, x2.size(2)).squeeze(2)
        x3 = torch.relu(self.conv3(x)).squeeze(3)
        x3 = torch.max_pool1d(x3, x3.size(2)).squeeze(2)
        x = torch.cat((x1, x2, x3), 1)
        x = self.dropout(x)
        return self.fc(x)

# Training Parameters
BATCH_SIZE = 64
EPOCHS = 10
OUTPUT_DIM = 4
LR = 0.001

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Model, loss function, and optimizer
model = CNNModel(len(vocab), embedding_dim, OUTPUT_DIM, embedding_matrix).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    for texts, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)
        
        outputs = model(texts)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    model.eval()
    val_labels = []
    val_preds = []
    with torch.no_grad():
        for texts, labels in tqdm(val_loader, desc=f'Epoch {epoch + 1}/{EPOCHS}', unit='batch'):
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts)
            _, preds = torch.max(outputs, 1)
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())
    
    val_accuracy = accuracy_score(val_labels, val_preds)
    print(f'Epoch {epoch + 1}/{EPOCHS}, Validation Accuracy: {val_accuracy:.4f}')

# Final evaluation on test set
model.eval()
test_labels = []
test_preds = []
with torch.no_grad():
    for texts, labels in tqdm(test_loader, desc='Testing', unit='batch'):
        texts, labels = texts.to(device), labels.to(device)
        outputs = model(texts)
        _, preds = torch.max(outputs, 1)
        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

overall_accuracy = accuracy_score(test_labels, test_preds)
class_report = classification_report(test_labels, test_preds, target_names=['World', 'Sports', 'Business', 'Sci/Tech'])

print(f'Test Accuracy: {overall_accuracy:.4f}')
print('Classification Report:')
print(class_report)


[nltk_data] Downloading package punkt to /home/IAIS/rrao/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Epoch 1/10:   2%|██▎                                                                                                                | 30/1500 [14:04<11:54:50, 29.18s/batch]