In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

In [20]:
# Load dataset
categories = ['rec.autos', 'sci.med', 'comp.graphics']
newsgroups = fetch_20newsgroups(subset='all', categories=categories)
vectorizer = CountVectorizer(max_features=5000)
X = vectorizer.fit_transform(newsgroups.data).toarray()
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(newsgroups.target)


In [21]:
print(y[:10])

[1 0 1 1 0 0 2 0 2 0]


In [22]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_seq = torch.tensor(X_train, dtype=torch.float32)
X_test_seq = torch.tensor(X_test, dtype=torch.float32)
y_train_seq = torch.tensor(y_train, dtype=torch.long)
y_test_seq = torch.tensor(y_test, dtype=torch.long)

In [27]:
# Parameters
input_size = X_train_seq.shape[1]
hidden_size = 32
num_layers = 2
num_classes = 3

# Building an RNN model for text

As a data analyst at PyBooks, you often encounter datasets that contain sequential information, such as customer interactions, time series data, or text documents. RNNs can effectively analyze and extract insights from such data. In this exercise, you will dive into the Newsgroup dataset that has already been processed and encoded for you. This dataset comprises articles from different categories. Your task is to apply an RNN to classify these articles into three categories:

rec.autos, sci.med, and comp.graphics.

This and the following exercises use the fetch_20newsgroups dataset from sklearn.

* Complete the RNN class with an RNN layer and a fully connected linear layer.
* Initialize the model.
* Train the RNN model for ten epochs by zeroing the gradients.

In [28]:
# Complete the RNN class
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)        
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :] 
        out = self.fc(out)
        return out

# Initialize the model
rnn_model = RNNModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn_model.parameters(), lr=0.01)



In [25]:
# Train the model for ten epochs and zero the gradients
for epoch in range(50):
    optimizer.zero_grad()
    outputs = rnn_model(X_train_seq.unsqueeze(1))  # Add a dimension for batch_first
    loss = criterion(outputs, y_train_seq)
    loss.backward()
    optimizer.step()
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Epoch: 1, Loss: 1.118180274963379
Epoch: 2, Loss: 0.9855406880378723
Epoch: 3, Loss: 0.8225328922271729
Epoch: 4, Loss: 0.6509308815002441
Epoch: 5, Loss: 0.48327064514160156
Epoch: 6, Loss: 0.3461679220199585
Epoch: 7, Loss: 0.24381475150585175
Epoch: 8, Loss: 0.1678209751844406
Epoch: 9, Loss: 0.11352389305830002
Epoch: 10, Loss: 0.07626967132091522
Epoch: 11, Loss: 0.051638681441545486
Epoch: 12, Loss: 0.03534931689500809
Epoch: 13, Loss: 0.024788090959191322
Epoch: 14, Loss: 0.01802026852965355
Epoch: 15, Loss: 0.013481352478265762
Epoch: 16, Loss: 0.010755353607237339
Epoch: 17, Loss: 0.00882408395409584
Epoch: 18, Loss: 0.007347017992287874
Epoch: 19, Loss: 0.006084200460463762
Epoch: 20, Loss: 0.004938804544508457
Epoch: 21, Loss: 0.00394572876393795
Epoch: 22, Loss: 0.0031631074380129576
Epoch: 23, Loss: 0.002519233152270317
Epoch: 24, Loss: 0.0019569513387978077
Epoch: 25, Loss: 0.0015221353387460113
Epoch: 26, Loss: 0.0012471586233004928
Epoch: 27, Loss: 0.0010795190464705229

Model loss should always decrease as it shows how well the model has learned new patterns. Keep up the excellent work!

In [38]:
# Evaluate the model
rnn_model.eval()
with torch.no_grad():
    outputs = rnn_model(X_test_seq)
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracy_score(y_test_seq, predicted)
    print(f'Test Accuracy: {accuracy:.2f}')

RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor

# Building an LSTM model for text

At PyBooks, the team is constantly seeking to enhance the user experience by leveraging the latest advancements in technology. In line with this vision, they have assigned you a critical task. The team wants you to explore the potential of another powerful tool: LSTM, known for capturing more complexities in data patterns. You are working with the same Newsgroup dataset, with the objective remaining unchanged: to classify news articles into three distinct categories:

rec.autos, sci.med, and comp.graphics.

* Set up an LSTM model by completing the LSTM and linear layers with the necessary parameters.
* Initialize the model with the necessary parameters.
* Train the LSTM model resetting the gradients to zero and passing the input data X_train_seq through the model.
* Calculate the loss based on the predicted outputs and the true labels.




In [31]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x.unsqueeze(1), (h0, c0))  # Reshape input to (batch_size, seq_length, input_size)
        out = out[:, -1, :] 
        out = self.fc(out)
        return out


In [32]:
# Initialize model with required parameters
lstm_model = LSTMModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.01)

# Train the model by passing the correct parameters and zeroing the gradient
for epoch in range(10): 
    optimizer.zero_grad()
    outputs = lstm_model(X_train_seq)
    loss = criterion(outputs, y_train_seq)
    loss.backward()
    optimizer.step()
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Epoch: 1, Loss: 1.1001509428024292
Epoch: 2, Loss: 1.072670817375183
Epoch: 3, Loss: 1.0309168100357056
Epoch: 4, Loss: 0.9662202000617981
Epoch: 5, Loss: 0.8806395530700684
Epoch: 6, Loss: 0.7788384556770325
Epoch: 7, Loss: 0.6678829789161682
Epoch: 8, Loss: 0.5554749965667725
Epoch: 9, Loss: 0.4473586976528168
Epoch: 10, Loss: 0.3498893678188324


In [33]:
# Evaluate the model
lstm_model.eval()
with torch.no_grad():
    outputs = lstm_model(X_test_seq)
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracy_score(y_test_seq, predicted)
    print(f'Test Accuracy: {accuracy:.2f}')

Test Accuracy: 0.95


 The output presents model loss that would keep decreasing with each epoch. This information could be utilized by the team at PyBooks to compare with other models. Keep up the great w

# Building a GRU model for text

At PyBooks, the team has been impressed with the performance of the two models you previously trained. However, in their pursuit of excellence, they want to ensure the selection of the absolute best model for the task at hand. Therefore, they have asked you to further expand the project by experimenting with the capabilities of GRU models, renowned for their efficiency and effectiveness in text classification tasks. Your new assignment is to apply the GRU model to classify articles from the Newsgroup dataset into the following categories:

rec.autos, sci.med, and comp.graphics.

* Complete the GRU class with the required parameters.
* Initialize the model with the same parameters.
* Train the model: pass the parameters to the criterion function, and backpropagate the loss.

In [35]:
# Complete the GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.gru(x.unsqueeze(1), h0)  # Reshape input to (batch_size, seq_length, input_size)
        out = out[:, -1, :]
        out = self.fc(out)
        return out


In [36]:
# Initialize the model
gru_model = GRUModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(gru_model.parameters(), lr=0.01)

# Train the model and backpropagate the loss after initialization
for epoch in range(15):
    optimizer.zero_grad()
    outputs = gru_model(X_train_seq)
    loss = criterion(outputs, y_train_seq)
    loss.backward()
    optimizer.step()
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Epoch: 1, Loss: 1.1020915508270264
Epoch: 2, Loss: 1.0381512641906738
Epoch: 3, Loss: 0.9452874064445496
Epoch: 4, Loss: 0.8157745003700256
Epoch: 5, Loss: 0.6671879291534424
Epoch: 6, Loss: 0.5211980938911438
Epoch: 7, Loss: 0.3891841173171997
Epoch: 8, Loss: 0.27994370460510254
Epoch: 9, Loss: 0.19415798783302307
Epoch: 10, Loss: 0.13082453608512878
Epoch: 11, Loss: 0.08623056858778
Epoch: 12, Loss: 0.056182049214839935
Epoch: 13, Loss: 0.036781881004571915
Epoch: 14, Loss: 0.024478556588292122
Epoch: 15, Loss: 0.016716597601771355


In [37]:
# Evaluate the model
gru_model.eval()
with torch.no_grad():
    outputs = gru_model(X_test_seq)
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracy_score(y_test_seq, predicted)
    print(f'Test Accuracy: {accuracy:.2f}')

Test Accuracy: 0.96


You've effectively trained GRU models for text classification. The decreasing model loss across epochs is promising, and can be used by the PyBooks team for comparison with other models!


# For testing