<a href="https://colab.research.google.com/github/tannisthamaiti/Market-Analytics/blob/main/quick_nlp_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Sample small text dataset for quick training
text_data = [
    ("hello world", 0),
    ("hi there", 0),
    ("deep learning", 1),
    ("neural network", 1),
    ("machine learning", 1),
    ("hello again", 0),
]

# Creating a simple vocabulary
vocab = sorted(set(word for sentence, _ in text_data for word in sentence.split()))
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
vocab_size = len(vocab)

# Function to encode text data
def encode_sentence(sentence):
    return [word_to_idx[word] for word in sentence.split()]

# Dataset class
class TextDataset(Dataset):
    def __init__(self, data):
        self.data = [(torch.tensor(encode_sentence(sentence), dtype=torch.long), label) for sentence, label in data]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Split dataset
train_data, test_data = train_test_split(text_data, test_size=0.2, random_state=42)
train_dataset = TextDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

# Define a simple text classification model
class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(TextClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)  # Binary classification

    def forward(self, x):
        x = self.embedding(x)  # (batch, seq_len, embed_dim)
        _, (hidden, _) = self.lstm(x)  # (batch, hidden_dim)
        out = self.fc(hidden[-1])  # Output layer
        return out

# Model, Loss, Optimizer
embed_dim = 8
hidden_dim = 16
model = TextClassifier(vocab_size, embed_dim, hidden_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop (lightweight)
epochs = 5
for epoch in range(epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        #inputs = inputs.unsqueeze(1)  # Ensure correct shape for LSTM
        outputs = model(inputs)
        loss = criterion(outputs, torch.tensor([labels], dtype=torch.long))
        loss.backward()
        optimizer.step()

# Save trained model weights
torch.save(model.state_dict(), "text_classifier_weights.pth")
print("Model weights saved successfully as text_classifier_weights.pth")


Model weights saved successfully as text_classifier_weights.pth


In [12]:
model = torch.load("/content/text_classifier_weights.pth")






    # Single sentence for testing
sentence = "hello world"

    # Preprocess and encode the sentence
encoded_sentence = torch.tensor(encode_sentence(sentence), dtype=torch.long)
input_tensor = encoded_sentence.clone().detach().unsqueeze(0)  # Add batch dimension
output = model(input_tensor)
predicted_label = torch.argmax(output).item()


UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL __main__.TextClassifier was not an allowed global by default. Please use `torch.serialization.add_safe_globals([TextClassifier])` or the `torch.serialization.safe_globals([TextClassifier])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.