In [None]:
# Importing the Required Libraries
import torch
from transformers import AutoModel, AutoTokenizer
from sentence_transformers import SentenceTransformer
import datasets
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Pre-trained sentence-embedding backbone. 'all-MiniLM-L6-v2' is used here,
# but any other pre-trained SentenceTransformer checkpoint can be substituted.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Custom category name -> integer label id
categories = {"Promotion": 0, "Customer Feedback": 1}

# Demo sentences: two promotion-style sentences followed by one feedback-style sentence
sentences = [
    "Enter your coupon code at checkout to unlock exclusive discounts.",
    "Use promo codes at checkout to enjoy extra savings on your favorite products.",
    "Leave a review and share your experience with others.",
]

# Integer labels aligned position-by-position with `sentences`
labels = [categories[name] for name in ("Promotion", "Promotion", "Customer Feedback")]

# Encode every sentence with the backbone (one embedding per sentence)
embeddings = model.encode(sentences)

# Show each sentence next to its embedding vector
for sentence, embedding in zip(sentences, embeddings):
    print(f"Sentence: {sentence}")
    print(f"Embedding: {embedding}")
    print("\n")

Sentence: Enter your coupon code at checkout to unlock exclusive discounts.
Embedding: [-1.14206187e-01 -2.74209883e-02 -3.96874584e-02 -2.79672686e-02
 -2.72465348e-02  5.41761070e-02  3.75798494e-02 -2.18264461e-02
  3.07117291e-02  4.48378883e-02  1.25748679e-01  9.16159432e-03
 -5.22613525e-02 -7.82929212e-02  9.25529525e-02 -1.31356204e-02
 -8.63155201e-02  8.46766215e-03 -7.19874874e-02 -4.35187370e-02
  2.34923698e-02 -1.28132731e-01 -2.38076486e-02  3.28224301e-02
  2.50091273e-02 -1.79319191e-04 -1.20627927e-02  1.94151700e-02
  2.21341327e-02  4.41646427e-02  6.80269897e-02 -2.59106550e-02
  6.05869442e-02  1.69127118e-02  8.23498815e-02 -6.00091890e-02
 -1.24941111e-01 -6.69811144e-02 -7.94921536e-03  1.65649708e-02
 -8.25571567e-02 -1.24615151e-02 -7.45374039e-02  8.79687432e-04
  5.68990894e-02 -5.52479103e-02 -8.12460855e-02  5.20990193e-02
  6.82877600e-02  1.99729800e-02  1.25161335e-01  1.84190888e-02
 -8.68602544e-02 -1.24610141e-02 -1.38541246e-02 -8.19643494e-03
 -1

In [None]:
# Multi-task demo: one shared sentence embedding feeds two independent linear
# heads (sentence classification and sentiment analysis).
#
# NOTE: both heads are freshly initialised and have not been trained at this
# point, so the predicted categories and the (near-uniform) probabilities
# printed below only demonstrate the data flow; they are not meaningful
# predictions.

# Example sentences for testing
sentences = ["Your coupon code exclusive discounts is expired.",
             "Use promo codes at checkout is not mandatory.",
             "Your review is appreciated."]

# Human-readable names for each output index of the two heads
classification_labels = ['Promotion Expiry', 'Coupon Usage', 'Customer Feedback', 'Discount Offers']
sentiment_labels = ['Negative', 'Neutral', 'Positive']

# Generate embeddings for the sentences (one row per sentence)
embeddings = model.encode(sentences, convert_to_tensor=True)

# Seed the RNG so the random head initialisation (and therefore the printed
# demo output) is the same on every run.
torch.manual_seed(42)

# Define the classification and sentiment heads (no class structure).
# The input width is taken from the embeddings and the output width from the
# label lists, so swapping the backbone or editing the labels cannot leave the
# heads out of sync with them.
embedding_dim = embeddings.shape[1]
classification_head = nn.Linear(embedding_dim, len(classification_labels))
sentiment_head = nn.Linear(embedding_dim, len(sentiment_labels))

# encode(..., convert_to_tensor=True) returns a tensor on the backbone's device
# (e.g. the GPU when one is available) while the heads live on the default
# device; align them to avoid a device-mismatch error in the forward pass.
embeddings = embeddings.to(classification_head.weight.device)

# Softmax over the class dimension turns logits into probabilities
softmax = nn.Softmax(dim=1)

# Inference only: no autograd graph is needed, which also keeps `grad_fn`
# out of the printed tensors.
with torch.no_grad():
    # Get the classification and sentiment outputs (raw logits)
    classification_output = classification_head(embeddings)
    sentiment_output = sentiment_head(embeddings)

    # Apply Softmax to the outputs
    classification_probs = softmax(classification_output)
    sentiment_probs = softmax(sentiment_output)

# Map the index of max probability to its category name and report it
for i, sentence in enumerate(sentences):
    # Classification
    classification_index = torch.argmax(classification_probs[i]).item()
    classification_result = classification_labels[classification_index]

    # Sentiment
    sentiment_index = torch.argmax(sentiment_probs[i]).item()
    sentiment_result = sentiment_labels[sentiment_index]

    # Print out classification probabilities for debugging
    print(f"Sentence: {sentence}")
    print(f"Classification Output (Category): {classification_result}")
    print(f"Classification Probabilities: {classification_probs[i]}")
    print(f"Sentiment Output: {sentiment_result}")
    print(f"Sentiment Probabilities: {sentiment_probs[i]}")
    print("\n")

Sentence: Your coupon code exclusive discounts is expired.
Classification Output (Category): Customer Feedback
Classification Probabilities: tensor([0.2574, 0.2455, 0.2589, 0.2381], grad_fn=<SelectBackward0>)
Sentiment Output: Negative
Sentiment Probabilities: tensor([0.3490, 0.3233, 0.3277], grad_fn=<SelectBackward0>)


Sentence: Use promo codes at checkout is not mandatory.
Classification Output (Category): Customer Feedback
Classification Probabilities: tensor([0.2492, 0.2491, 0.2622, 0.2395], grad_fn=<SelectBackward0>)
Sentiment Output: Negative
Sentiment Probabilities: tensor([0.3550, 0.3183, 0.3267], grad_fn=<SelectBackward0>)


Sentence: Your review is appreciated.
Classification Output (Category): Customer Feedback
Classification Probabilities: tensor([0.2552, 0.2530, 0.2558, 0.2360], grad_fn=<SelectBackward0>)
Sentiment Output: Negative
Sentiment Probabilities: tensor([0.3519, 0.3145, 0.3336], grad_fn=<SelectBackward0>)




#### Should the Entire Network be Frozen?
* Freezing the entire network is typically not ideal when working with task-specific heads that need to adapt to specific problems, such as sentiment analysis or sentence classification. However, it might be useful in cases where you’re limited by computational resources and want a quick solution.

#### Should Only the Transformer Backbone be Frozen?
* This approach allows the model to learn task-specific features while retaining the powerful language representations learned during pre-training.
But if the transformer backbone is frozen, the model might not adapt perfectly to the nuances of the tasks.

#### Should Only One Task-Specific Head be Frozen?
* Freezing one task-specific head means the model will adapt to one task while keeping the other task’s head fixed. This can be useful in a multi-task learning scenario where one task is more complex or requires more adjustment.

### Transfer learning process
* In general, transfer learning is beneficial when you have a pre-trained model that has learned useful knowledge from a large dataset. By freezing the backbone and fine-tuning the heads, you can leverage the general language knowledge and apply it to specific tasks.


In [None]:
# Training Loop
#
# Jointly trains the two task heads on a tiny synthetic batch. Only the head
# parameters are handed to the optimizer. The inputs and targets below are
# random placeholders, so this cell demonstrates the mechanics of multi-task
# training rather than learning a real task.

# Seed the RNG so the synthetic data (and therefore the printed losses) are reproducible
torch.manual_seed(42)

# Loss functions and optimizer
criterion_classification = nn.CrossEntropyLoss()
criterion_sentiment = nn.CrossEntropyLoss()
optimizer = optim.Adam(list(classification_head.parameters()) + list(sentiment_head.parameters()), lr=0.001)

# Hypothetical Dataset (Sentences and Random Labels)
sentences = ["Enter your coupon code at checkout to unlock exclusive discounts.",
             "Use promo codes at checkout to enjoy extra savings on your favorite products.",
             "Leave a review and share your experience with others."]

# Create the synthetic tensors on the same device as the heads so the forward
# pass and the loss computation never mix devices.
# (assumes both heads live on the same device and share the same input width)
device = classification_head.weight.device

# Simulate sentence embeddings (Replace with real embeddings in actual training).
# The feature width is read from the head instead of being hard-coded, so it
# always matches what the heads expect.
embeddings = torch.randn(len(sentences), classification_head.in_features, device=device)

# Random labels for classification and sentiment tasks; the upper bounds come
# from each head's number of output classes.
# NOTE: these names now hold integer target tensors, not lists of label names.
classification_labels = torch.randint(0, classification_head.out_features, (len(sentences),), device=device)
sentiment_labels = torch.randint(0, sentiment_head.out_features, (len(sentences),), device=device)

# Training Loop (a few epochs over the single synthetic batch, for demonstration)
num_epochs = 5

# Training mode only needs to be set once; nothing inside the loop changes it
classification_head.train()
sentiment_head.train()

for epoch in range(num_epochs):
    optimizer.zero_grad()  # Reset gradients

    # Forward pass through both heads
    classification_output = classification_head(embeddings)
    sentiment_output = sentiment_head(embeddings)

    # Compute Losses
    loss_classification = criterion_classification(classification_output, classification_labels)
    loss_sentiment = criterion_sentiment(sentiment_output, sentiment_labels)

    total_loss = loss_classification + loss_sentiment  # Combined loss for multi-task learning

    # Backward pass
    total_loss.backward()
    optimizer.step()

    # Print progress
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss.item():.4f}")

Epoch [1/5], Loss: 2.8387
Epoch [2/5], Loss: 2.3039
Epoch [3/5], Loss: 1.8347
Epoch [4/5], Loss: 1.4353
Epoch [5/5], Loss: 1.1057


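  The loss decreases steadily across epochs, which is a good sign: it shows that gradients flow through both heads and that the optimizer is updating their weights. Keep in mind, however, that the embeddings and labels used here are random, so the heads are only memorizing three arbitrary samples; this confirms that the training loop works, not that the model has learned anything useful about the real tasks.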