In [1]:
# Question Classifier

# https://huggingface.co/shahrukhx01/question-vs-statement-classifier/blob/main/README.md
# Setup: pip install transformers tensorflow torch

In [2]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Single source of truth for the checkpoint id, so the tokenizer and the model
# are always loaded from the same Hugging Face repository.
QUESTION_CLASSIFIER_CHECKPOINT = "shahrukhx01/question-vs-statement-classifier"

tokenizer = AutoTokenizer.from_pretrained(QUESTION_CLASSIFIER_CHECKPOINT)
model = TFAutoModelForSequenceClassification.from_pretrained(QUESTION_CLASSIFIER_CHECKPOINT)

def classify_sentence(sentence, tokenizer, model):
    """
    Label a single sentence as a "question" or a "statement".

    Args:
        sentence (str): Text to classify.
        tokenizer: Callable that encodes the text into TensorFlow tensors.
        model: Callable returning an object with a ``logits`` attribute.

    Returns:
        dict: ``{"sentence", "label", "confidence"}`` where ``confidence`` is
        the winning class probability as a percentage rounded to 2 decimals.
    """
    # Inputs longer than 128 tokens are truncated by the tokenizer.
    encoded = tokenizer(sentence, return_tensors="tf", truncation=True, max_length=128)

    # Turn the raw logits into a probability distribution over the classes.
    probabilities = tf.nn.softmax(model(encoded).logits, axis=-1)

    # Only one sentence is passed in, so row 0 is the one we care about.
    class_index = tf.argmax(probabilities, axis=-1).numpy()[0]
    class_probability = float(probabilities[0][class_index])

    # Class index -> human readable label.
    index_to_label = {0: "statement", 1: "question"}

    return {
        "sentence": sentence,
        "label": index_to_label[class_index],
        "confidence": round(class_probability * 100, 2),
    }

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [7]:
# Demo: a declarative sentence that contains the question word "what".
sentence = "I don't really understand what you were saying at 5:30"
result = classify_sentence(sentence=sentence, tokenizer=tokenizer, model=model)
print(result)

{'sentence': "I don't really understand what you were saying at 5:30", 'label': 'statement', 'confidence': 99.96}


In [13]:
# Demo: a question written without a trailing question mark.
sentence = "Why can't I find the result"
result = classify_sentence(sentence=sentence, tokenizer=tokenizer, model=model)
print(result)

{'sentence': "Why can't I find the result", 'label': 'question', 'confidence': 99.93}


In [4]:
from typing import Any, Dict, List, Optional

import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class EmotionDetector:
    """Scores a piece of text against every label of a sequence-classification model."""

    def __init__(self, model_name: str = "bhadresh-savani/bert-base-go-emotion"):
        """
        Load the tokenizer and the model for ``model_name``.

        The model is switched to evaluation mode right after loading.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.eval()

    def detect_emotion(self, text: str) -> Dict[str, float]:
        """
        Score ``text`` against all labels known to the model.

        Returns a dictionary mapping each label name (taken from the model
        config's ``id2label``) to its softmax probability.
        """
        # Inputs longer than 512 tokens are truncated by the tokenizer.
        encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = self.model(**encoded).logits
            probabilities = torch.softmax(logits, dim=1)

        # Only one text is passed in, so row 0 holds its per-label scores.
        id_to_label = self.model.config.id2label
        return {
            id_to_label[index]: probability
            for index, probability in enumerate(probabilities[0].tolist())
        }

def analyze_confusion(
    text: str,
    threshold: float = 0.3,
    detector: Optional["EmotionDetector"] = None,
) -> Dict[str, Any]:
    """
    Analyze text specifically for confusion-related emotions.

    Args:
        text: The text to score.
        threshold: The text counts as confused when any confusion-related
            score is strictly greater than this value.
        detector: Any object exposing ``detect_emotion(text) -> dict``. When
            omitted, a default ``EmotionDetector()`` is created on the first
            call and reused afterwards, so the model is not reloaded for
            every text.

    Returns:
        dict with three keys:
            'is_confused': bool, True if any indicator exceeds ``threshold``.
            'confusion_scores': the confusion-related subset of the scores.
            'all_emotions': the full score dictionary from the detector.
    """
    if detector is None:
        # Loading the model is expensive; build the default detector once and
        # cache it on the function object for subsequent calls.
        detector = getattr(analyze_confusion, "_default_detector", None)
        if detector is None:
            detector = EmotionDetector()
            analyze_confusion._default_detector = detector

    # Get emotion scores
    emotions = detector.detect_emotion(text)

    # Focus on confusion-related emotions. A label the detector does not
    # report falls back to 0.
    # NOTE(review): in the recorded runs with the default detector,
    # 'uncertainty' always comes back as the fallback 0, which suggests the
    # default model has no such label. The key is kept so the shape of the
    # result stays unchanged for existing callers.
    confusion_labels = ('confusion', 'uncertainty', 'nervousness')
    confusion_indicators = {label: emotions.get(label, 0) for label in confusion_labels}

    return {
        'is_confused': any(score > threshold for score in confusion_indicators.values()),
        'confusion_scores': confusion_indicators,
        'all_emotions': emotions
    }

In [6]:
# Example usage
# Sample sentences to run through the confusion analysis.
example_texts = [
    "I'm not sure I understand what's going on here.",
    "This doesn't make any sense to me.",
    "I'm completely lost with these instructions.",
    "This is crystal clear to me."
]

for text in example_texts:
    results = analyze_confusion(text)
    # One print call, one field per line; the leading "\n" separates entries.
    print(
        f"\nText: {text}",
        f"Is confused: {results['is_confused']}",
        f"Confusion scores: {results['confusion_scores']}",
        sep="\n",
    )


Text: I'm not sure I understand what's going on here.
Is confused: True
Confusion scores: {'confusion': 0.6531734466552734, 'uncertainty': 0, 'nervousness': 0.0015624117804691195}

Text: This doesn't make any sense to me.
Is confused: False
Confusion scores: {'confusion': 0.010463634505867958, 'uncertainty': 0, 'nervousness': 0.0021200987976044416}

Text: I'm completely lost with these instructions.
Is confused: False
Confusion scores: {'confusion': 0.003107678610831499, 'uncertainty': 0, 'nervousness': 0.015677016228437424}

Text: This is crystal clear to me.
Is confused: False
Confusion scores: {'confusion': 0.0039923000149428844, 'uncertainty': 0, 'nervousness': 0.020075522363185883}
