In [1]:
# Import required libraries
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split


def build_nlp_pipeline(X, y, vectorizer_type="tfidf", test_size=0.2):
    """
    Reusable NLP Pipeline Function
    
    Parameters:
    X : list of text documents
    y : labels
    vectorizer_type : 'bow' or 'tfidf'
    test_size : test split size
    
    Returns:
    trained pipeline model
    """

    # Choose vectorizer
    if vectorizer_type == "bow":
        vectorizer = CountVectorizer(stop_words='english')
    elif vectorizer_type == "tfidf":
        vectorizer = TfidfVectorizer(stop_words='english')
    else:
        raise ValueError("vectorizer_type must be 'bow' or 'tfidf'")

    # Create pipeline
    pipeline = Pipeline([
        ("vectorizer", vectorizer),
        ("classifier", LogisticRegression())
    ])

    # Train test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    # Train model
    pipeline.fit(X_train, y_train)

    # Predict
    predictions = pipeline.predict(X_test)

    # Evaluation
    print("Accuracy:", accuracy_score(y_test, predictions))
    print("\nClassification Report:\n")
    print(classification_report(y_test, predictions))

    return pipeline

In [2]:
# Sample dataset
texts = [
    "I love machine learning",
    "Machine learning is amazing",
    "I hate bugs in code",
    "Debugging is very frustrating"
]

labels = ["positive", "positive", "negative", "negative"]

# Build and train pipeline
model = build_nlp_pipeline(texts, labels, vectorizer_type="tfidf")

# Test on new sentence
new_text = ["I love debugging"]
prediction = model.predict(new_text)

print("Prediction:", prediction[0])

Accuracy: 0.0

Classification Report:

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00       0.0
    positive       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

Prediction: negative


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
