<a href="https://colab.research.google.com/github/sihamlam88/ai-journey/blob/main/NLP_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# ----------------------------
# 1. Create and expand dataset
# ----------------------------

data = {
    "text": [
        "My internet is not working, please help!",
        "Thank you for the quick support, very satisfied!",
        "Still waiting for someone to call me back.",
        "Agent was rude, I want to file a complaint.",
        "Best service ever, really appreciate it.",
        "It’s been 3 days and I have no update. Not happy.",
        "Everything is working perfectly now. Great job!",
        "I’ve been disconnected three times! Fix this.",
        "Support team was helpful and polite.",
        "Nobody helped me. Waste of time.",
        "Love the way your agent handled my issue!",
        "The app keeps crashing. Terrible experience.",
        "Really happy with the service I received!",
        "Excellent support! Thanks a lot!",
        "Terrible wait time, I’m frustrated.",
        "Fantastic resolution speed, I’m impressed!",
        "No one contacted me, very bad support.",
        "Perfect, no issues at all. Thank you!",
        "Worst experience I’ve ever had.",
        "Couldn’t be happier, well done!"
    ],
    "label": [0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1]
}

df = pd.DataFrame(data)

# ----------------------------
# 2. Vectorize using TF-IDF
# ----------------------------

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["text"])
y = df["label"]

# ----------------------------
# 3. Stratified train/test split
# ----------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, stratify=y, random_state=42
)

# ----------------------------
# 4. Train model with balancing
# ----------------------------

model = LogisticRegression(class_weight='balanced', solver='liblinear')
model.fit(X_train, y_train)

# ----------------------------
# 5. Evaluate model
# ----------------------------

y_pred = model.predict(X_test)
print("📊 Classification Report:\n")
print(classification_report(y_test, y_pred))

# ----------------------------
# 6. Test on new messages
# ----------------------------

new_text = ["I love how fast this got resolved!", "No one answered my request."]
new_vec = vectorizer.transform(new_text)
predictions = model.predict(new_vec)

for text, label in zip(new_text, predictions):
    sentiment = "Positive 😊" if label == 1 else "Negative 😡"
    print(f"\n'{text}' → {sentiment}")

📊 Classification Report:

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       0.50      0.67      0.57         3

    accuracy                           0.57         7
   macro avg       0.58      0.58      0.57         7
weighted avg       0.60      0.57      0.57         7


'I love how fast this got resolved!' → Positive 😊

'No one answered my request.' → Negative 😡
