# NLP-Based Sentiment Classification using Advanced Techniques
## Transformer Models, Embeddings, and Deep Learning for Employee Feedback Analysis

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import warnings
warnings.filterwarnings('ignore')

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report

print("Downloading NLTK data...")
# Fetch each NLTK resource quietly, in the same order as before.
for nltk_resource in (
    'punkt_tab',
    'stopwords',
    'wordnet',
    'averaged_perceptron_tagger',
    'omw-1.4',
):
    nltk.download(nltk_resource, quiet=True)

print("Libraries imported successfully")

Downloading NLTK data...
Libraries imported successfully


In [15]:
# Load the labelled feedback data and hold out a stratified 20% test split.
full_df = pd.read_csv('../NLP_Remark_Analysis/training_data_complete.csv')
train_df, test_df = train_test_split(full_df, test_size=0.2, random_state=42, stratify=full_df['Sentiment'])

# Later cells add columns to both splits. Take explicit copies so those
# assignments act on independent frames instead of slices of full_df (the
# resulting pandas SettingWithCopyWarning would otherwise be hidden by the
# blanket warnings filter set in the import cell).
train_df = train_df.copy()
test_df = test_df.copy()

print("Train-Test Split (80-20):")
print(f"  Total records: {len(full_df)}")
print(f"  Training set: {len(train_df)} ({len(train_df)/len(full_df):.1%})")
print(f"  Test set: {len(test_df)} ({len(test_df)/len(full_df):.1%})")
print(f"\nColumns: {list(train_df.columns)}")


def print_sentiment_distribution(split_name, split_df):
    """Print the count and percentage of every Sentiment label in one split.

    Args:
        split_name: Heading printed above the per-label lines.
        split_df: DataFrame with a 'Sentiment' column.
    """
    print(f"{split_name}:")
    for sentiment in sorted(split_df['Sentiment'].unique()):
        count = (split_df['Sentiment'] == sentiment).sum()
        pct = round(count / len(split_df) * 100, 1)
        print(f"  {sentiment}: {count} ({pct}%)")


print("\nSentiment Distribution:")
print_sentiment_distribution("Training set", train_df)
print_sentiment_distribution("Test set", test_df)

Train-Test Split (80-20):
  Total records: 500
  Training set: 400 (80.0%)
  Test set: 100 (20.0%)

Columns: ['Employee_ID', 'Associate_Name', 'Department', 'Evaluation_Result', 'Skill_Feedback_1', 'Skill_Feedback_2', 'Skill_Feedback_3', 'Overall_Feedback', 'Sentiment']

Sentiment Distribution:
Training set:
  negative: 85 (21.2%)
  neutral: 137 (34.2%)
  positive: 178 (44.5%)
Test set:
  negative: 21 (21.0%)
  neutral: 34 (34.0%)
  positive: 45 (45.0%)


In [16]:
print("TEXT PREPROCESSING AND TOKENIZATION\n")

# Shared resources, built once here and read by preprocess_text on every call.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalise one feedback string into space-separated lemmas.

    Steps: lower-case the text, turn every run of non-letter characters into
    a single space, split on whitespace, drop stop words and tokens of two
    characters or fewer, then lemmatize what remains with the module-level
    ``lemmatizer``.

    Args:
        text: Raw feedback value. Anything ``pd.isna`` reports as missing
            yields an empty string.

    Returns:
        str: The cleaned tokens joined by single spaces (possibly empty).
    """
    if pd.isna(text):
        return ""
    # Replace (rather than delete) punctuation and digits with a space, so
    # "well-structured" or "sql/dbms" split into separate words instead of
    # fusing into a single token. The text is already lower-cased, so only
    # a-z needs to be kept.
    cleaned = re.sub(r'[^a-z\s]+', ' ', str(text).lower())
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in cleaned.split()
        if token not in stop_words and len(token) > 2
    )

# Free-text columns that are concatenated (in this order) into one document per row.
FEEDBACK_TEXT_COLUMNS = ['Skill_Feedback_1', 'Skill_Feedback_2', 'Skill_Feedback_3', 'Overall_Feedback']


def build_processed_feedback(df):
    """Return the cleaned, space-joined feedback text for every row of df.

    Missing values become empty strings and every column is cast to str
    before joining, so a non-string column cannot break the concatenation.

    Args:
        df: DataFrame containing all FEEDBACK_TEXT_COLUMNS.

    Returns:
        pandas.Series aligned with df.index holding preprocess_text output.
    """
    combined = df[FEEDBACK_TEXT_COLUMNS[0]].fillna('').astype(str)
    for column in FEEDBACK_TEXT_COLUMNS[1:]:
        combined = combined + ' ' + df[column].fillna('').astype(str)
    return combined.apply(preprocess_text)


print("Preprocessing training data...")
train_df['processed_feedback'] = build_processed_feedback(train_df)

print("Preprocessing test data...")
test_df['processed_feedback'] = build_processed_feedback(test_df)

print("Preprocessing complete!")
print("\nSample preprocessed text:")
# head(2) keeps the preview safe when the training split has fewer than two rows.
for idx, sample_text in enumerate(train_df['processed_feedback'].head(2), start=1):
    print(f"{idx}. {sample_text[:100]}...")

TEXT PREPROCESSING AND TOKENIZATION

Preprocessing training data...
Preprocessing test data...
Preprocessing complete!

Sample preprocessed text:
1. insufficient knowledge sql principle unable apply basic concept failed demonstrate basic dbms compet...
2. satisfactory sql knowledge room improvement performance optimization fair dbms knowledge ability han...


In [17]:
print("BUILD NLP FEATURE EXTRACTION PIPELINE\n")

from sklearn.feature_extraction.text import TfidfVectorizer
import scipy.sparse as sp

max_words = 5000
max_length = 100

# Unigram + bigram TF-IDF, capped at max_words features.
tfidf_vectorizer = TfidfVectorizer(
    max_features=max_words,
    min_df=2,
    max_df=0.8,
    ngram_range=(1, 2),
    lowercase=True,
    stop_words='english',
)

# The vectorizer is fitted on the training text only; the test text is merely transformed.
X_train_tfidf_sparse = tfidf_vectorizer.fit_transform(train_df['processed_feedback'])
X_test_tfidf_sparse = tfidf_vectorizer.transform(test_df['processed_feedback'])

# Dense float32 copies of both sparse matrices.
X_train_tfidf, X_test_tfidf = (
    sparse_matrix.toarray().astype(np.float32)
    for sparse_matrix in (X_train_tfidf_sparse, X_test_tfidf_sparse)
)

# The label encoder is likewise fitted on the training labels only.
le = LabelEncoder()
y_train_encoded = le.fit_transform(train_df['Sentiment'])
y_test_encoded = le.transform(test_df['Sentiment'])

print("NLP Feature Extraction Complete:")
feature_summary = [
    ("Vocabulary size", len(tfidf_vectorizer.get_feature_names_out())),
    ("Max words (features)", max_words),
    ("Training TF-IDF shape", X_train_tfidf.shape),
    ("Test TF-IDF shape", X_test_tfidf.shape),
    ("Target classes", list(le.classes_)),
    ("Training samples", len(X_train_tfidf)),
    ("Test samples", len(X_test_tfidf)),
]
for summary_label, summary_value in feature_summary:
    print(f"  {summary_label}: {summary_value}")

BUILD NLP FEATURE EXTRACTION PIPELINE

NLP Feature Extraction Complete:
  Vocabulary size: 484
  Max words (features): 5000
  Training TF-IDF shape: (400, 484)
  Test TF-IDF shape: (100, 484)
  Target classes: ['negative', 'neutral', 'positive']
  Training samples: 400
  Test samples: 100


In [18]:
print("TRAIN NLP-BASED CLASSIFICATION MODELS\n")

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

models_nlp = {}
results_nlp = {}


def fit_and_score(step, model_name, estimator):
    """Fit one estimator on the TF-IDF training matrix and score it on the test matrix.

    The fitted estimator is stored in ``models_nlp`` and its accuracy and
    predictions in ``results_nlp``, both keyed by ``model_name``.

    Args:
        step: Ordinal printed in front of the progress line.
        model_name: Display name, also used as the dictionary key.
        estimator: Unfitted classifier exposing ``fit`` and ``predict``.

    Returns:
        tuple: (test predictions, test accuracy).
    """
    print(f"{step}. {model_name} (NLP)...", end=" ", flush=True)
    estimator.fit(X_train_tfidf, y_train_encoded)
    test_predictions = estimator.predict(X_test_tfidf)
    test_accuracy = accuracy_score(y_test_encoded, test_predictions)
    results_nlp[model_name] = {'accuracy': test_accuracy, 'predictions': test_predictions}
    models_nlp[model_name] = estimator
    print(f"{test_accuracy:.4f}")
    return test_predictions, test_accuracy


lr_model = LogisticRegression(max_iter=1000, random_state=42, n_jobs=-1, class_weight='balanced')
y_pred_lr, acc_lr = fit_and_score(1, 'Logistic Regression', lr_model)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1, class_weight='balanced', max_depth=15)
y_pred_rf, acc_rf = fit_and_score(2, 'Random Forest', rf_model)

gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42, learning_rate=0.1, max_depth=5)
y_pred_gb, acc_gb = fit_and_score(3, 'Gradient Boosting', gb_model)

print("\nNLP MODEL PERFORMANCE SUMMARY:")
n_test_samples = len(y_test_encoded)
for model_name in sorted(results_nlp.keys(), key=lambda x: results_nlp[x]['accuracy'], reverse=True):
    acc = results_nlp[model_name]['accuracy']
    # Count the hits directly from the stored predictions. Rebuilding the
    # count as int(acc * n) truncates a float product (0.57 * 100 evaluates
    # to 56.99999999999999) and can under-report by one.
    n_correct = int(np.sum(np.asarray(results_nlp[model_name]['predictions']) == np.asarray(y_test_encoded)))
    print(f"  {model_name}: {acc:.4f} ({n_correct}/{n_test_samples})")

# NOTE(review): the winner is chosen by accuracy on the same test split that
# the evaluation cell reports on, so the reported score is optimistically
# biased; a validation split or cross-validation would avoid that.
# On ties, max() keeps the first model inserted into results_nlp.
best_nlp_model_name = max(results_nlp.items(), key=lambda x: x[1]['accuracy'])[0]
best_nlp_model = models_nlp[best_nlp_model_name]
best_nlp_predictions = results_nlp[best_nlp_model_name]['predictions']
print(f"\nBEST NLP MODEL: {best_nlp_model_name} ({results_nlp[best_nlp_model_name]['accuracy']:.4f})")

TRAIN NLP-BASED CLASSIFICATION MODELS

1. Logistic Regression (NLP)... 1.0000
2. Random Forest (NLP)... 1.0000
3. Gradient Boosting (NLP)... 1.0000

NLP MODEL PERFORMANCE SUMMARY:
  Logistic Regression: 1.0000 (100/100)
  Random Forest: 1.0000 (100/100)
  Gradient Boosting: 1.0000 (100/100)

BEST NLP MODEL: Logistic Regression (1.0000)


In [19]:
print("EVALUATE MODEL PERFORMANCE WITH NLP METRICS\n")

# Copy of the test rows with the decoded predictions and the true labels attached.
predictions_df = test_df.assign(
    Predicted_Sentiment=le.inverse_transform(best_nlp_predictions),
    True_Sentiment=test_df['Sentiment'],
)
predictions_df['Correct'] = predictions_df['Predicted_Sentiment'].eq(predictions_df['True_Sentiment'])

total_records = len(predictions_df)
total_correct = predictions_df['Correct'].sum()
overall_accuracy = total_correct / total_records

print(f"OVERALL ACCURACY: {overall_accuracy:.2%} ({total_correct}/{total_records})\n")

print("ACCURACY BY SENTIMENT CLASS:")
for sentiment in le.classes_:
    in_class = predictions_df['True_Sentiment'].eq(sentiment)
    total = in_class.sum()
    correct = predictions_df.loc[in_class, 'Correct'].sum()
    # A class with no test rows reports 0 rather than dividing by zero.
    if total > 0:
        acc = correct / total
    else:
        acc = 0
    print(f"  {sentiment.upper()}: {correct}/{total} ({acc:.2%})")

print("\nDETAILED NLP METRICS:")
# All three scores use support-weighted averaging and report 0 for undefined cases.
weighted_options = dict(average='weighted', zero_division=0)
precision = precision_score(y_test_encoded, best_nlp_predictions, **weighted_options)
recall = recall_score(y_test_encoded, best_nlp_predictions, **weighted_options)
f1 = f1_score(y_test_encoded, best_nlp_predictions, **weighted_options)

print(f"  Precision (weighted): {precision:.4f}")
print(f"  Recall (weighted): {recall:.4f}")
print(f"  F1-Score (weighted): {f1:.4f}")

print("\nClassification Report:")
report_text = classification_report(
    y_test_encoded,
    best_nlp_predictions,
    target_names=le.classes_,
    zero_division=0,
)
print(report_text)

predictions_df.to_csv('nlp_model_predictions.csv', index=False)
print("Predictions saved: nlp_model_predictions.csv")

EVALUATE MODEL PERFORMANCE WITH NLP METRICS

OVERALL ACCURACY: 100.00% (100/100)

ACCURACY BY SENTIMENT CLASS:
  NEGATIVE: 21/21 (100.00%)
  NEUTRAL: 34/34 (100.00%)
  POSITIVE: 45/45 (100.00%)

DETAILED NLP METRICS:
  Precision (weighted): 1.0000
  Recall (weighted): 1.0000
  F1-Score (weighted): 1.0000

Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00        21
     neutral       1.00      1.00      1.00        34
    positive       1.00      1.00      1.00        45

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100

Predictions saved: nlp_model_predictions.csv


In [20]:
print("GENERATE PREDICTIONS AND FEEDBACK WITH NLP\n")

def generate_nlp_feedback(predicted_sentiment):
    """Map a predicted sentiment label to its canned feedback sentence.

    'positive' and 'neutral' each have a dedicated message; every other
    value receives the "Requires improvement" message.
    """
    known_messages = (
        ('positive', "Ready for advancement. Demonstrates strong capabilities and readiness for growth."),
        ('neutral', "Conditional progression. Shows promise but requires targeted development in identified areas."),
    )
    for label, message in known_messages:
        if predicted_sentiment == label:
            return message
    # Fallback for 'negative' and for any unrecognised label.
    return "Requires improvement. Needs additional support and training with mentorship."

predictions_df['Final_Feedback'] = predictions_df['Predicted_Sentiment'].apply(generate_nlp_feedback)

print("Sample NLP-Generated Feedbacks:\n")
# Preview at most the first five rows.
preview_rows = predictions_df.head(5)
preview_fields = zip(
    preview_rows['Associate_Name'],
    preview_rows['Predicted_Sentiment'],
    preview_rows['Final_Feedback'],
)
for position, (associate, predicted_label, feedback_text) in enumerate(preview_fields, start=1):
    print(f"{position}. {associate} ({predicted_label}): {feedback_text}")

# Columns written to the final CSV, in output order.
export_columns = [
    'Employee_ID', 'Associate_Name', 'Department', 'Evaluation_Result',
    'Skill_Feedback_1', 'Skill_Feedback_2', 'Skill_Feedback_3', 'Overall_Feedback',
    'Sentiment', 'Predicted_Sentiment', 'Final_Feedback',
]
final_nlp_df = predictions_df[export_columns]

final_nlp_df.to_csv('nlp_final_feedback_with_all_columns.csv', index=False)
print("\nFinal NLP CSV saved: nlp_final_feedback_with_all_columns.csv")
print(f"Total records: {len(final_nlp_df)}")
print(f"Columns: {list(final_nlp_df.columns)}")

print("\nMODEL COMPARISON:")
print(f"Best NLP Model: {best_nlp_model_name}")
print(f"NLP Model Accuracy: {overall_accuracy:.2%}")

GENERATE PREDICTIONS AND FEEDBACK WITH NLP

Sample NLP-Generated Feedbacks:

1. Rohan Chopra (positive): Ready for advancement. Demonstrates strong capabilities and readiness for growth.
2. Kavya Reddy (neutral): Conditional progression. Shows promise but requires targeted development in identified areas.
3. Kavya Desai (positive): Ready for advancement. Demonstrates strong capabilities and readiness for growth.
4. Shreya Sharma (negative): Requires improvement. Needs additional support and training with mentorship.
5. Shreya Singh (negative): Requires improvement. Needs additional support and training with mentorship.

Final NLP CSV saved: nlp_final_feedback_with_all_columns.csv
Total records: 100
Columns: ['Employee_ID', 'Associate_Name', 'Department', 'Evaluation_Result', 'Skill_Feedback_1', 'Skill_Feedback_2', 'Skill_Feedback_3', 'Overall_Feedback', 'Sentiment', 'Predicted_Sentiment', 'Final_Feedback']

MODEL COMPARISON:
Best NLP Model: Logistic Regression
NLP Model Accuracy: 100.0