# 🤖 AI-Powered Customer Feedback Analyzer
An end-to-end NLP project to classify sentiment from customer feedback.

## Step 1: Import Libraries and Load Data

In [36]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to C:\Users\kalpesh
[nltk_data]     kajare\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\kalpesh
[nltk_data]     kajare\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## Step 2: Define Text Cleaning Function

In [37]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(t) for t in tokens if t not in stopwords.words('english')]
    return ' '.join(tokens)

## Step 3: Load and Preprocess the Dataset

In [38]:
# Simulate or load your dataset here
# Example: df = pd.read_csv('customer_feedback.csv')
# For illustration, we'll create a small dataset

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

vectorizer = TfidfVectorizer(max_features=5000)

df = pd.read_csv('sentiment-analysis.csv')

# Split single column into multiple columns
df_split = df.iloc[:, 0].str.split(',', n=6, expand=True)
df_split.columns = ['Text', 'Sentiment', 'Source', 'Date/Time', 'User ID', 'Location', 'Confidence Score']

# Clean data
df_cleaned = df_split.apply(lambda col: col.str.strip().str.replace('"', '', regex=False))
df_cleaned = df_cleaned.dropna(subset=['Text', 'Sentiment'])

# Prepare features and target
X = vectorizer.fit_transform(df_cleaned['Text'])
y = df_cleaned['Sentiment']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    Negative       0.86      0.75      0.80         8
    Positive       0.85      0.92      0.88        12

    accuracy                           0.85        20
   macro avg       0.85      0.83      0.84        20
weighted avg       0.85      0.85      0.85        20



In [39]:
from sklearn.metrics import confusion_matrix, accuracy_score
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


[[ 6  2]
 [ 1 11]]
Accuracy: 0.85


In [40]:
import pickle 

with open("feedback_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)
