# ***Customer Feedback Sentiment Analysis System***

**PHASE 2 — Text Preprocessing (Classical NLP)**

**Dataset**

https://drive.google.com/file/d/1WNnl6aPtwjI8Od8iIE0WhxeR533zu9Ce/view?usp=sharing

In [None]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
# Fetch the NLTK stop-word corpus so stopwords.words('english') can be loaded later.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Step 2: Load dataset
# Read the tweet CSV into a DataFrame, then preview its first rows.
dataset_path = '/content/Twitter_Data.csv'
df = pd.read_csv(dataset_path)
df.head()


Unnamed: 0,clean_text,category
0,when modi promised “minimum government maximum...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0


In [None]:
# Step 3: Rename columns for clarity
# Keep only the tweet text and its label, exposed as 'text' and 'sentiment'.
df = df[['clean_text', 'category']].rename(
    columns={'clean_text': 'text', 'category': 'sentiment'}
)


In [None]:
# Count the missing values in each column before any cleaning.
print(df.isna().sum())

text         4
sentiment    7
dtype: int64


In [None]:
# Step 4: Handle missing values
# Discard every row that lacks either the text or the sentiment label.
df = df.dropna()

In [None]:
# Confirm that no missing values remain after the drop.
print(df.isna().sum())

text         0
sentiment    0
dtype: int64


In [None]:
# Step 5: Text cleaning function
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
  """Turn one raw text string into a space-joined sequence of stemmed tokens.

  Steps, in order: lowercase the text, delete substrings that start with
  'http' or 'www' (up to the next whitespace), replace every non-letter
  character with a space, split on whitespace, drop tokens found in
  `stop_words`, and stem the remaining tokens with `stemmer`.
  """
  lowered = text.lower()
  without_urls = re.sub(r'http\S+|www\S+', '', lowered)
  letters_only = re.sub(r'[^a-zA-Z]', ' ', without_urls)
  stems = []
  for token in letters_only.split():
    if token in stop_words:
      continue
    stems.append(stemmer.stem(token))
  return ' '.join(stems)


# Apply preprocessing
# Store the cleaned, stemmed version of every row next to the raw text.
df['clean_text'] = df['text'].map(preprocess_text)

In [None]:
# Show the first five rows to compare the raw and cleaned text.
print(df.head())

                                                text  sentiment  \
0  when modi promised “minimum government maximum...       -1.0   
1  talk all the nonsense and continue all the dra...        0.0   
2  what did just say vote for modi  welcome bjp t...        1.0   
3  asking his supporters prefix chowkidar their n...        1.0   
4  answer who among these the most powerful world...        1.0   

                                          clean_text  
0  modi promis minimum govern maximum govern expe...  
1               talk nonsens continu drama vote modi  
2  say vote modi welcom bjp told rahul main campa...  
3  ask support prefix chowkidar name modi great s...  
4  answer among power world leader today trump pu...  


**PHASE 3 — TF-IDF Based Model**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_text'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary (capped at 5000 features) on the training text only,
# then encode the test text with that same fitted vectorizer.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Fit a logistic-regression classifier on the TF-IDF features.
lr = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Report per-class precision / recall / F1 on the held-out rows.
y_pred = lr.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

        -1.0       0.82      0.72      0.77      7152
         0.0       0.82      0.91      0.86     11067
         1.0       0.88      0.85      0.86     14375

    accuracy                           0.84     32594
   macro avg       0.84      0.83      0.83     32594
weighted avg       0.84      0.84      0.84     32594



**PHASE 4 — Word2Vec Based Model**

In [None]:
!pip install gensim



In [None]:
from gensim.models import Word2Vec

# Tokenize sentences
tokenized_sentences = [text.split() for text in df['clean_text']]

# Train Word2Vec model. The seed fixes the initial weights; per the gensim docs,
# fully repeatable training additionally needs workers=1 and a fixed PYTHONHASHSEED.
w2v_model = Word2Vec(sentences=tokenized_sentences, vector_size=100, window=5,
                     min_count=2, seed=42)

# Sentence vector creation
def sentence_vector(sentence):
  """Return the mean Word2Vec embedding of the in-vocabulary words of `sentence`.

  Words that are not in the model vocabulary are skipped. When no word is
  known (including an empty sentence) a zero vector with the model's
  dimensionality is returned.
  """
  vectors = [w2v_model.wv[word] for word in sentence.split() if word in w2v_model.wv]
  if not vectors:
    # Size the fallback from the model so it cannot drift from vector_size above.
    return np.zeros(w2v_model.vector_size)
  return np.mean(vectors, axis=0)

X_w2v = np.array([sentence_vector(text) for text in df['clean_text']])

# Train-test split with the same test_size and random_state as the TF-IDF phase,
# so the run is repeatable and both models are scored on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X_w2v, df['sentiment'], test_size=0.2, random_state=42)

# Fit a separate classifier: refitting `lr` here would overwrite the fitted TF-IDF model.
lr_w2v = LogisticRegression(max_iter=1000)
lr_w2v.fit(X_train, y_train)
y_pred = lr_w2v.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        -1.0       0.50      0.28      0.36      6980
         0.0       0.62      0.62      0.62     11110
         1.0       0.62      0.75      0.68     14504

    accuracy                           0.61     32594
   macro avg       0.58      0.55      0.55     32594
weighted avg       0.59      0.61      0.59     32594



**PHASE 5 — BERT Upgrade (ADVANCED)**

In [None]:
!pip install transformers



In [None]:
from transformers import pipeline

# Name the checkpoint and revision explicitly instead of relying on the pipeline's
# implicit default, so results do not change if the library's default model changes.
bert_sentiment = pipeline(
    'sentiment-analysis',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
    revision='714eb0f',
)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


In [None]:
# Prediction example: mixed feedback (praise plus a complaint) still yields a single label.
bert_sentiment("I love this product but the battery is bad")


[{'label': 'NEGATIVE', 'score': 0.9986051917076111}]

In [None]:
# Prediction example: a threatening sentence.
bert_sentiment("I will kill you")

[{'label': 'NEGATIVE', 'score': 0.9264492392539978}]

In [None]:
# Prediction example: an affectionate sentence.
bert_sentiment("I love you")

[{'label': 'POSITIVE', 'score': 0.9998656511306763}]

**PHASE 6 — Deployment Awareness (Streamlit)**

In [None]:
!pip install streamlit pyngrok transformers torch

Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [None]:
%%writefile app.py
import streamlit as st
from transformers import pipeline

st.set_page_config(page_title="Sentiment Analyzer")

st.title("Customer Feedback Sentiment Analysis")

@st.cache_resource
def load_model():
    return pipeline("sentiment-analysis")

model = load_model()

text = st.text_area("Enter customer feedback")

if st.button("Analyze"):
    if text.strip() == "":
        st.warning("Please enter text")
    else:
        result = model(text)[0]
        st.success(f"Sentiment: {result['label']}")
        st.write(f"Score: {round(result['score'], 2)}")


Writing app.py


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Keep the ngrok token out of the notebook: read it from the NGROK_AUTH_TOKEN
# environment variable, or prompt for it (input hidden) when the variable is unset.
# NOTE(review): a token was previously hard-coded in this cell; treat it as leaked
# and revoke it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)


In [None]:
!streamlit run app.py &>/content/logs.txt &

# Explanation:

# trailing & → runs the Streamlit server in the background

# &>/content/logs.txt → redirects both stdout and stderr to the log file instead of the cell output

In [None]:
# Open an ngrok tunnel to local port 8501 and show where the app can be reached.
streamlit_port = 8501
public_url = ngrok.connect(streamlit_port)
print("Streamlit App URL:", public_url)


Streamlit App URL: NgrokTunnel: "https://bibulously-nonsupporting-yoko.ngrok-free.dev" -> "http://localhost:8501"
