<a href="https://colab.research.google.com/github/larpita/Feedback-Sentiment-Analyzer/blob/main/Sentiment_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Smart Feedback Classification System
This project trains a machine learning model to classify user feedback as
Positive or Negative.


In [2]:
#importing required libraries and loading & reading the csv file into a dataframe called df
import pandas as pd

df = pd.read_csv("train_data.csv", encoding="latin-1")

# Preview the first 5 rows of the dataset.
df.head()


Unnamed: 0,sentence,sentiment
0,awww that s a bummer you shoulda got david car...,0
1,is upset that he can t update his facebook by ...,0
2,i dived many times for the ball managed to sav...,0
3,my whole body feels itchy and like its on fire,0
4,no it s not behaving at all i m mad why am i h...,0


In [None]:
# Show the exact column names present in the dataset.
df.columns


Index(['sentence', 'sentiment'], dtype='object')

In [None]:
# Show how many samples belong to each sentiment class.
df['sentiment'].value_counts()


Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
0,767059
1,756916


In [None]:
# Rename the `sentence` column to `text`.
df = df.rename({"sentence": "text"}, axis="columns")


In [None]:
#cleaning of text
import re

def clean_text(text):
    """Normalize one piece of feedback text before vectorization.

    Steps, in order: lowercase the text, remove URLs (any run of
    non-whitespace characters starting with ``http``), then remove every
    character that is not ``a``-``z`` or whitespace.

    Args:
        text: The raw text. ``None`` and float ``NaN`` (the usual pandas
            marker for a missing value) are treated as empty text; any other
            non-string value is converted with ``str()`` first.

    Returns:
        str: The cleaned text. Whitespace is not collapsed, so removed tokens
        can leave consecutive spaces behind.
    """
    # Missing values would otherwise crash on `.lower()`; NaN is the only
    # float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()  # convert to lowercase
    text = re.sub(r"http\S+", "", text)  # remove URLs
    text = re.sub(r"[^a-z\s]", "", text)  # remove symbols and numbers
    return text

# Add a `clean_text` column holding the cleaned version of every sentence.
df['clean_text'] = df['text'].map(clean_text)

# Preview the cleaned text next to the original text.
df.head()


Unnamed: 0,text,sentiment,clean_text
0,awww that s a bummer you shoulda got david car...,0,awww that s a bummer you shoulda got david car...
1,is upset that he can t update his facebook by ...,0,is upset that he can t update his facebook by ...
2,i dived many times for the ball managed to sav...,0,i dived many times for the ball managed to sav...
3,my whole body feels itchy and like its on fire,0,my whole body feels itchy and like its on fire
4,no it s not behaving at all i m mad why am i h...,0,no it s not behaving at all i m mad why am i h...


In [None]:
#train-test split
from sklearn.model_selection import train_test_split

# Model input is the cleaned text; the target is the sentiment label.
X, y = df['clean_text'], df['sentiment']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
#TF-IDF vectorization - convert text to numbers
from sklearn.feature_extraction.text import TfidfVectorizer

# Keep at most 5000 terms and drop scikit-learn's built-in English stop words.
vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)

# Fit the vocabulary and IDF weights on the training split only, then reuse
# that fitted state for the held-out split so nothing is learned from it.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [None]:
#Logistic regression training
#learns whether those important words indicate positive or negative sentiment
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
# Fit on the vectorized training text and its labels, so the model learns
# which words indicate positive and which indicate negative sentiment.
model.fit(X_train_vec, y_train)


In [None]:
#evaluating the performance on unseen data
from sklearn.metrics import accuracy_score, classification_report

# Predict sentiment for the held-out split using the trained model.
preds = model.predict(X_test_vec)

# Report overall accuracy, then per-class precision / recall / F1.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=preds))
print(classification_report(y_true=y_test, y_pred=preds))


Accuracy: 0.7640315621975426
              precision    recall  f1-score   support

           0       0.78      0.74      0.76    153092
           1       0.75      0.79      0.77    151703

    accuracy                           0.76    304795
   macro avg       0.76      0.76      0.76    304795
weighted avg       0.76      0.76      0.76    304795



In [3]:
# Load the separate sample test CSV (unseen data, latin-1 encoded) into `test_df`.
test_df = pd.read_csv("test_data.csv", encoding="latin-1")

# Preview the first rows.
test_df.head()


Unnamed: 0,sentence,sentiment
0,i loooooooovvvvvveee my kindle not that the dx...,1
1,reading my kindle love it lee childs is good read,1
2,ok first assesment of the kindle it fucking rocks,1
3,you ll love your kindle i ve had mine for a fe...,1
4,fair enough but i have the kindle and i think ...,1


In [None]:
# Show the column names of the test dataset.
test_df.columns


Index(['sentence', 'sentiment'], dtype='object')

In [None]:
# Rename the `sentence` column to `text`, matching the training dataframe.
test_df = test_df.rename({"sentence": "text"}, axis="columns")


In [None]:
# Clean the test sentences with the same clean_text function used for the training data.
test_df['clean_text'] = test_df['text'].map(clean_text)


In [None]:
# Vectorize the test data with the already-fitted vectorizer (transform only, no re-fitting).
X_test_new = vectorizer.transform(test_df['clean_text'])


In [None]:
# Predict sentiment with the already-trained model and store it in a new column.
test_df['predicted_sentiment'] = model.predict(X_test_new)


In [None]:
# Preview the original text next to the predicted label.
test_df[['text', 'predicted_sentiment']].head()


Unnamed: 0,text,predicted_sentiment
0,i loooooooovvvvvveee my kindle not that the dx...,1
1,reading my kindle love it lee childs is good read,1
2,ok first assesment of the kindle it fucking rocks,1
3,you ll love your kindle i ve had mine for a fe...,1
4,fair enough but i have the kindle and i think ...,1


In [None]:
#accuracy report
from sklearn.metrics import accuracy_score, classification_report

# Compare the true labels with the predictions on the unseen test file.
print(
    "Test Accuracy:",
    accuracy_score(
        y_true=test_df['sentiment'],
        y_pred=test_df['predicted_sentiment'],
    ),
)
print(
    classification_report(
        y_true=test_df['sentiment'],
        y_pred=test_df['predicted_sentiment'],
    )
)


Test Accuracy: 0.8161559888579387
              precision    recall  f1-score   support

           0       0.82      0.80      0.81       177
           1       0.81      0.84      0.82       182

    accuracy                           0.82       359
   macro avg       0.82      0.82      0.82       359
weighted avg       0.82      0.82      0.82       359



In [None]:
#installing gradio ui
!pip install gradio




In [1]:
#connecting ui to the model
def predict_sentiment_ui(text):
    """Classify one piece of feedback text for the Gradio UI.

    The text is cleaned with ``clean_text``, vectorized with the fitted
    ``vectorizer`` and classified by the trained ``model``; all three are
    notebook-level names defined in earlier cells.

    Args:
        text: The feedback entered by the user.

    Returns:
        str: "Positive" followed by a smiling-face emoji when the model
        predicts class 1, "Negative" followed by a disappointed-face emoji
        for any other prediction, or a short prompt when the input is
        missing or blank.
    """
    # Nothing to classify: do not label an empty box as positive or negative.
    if text is None or not str(text).strip():
        return "Please enter some feedback."

    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    prediction = model.predict(vector)[0]

    # The emoji are written as Unicode escapes so the labels stay intact even
    # if the file is re-saved with the wrong encoding.
    if prediction == 1:
        return "Positive \U0001F60A"  # smiling face with smiling eyes
    else:
        return "Negative \U0001F61E"  # disappointed face


In [2]:
#building the UI
import gradio as gr

# Wrap predict_sentiment_ui in a simple web form: a two-line text box as
# input and plain text as output.
ui = gr.Interface(
    fn=predict_sentiment_ui,
    inputs=gr.Textbox(lines=2, placeholder="Enter your feedback here..."),
    outputs="text",
    title="Feedback Sentiment Analyzer",
    description="Enter a sentence to analyze whether the sentiment is positive or negative."
)

# Start the app. In the recorded Colab run, Gradio enabled share=True
# automatically and served the app on a temporary public URL.
ui.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8b568cef682e7a4dba.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


