<a href="https://colab.research.google.com/github/skl30misha/data-science-portfolio/blob/main/Text_Emotion_Classifier(NLP)/NLP_Comment_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import re
import string
import nltk
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
from textblob import TextBlob
from nltk.tokenize import word_tokenize

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/MyDrive/Colab Notebooks/NLP')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('amazon.csv')
df.head()
df_text = df["reviews.text"].dropna()


# Cleaning text stopwords

In [None]:

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = BeautifulSoup(text, "html.parser").get_text()
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r"@\w+|#\w+", "", text)
    text = re.sub(r"\d+", "", text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return " ".join(words)

df['clean_text'] = df_text.apply(clean_text)
df_text = df['reviews.text'].dropna()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Tokenization and Lemmatization of text

In [None]:
import spacy
nlp = spacy.load("en_core_web_sm")
def spacy_token_lemma(text):
    doc = nlp(text)
    return [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]
df['lemmas'] = df['clean_text'].apply(spacy_token_lemma)

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
df["lemmas_text"]=df["lemmas"].apply(lambda x: " ".join(x))


# Label Sentiment Based on Ratings

In [None]:
!pip install vaderSentiment

import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# 1. Анализатор
analyzer = SentimentIntensityAnalyzer()

# 2. Функция классификации
def vader_sentiment(text):
    score = analyzer.polarity_scores(str(text))['compound']
    if score >= 0.05:
        return 'positive'
    elif score <= -0.05:
        return 'negative'
    else:
        return 'neutral'

# 3. Анализ датафрейма
df['sentiment'] = df['clean_text'].apply(vader_sentiment)

# 4. Результаты
print(df[['clean_text', 'sentiment']].head())
print(df['sentiment'].value_counts())



                                          clean_text sentiment
0  thought would big small paper turn like palm t...  positive
1             kindle light easy use especially beach  positive
2  didnt know much id use kindle went lower end i...  positive
3  happy purchase caught sale really good price n...  positive
4  solid entry level kindle great kids gifted kid...  positive
sentiment
positive    4557
negative     226
neutral      217
Name: count, dtype: int64


# check

In [None]:
sample = "I hate this website. It's so frustrating!"

score = analyzer.polarity_scores(sample)
compound = score['compound']

if compound >= 0.05:
    sentiment = 'positive'
elif compound <= -0.05:
    sentiment = 'negative'
else:
    sentiment = 'neutral'

print(f"Text: {sample}")
print(f"Compound score: {compound}")
print(f"Predicted Sentiment: {sentiment}")


Text: I hate this website. It's so frustrating!
Compound score: -0.8286
Predicted Sentiment: negative


# Gradio

In [4]:
import gradio as gr


def analyze_sentiment(text):
    score = analyzer.polarity_scores(text)['compound']
    if score >= 0.05:
        sentiment = 'Positive 😊'
    elif score <= -0.05:
        sentiment = 'Negative 😠'
    else:
        sentiment = 'Neutral 😐'
    return f"Sentiment: {sentiment} (score: {score:.2f})"

interface = gr.Interface(
    fn=analyze_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="Enter a comment here..."),
    outputs=gr.Textbox(),
    title="VADER Sentiment Analyzer",
    description="Type your comment and see the sentiment prediction (positive, neutral, negative)."
)

interface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://25248ab5327a207398.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


