Importing

In [None]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# kagglehub handles fetching (and caching) of Kaggle datasets.
import kagglehub

# Download the Sentiment140 dataset; `path` is the local directory that
# contains the dataset files and is what later cells should read from.
path = kagglehub.dataset_download("kazanova/sentiment140")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/sentiment140


In [None]:
# Column names supplied explicitly via `names=`, so the first CSV row is read
# as data rather than as a header.
col = ['Target','Id','Time','Query','Name','Text']

# Read the CSV from the directory returned by kagglehub (`path`) instead of a
# hard-coded /kaggle/input location, so the notebook also works when the
# dataset is downloaded somewhere else.
data = pd.read_csv(
    f"{path}/training.1600000.processed.noemoticon.csv",
    names=col,
    encoding='ISO-8859-1',
)

In [None]:
# Class balance check: number of rows per distinct value in 'Target'.
data['Target'].value_counts()

Unnamed: 0_level_0,count
Target,Unnamed: 1_level_1
0,800000
4,800000


In [None]:
# Map the label 4 to 1 in 'Target'; every other value is left as it is.
data['Target'] = data['Target'].replace({4: 1})

In [None]:
import nltk
# Fetch the NLTK stop-word corpus; stopwords.words('english') is used by the
# stemming() preprocessing function.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

Stemming

In [None]:
# Single shared Porter stemmer instance; stemming() calls port.stem() on each token.
port = PorterStemmer()

In [None]:
# Build the English stop-word lookup once. stopwords.words() returns a list,
# so evaluating it inside the per-token filter rebuilt and linearly scanned
# that list for every word of every row. A frozenset gives O(1) membership.
# Requires the 'stopwords' corpus to have been downloaded already.
_ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))


def stemming(data):
  """Normalize a piece of text into a space-joined string of stemmed tokens.

  Steps: replace every non-letter character with a space, lowercase, split on
  whitespace, drop English stop words, and Porter-stem the remaining tokens.

  Args:
    data: The raw text (a single string) to preprocess.

  Returns:
    A string of the stemmed, non-stop-word tokens separated by single spaces.
    Empty if no tokens survive the filtering.
  """
  # Keep letters only so punctuation, digits and symbols become token breaks.
  tokens = re.sub('[^a-zA-Z]', ' ', data).lower().split()
  stems = [port.stem(token) for token in tokens if token not in _ENGLISH_STOPWORDS]
  return " ".join(stems)

In [None]:
# Run stemming() on every entry of the 'Text' column and store the cleaned
# text in a new 'Keyword' column (the raw 'Text' column is left untouched).
data['Keyword'] = data['Text'].apply(stemming)

In [None]:
# Features are the preprocessed text; labels are the sentiment targets.
x, y = data['Keyword'], data['Target']

Splitting the data

In [None]:
# Hold out 20% of the rows for testing, stratified on the label, with a fixed
# seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=4,
)

In [None]:
# TF-IDF vectorizer with default settings.
vectorizer = TfidfVectorizer()

# Fit on the training text only, then reuse the fitted vectorizer for the test
# text so nothing is fitted on the held-out data.
# NOTE(review): x_train / x_test are rebound from text to the vectorized
# output here, so re-running this cell without re-running the split cell
# feeds the already-vectorized data back into the vectorizer.
x_train = vectorizer.fit_transform(x_train)
x_test = vectorizer.transform(x_test)

Training

In [None]:
# Logistic-regression classifier; max_iter is set to 1000 explicitly.
model = LogisticRegression(max_iter=1000)

In [None]:
# Train the classifier on the vectorized training text and its labels.
model.fit(x_train,y_train)

Prediction

In [None]:
# Accuracy on the data the model was trained on.
train_predict = model.predict(x_train)
train_acc = accuracy_score(y_true=y_train, y_pred=train_predict)
print(train_acc)

0.80147578125


In [None]:
# Accuracy on the held-out test split.
test_predict = model.predict(x_test)
test_acc = accuracy_score(y_true=y_test, y_pred=test_predict)
print(test_acc)

0.77811875


In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.32.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.2 (from gradio)
  Downloading gradio_client-1.10.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:
import gradio as gr
import numpy as np

# This cell depends on objects created in earlier cells:
#   - `model`: the trained LogisticRegression classifier
#   - `vectorizer`: the fitted TfidfVectorizer
#   - `stemming`: the text-preprocessing function used on the training data

def predict_sentiment(text):
    """Classify a piece of text and return the result as an HTML snippet.

    The text goes through the same stemming() preprocessing and fitted
    vectorizer as the training data before being passed to the model.

    Args:
        text: The raw user input. May be None, empty, or whitespace-only.

    Returns:
        An HTML ``<div>`` string: green "Positive" when the model predicts 1,
        salmon "Negative" otherwise, or a gray notice when there is nothing
        to classify.
    """
    # Nothing was typed: do not ask the model to classify an empty string.
    if text is None or not text.strip():
        return "<div style='background-color: lightgray; padding: 10px;'>Please enter some text to analyze.</div>"

    # Apply the same preprocessing as the training data.
    processed_text = stemming(text)

    # Input made up only of stop words / non-letters leaves no tokens, which
    # would give the model an all-zero feature vector and a meaningless label.
    if not processed_text:
        return "<div style='background-color: lightgray; padding: 10px;'>No analyzable words found in the text.</div>"

    # Transform with the already-fitted vectorizer (expects an iterable of documents).
    vectorized_text = vectorizer.transform([processed_text])
    prediction = model.predict(vectorized_text)

    if prediction[0] == 1:
        return "<div style='background-color: lightgreen; padding: 10px;'>Positive</div>"
    return "<div style='background-color: salmon; padding: 10px;'>Negative</div>"

# Wire predict_sentiment into a simple Gradio UI: a two-line textbox for the
# input and an HTML component so the colored <div> result is rendered.
interface = gr.Interface(
    title="Sentiment Analysis",
    fn=predict_sentiment,
    inputs=gr.Textbox(placeholder="Enter text here...", lines=2),
    outputs=gr.HTML(),
)

# Start serving the app.
interface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f4b586b2752278335b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


