In [None]:
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import re

In [None]:
# Read the sentiment dataset from a CSV located relative to the working
# directory, then preview the first rows of the resulting frame.
df = pd.read_csv(filepath_or_buffer="sentimentdataset.csv")
df.head()

In [None]:
# Function to clean text
def clean_text(text):
    """Normalize one raw text entry before sentiment scoring.

    The input is lowercased, then URLs (``http...`` / ``www. ...``), @mentions,
    and #hashtags (the whole tag, including its word) are removed, followed by
    every character that is not an ASCII letter, digit, or whitespace.
    Leading and trailing whitespace is stripped; interior whitespace is kept
    as-is, so removed tokens can leave consecutive spaces behind.

    Args:
        text: The raw value. Non-string values are converted with ``str()``.
            ``None`` and float NaN are treated as missing text.

    Returns:
        The cleaned string; empty when the input is missing or nothing
        survives the cleaning.
    """
    # Missing values would otherwise be stringified to "none" / "nan" and then
    # scored as if they were real words. NaN is the only float that is not
    # equal to itself, which avoids needing an extra import for the check.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    text = re.sub(r"http\S+|www\.\S+", "", text)  # links
    text = re.sub(r"@[\w]*", "", text)  # @mentions
    text = re.sub(r"#[\w]*", "", text)  # #hashtags
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # punctuation, emoji, other symbols
    return text.strip()

# Store a cleaned version of every "content" entry in a new column, then
# preview the raw and cleaned text side by side.
# NOTE(review): assumes the loaded CSV has a "content" column — confirm against the file.
df["Cleaned_Text"] = df["content"].map(clean_text)
df.loc[:, ["content", "Cleaned_Text"]].head()

In [None]:
# Function to get sentiment score
def get_sentiment(text):
    """Return the polarity component of TextBlob's sentiment for ``text``."""
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Score every cleaned entry and keep the polarity in its own column.
df["Sentiment_Score"] = df["Cleaned_Text"].map(get_sentiment)

# Label sentiment
def label_sentiment(score):
    """Map a numeric score to a sentiment label.

    Args:
        score: The value to classify by its sign.

    Returns:
        "Negative" when ``score`` is below zero, "Positive" when it is above
        zero, and "Neutral" in every other case (including exactly zero).
    """
    if score < 0:
        return "Negative"
    return "Positive" if score > 0 else "Neutral"

# Turn each score into a label and preview text, score, and label together.
# NOTE(review): this writes to a "Sentiment" column; if the input CSV already
# has one it is overwritten — confirm that is intended.
df["Sentiment"] = df["Sentiment_Score"].map(label_sentiment)
df.loc[:, ["Cleaned_Text", "Sentiment_Score", "Sentiment"]].head()

In [None]:
# Persist the frame, including the columns added above, as CSV.
# index=False keeps the row index out of the output file.
df.to_csv(path_or_buf="processed_sentiment.csv", index=False)

In [None]:
# Visualize sentiment distribution
# value_counts() orders the labels by descending frequency, so a fixed color
# list would be assigned by bar position rather than by sentiment. Look each
# color up by label so Positive is always green, Negative red, Neutral gray.
sentiment_palette = {"Positive": "green", "Negative": "red", "Neutral": "gray"}
sentiment_counts = df["Sentiment"].value_counts()
bar_colors = [sentiment_palette.get(label, "gray") for label in sentiment_counts.index]
sentiment_counts.plot(kind="bar", color=bar_colors)
plt.title("Sentiment Distribution")
plt.xlabel("Sentiment")
plt.ylabel("Tweet Count")
plt.show()