In [None]:
import pandas as pd
import re
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Fetch the "vader_lexicon" resource through NLTK's downloader.
nltk.download("vader_lexicon")

# Read the dataset from the CSV file "social_media_data.csv".
data = pd.read_csv("social_media_data.csv")

# Keep a handle on the "text" column, which holds the tweet bodies.
tweets = data.loc[:, "text"]

# Function to remove @ mentions
def remove_ats(tweet):
    """Return *tweet* with every @-mention deleted.

    A mention is an "@" immediately followed by one or more word
    characters (letters, digits, underscore). The "@" and those characters
    are removed; surrounding whitespace and punctuation are left in place,
    so a removed mention can leave a leading or doubled space behind.
    """
    mention_pattern = re.compile(r"@\w+")
    return mention_pattern.sub("", tweet)

# Strip @-mentions from every tweet before scoring.
tweets_without_at = list(map(remove_ats, tweets))

# A single VADER analyzer instance is reused for all tweets.
analyzer = SentimentIntensityAnalyzer()

# For each cleaned tweet, keep the "compound" entry of the score mapping
# returned by polarity_scores().
sentiments = [
    analyzer.polarity_scores(cleaned).get("compound")
    for cleaned in tweets_without_at
]

# Store the scores next to the tweets as a new "sentiment" column.
data["sentiment"] = sentiments
# Parse the "tweet_created" column into pandas datetimes. The column must
# exist: a missing column raises KeyError here.
data['tweet_created'] = pd.to_datetime(data['tweet_created'])

# Use the timestamps as the index; resample() below groups rows by this
# datetime index.
data.set_index('tweet_created', inplace=True)

# Average each hour's rows. numeric_only=True restricts the mean to numeric
# columns: since pandas 2.0 the default is numeric_only=False, so averaging
# the string "text" column would raise TypeError. The lowercase "h" alias is
# used because the uppercase "H" spelling is deprecated in recent pandas.
data_hourly = data.resample("h").mean(numeric_only=True)

# Draw the hourly mean sentiment as a line on a 10x6-inch figure.
plt.figure(figsize=(10, 6))
plt.plot(data_hourly.index, data_hourly["sentiment"])

# Pin the y-axis to [-1, 1] instead of letting it autoscale to the data.
plt.ylim(-1, 1)

# Title and axis labels, then render the figure.
plt.title("Average Sentiment Over Time")
plt.xlabel("Time")
plt.ylabel("Average Sentiment Score")
plt.show()