In [None]:
import pandas as pd

# Location of the raw tweet export.
DATA_PATH = "/content/twitter_training.csv"

# Columns that the analysis sections of this notebook index into.
REQUIRED_COLUMNS = ("sentiment", "text")

# Column names applied when the CSV turns out to have no header row.
# NOTE(review): this order (id, entity, sentiment, text) is an assumption about
# the file layout -- confirm it against the actual CSV before relying on it.
HEADERLESS_COLUMNS = ["tweet_id", "entity", "sentiment", "text"]

# Load the dataset
data = pd.read_csv(DATA_PATH)

# read_csv promotes the first row to column names by default. If the expected
# columns are absent, the file most likely has no header row, so re-read it
# with explicit names instead of failing later with an opaque KeyError.
if not set(REQUIRED_COLUMNS).issubset(data.columns):
    if data.shape[1] == len(HEADERLESS_COLUMNS):
        data = pd.read_csv(DATA_PATH, header=None, names=HEADERLESS_COLUMNS)
    else:
        raise ValueError(
            f"{DATA_PATH} must provide columns {REQUIRED_COLUMNS}; "
            f"found {list(data.columns)}"
        )


In [None]:
# prompt: Analyze and visualize sentiment patterns in social media data to understand public opinion and attitudes towards specific topics or brands.
# use the above csv

import matplotlib.pyplot as plt
import seaborn as sns

# Assumes 'data' has a 'text' column (tweet content) and a 'sentiment' column whose labels include 'Positive' and 'Negative' (the values filtered on below).

# 1. Sentiment Distribution
# One bar per distinct value of the 'sentiment' column; countplot returns the
# Axes it drew on, so the title is set on that Axes directly.
sentiment_ax = sns.countplot(data=data, x='sentiment')
sentiment_ax.set_title('Sentiment Distribution')
plt.show()

# 2. Word Clouds (requires wordcloud library)
# !pip install wordcloud
from wordcloud import WordCloud


def _tweets_for_sentiment(frame, label):
    """Join every tweet in ``frame`` labelled ``label`` into one string.

    Rows are selected where the 'sentiment' column equals ``label``; their
    'text' values are concatenated with single spaces. Missing texts are
    omitted by ``Series.str.cat``, and an empty string is returned when no
    row carries the label.
    """
    return frame.loc[frame['sentiment'] == label, 'text'].str.cat(sep=' ')


def _cloud_or_none(text):
    """Return a WordCloud built from ``text``, or None when ``text`` is blank."""
    # Guard the blank case: WordCloud needs at least one word to lay out and
    # would otherwise abort the whole cell for a label with no tweets.
    if not text.strip():
        return None
    return WordCloud().generate(text)


positive_tweets = _tweets_for_sentiment(data, 'Positive')
negative_tweets = _tweets_for_sentiment(data, 'Negative')

wordcloud_positive = _cloud_or_none(positive_tweets)
wordcloud_negative = _cloud_or_none(negative_tweets)

# Side-by-side panels: positive on the left, negative on the right.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
panels = [
    (wordcloud_positive, 'Positive Sentiment'),
    (wordcloud_negative, 'Negative Sentiment'),
]
for ax, (cloud, title) in zip(axes, panels):
    if cloud is not None:
        ax.imshow(cloud, interpolation='bilinear')
    else:
        # Keep the panel so the layout is stable, but say why it is empty.
        ax.text(0.5, 0.5, 'No tweets with this label', ha='center', va='center')
    ax.set_title(title)
    ax.axis('off')

plt.show()

# 3. Topic Modeling (requires nltk library)
# !pip install nltk
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# 'punkt_tab' is the tokenizer resource recent NLTK releases load for
# word_tokenize; 'punkt' is still fetched for older releases.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

stop_words = set(stopwords.words('english'))


def _tokenize_without_stopwords(text):
    """Tokenize ``text`` and drop tokens whose lowercase form is an English stop word."""
    return [word for word in word_tokenize(text) if word.lower() not in stop_words]


# Missing tweets are NaN (a float), which word_tokenize cannot process; treat
# them as empty strings so they yield an empty token list instead of raising.
tweet_text = data['text'].fillna('').astype(str)
data['text_tokens'] = tweet_text.apply(_tokenize_without_stopwords)

# TF-IDF is fitted on the stop-word-filtered tokens re-joined into documents.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(' '.join(tokens) for tokens in data['text_tokens'])

# Apply a topic modeling algorithm (e.g., LDA) to tfidf_matrix
# ...

# Visualize topics and their associated words
# ...

# Further analysis can include:
# - Time series analysis of sentiment trends
# - Correlation of sentiment with specific keywords or hashtags
# - Sentiment analysis for different brands or topics within the data
