# CODECRAFT_DS_04: Twitter Sentiment Analysis & Visualization
This project analyzes sentiment patterns in Twitter data to understand public opinion and attitudes toward specific entities or brands using the Twitter Entity Sentiment Analysis dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import re
import nltk
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus (needed by the tweet-cleaning step)
nltk.download('stopwords')

# Read the CSV. header=None means the first row is treated as data,
# so the column names are supplied explicitly.
COLUMN_NAMES = ['tweet_id', 'entity', 'sentiment', 'tweet']
df = pd.read_csv('twitter_training.csv', names=COLUMN_NAMES, header=None)
df.head()

In [None]:
# Function to clean tweets
_STOP_WORDS_CACHE = {}


def _default_stop_words():
    """Return NLTK's English stop words, reading the corpus only once."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


def clean_tweet(text, stop_words=None):
    """Normalise a raw tweet into a space-separated string of lowercase words.

    Steps, in order: strip URLs, strip @mentions and '#' characters, drop
    every character that is not an ASCII letter or whitespace, lowercase,
    and remove stop words.

    Parameters
    ----------
    text : str
        Raw tweet text. Any non-string value (e.g. the NaN that pandas uses
        for a missing tweet, or None) is treated as an empty tweet.
    stop_words : collection of str, optional
        Lowercase words to discard. Defaults to NLTK's English stop-word
        list, which is loaded once and reused across calls.

    Returns
    -------
    str
        The cleaned tweet; '' if nothing remains or the input was not a string.
    """
    # re.sub raises TypeError on non-strings, so bail out early for NaN/None.
    if not isinstance(text, str):
        return ''
    if stop_words is None:
        stop_words = _default_stop_words()

    # 'http\S+' already covers https links, so no separate alternative is needed.
    text = re.sub(r'http\S+|www\S+', '', text)
    # Drop whole @mentions, but only the '#' of a hashtag (the tag word is kept).
    text = re.sub(r'@\w+|#', '', text)
    # Punctuation (including apostrophes) is removed before the stop-word
    # filter, so a contraction such as "don't" is compared as "dont".
    text = re.sub(r'[^A-Za-z\s]', '', text)
    words = text.lower().split()
    return ' '.join(word for word in words if word not in stop_words)

# Missing tweets come back from read_csv as NaN (a float, not a str);
# normalise them to '' so the text cleaning always receives a string.
df['clean_tweet'] = df['tweet'].fillna('').apply(clean_tweet)
df.head()

In [None]:
# Bar chart of how many tweets carry each sentiment label
fig, ax = plt.subplots()
sns.countplot(data=df, x='sentiment', palette='Set2', ax=ax)
ax.set_title('Sentiment Distribution')
plt.show()

In [None]:
# Ten most frequently occurring values of the 'entity' column, as a bar chart
top_entities = df['entity'].value_counts().head(10)

fig, ax = plt.subplots()
top_entities.plot.bar(ax=ax, color='skyblue')
ax.set_title('Top 10 Most Mentioned Entities')
ax.set_ylabel('Frequency')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Generate WordCloud for each sentiment
WORDCLOUD_SEED = 42  # fixed seed so the word layout is the same on every run

# groupby(sort=False) visits the labels in order of first appearance (the same
# order unique() gives), scans the frame once, and skips rows whose sentiment
# is missing.
for sentiment, tweets in df.groupby('sentiment', sort=False)['clean_tweet']:
    text = ' '.join(tweets)
    if not text.strip():
        # WordCloud.generate raises ValueError when there are no words to draw.
        print(f'Skipping {sentiment}: no words left after cleaning')
        continue

    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        random_state=WORDCLOUD_SEED,
    ).generate(text)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(f'WordCloud for {sentiment} Tweets')
    plt.show()
    plt.close(fig)  # release the figure so the loop does not accumulate open figures

## 📌 Key Insights
- Most tweets are classified as Positive or Negative.
- Some entities are far more discussed than others.
- WordClouds help visualize common terms for each sentiment type.
- Cleaning and preprocessing (removing URLs, mentions, punctuation, and stop words) make the WordClouds easier to read and interpret.