# In [None]:
# Import libraries yang dibutuhkan
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import seaborn as sns
import numpy as np

# Read the labelled tweets from disk
df = pd.read_csv('tweets-labeled.csv')

# Split the frame into one subset per sentiment label
df_positive = df.loc[df['label'].eq('positif')]
df_negative = df.loc[df['label'].eq('negatif')]
df_neutral = df.loc[df['label'].eq('netral')]

# Report the overall row count followed by the size of each subset
print("📊 ANALISIS SENTIMEN TWEET")
print("=" * 50)
print(f"Total data: {df.shape[0]}")
print(f"Sentimen Positif: {df_positive.shape[0]}")
print(f"Sentimen Negatif: {df_negative.shape[0]}")
print(f"Sentimen Netral: {df_neutral.shape[0]}")

# ===============================
# WORD CLOUD ANALYSIS
# ===============================


def _join_cleaned_text(frame):
    """Concatenate the 'cleaned_text' column of *frame* into one string.

    Entries are separated by a single space. Missing values are dropped and
    the remaining entries are converted to ``str`` first: a NaN (for example
    from an empty CSV cell) is a float, and ``str.join`` raises ``TypeError``
    on any non-string item.
    """
    return " ".join(frame['cleaned_text'].dropna().astype(str))


def _build_wordcloud(text, colormap):
    """Generate a word cloud from *text* using the given matplotlib colormap.

    All three sentiment clouds share the same canvas settings (800x400,
    white background, at most 100 words); only the colormap differs.
    """
    return WordCloud(width=800, height=400, background_color='white',
                     colormap=colormap, max_words=100).generate(text)


# Word cloud for positive sentiment
positive_text = _join_cleaned_text(df_positive)
wordcloud_positive = _build_wordcloud(positive_text, 'Greens')

# Word cloud for negative sentiment
negative_text = _join_cleaned_text(df_negative)
wordcloud_negative = _build_wordcloud(negative_text, 'Reds')

# Word cloud for neutral sentiment
neutral_text = _join_cleaned_text(df_neutral)
wordcloud_neutral = _build_wordcloud(neutral_text, 'Blues')

# Draw the three word clouds stacked vertically in a single figure
fig, axes = plt.subplots(3, 1, figsize=(12, 15))

# (cloud image, panel title, title color) for each row, top to bottom
cloud_panels = [
    (wordcloud_positive, "Word Cloud Sentimen Positif", 'green'),
    (wordcloud_negative, "Word Cloud Sentimen Negatif", 'red'),
    (wordcloud_neutral, "Word Cloud Sentimen Netral", 'blue'),
]

for panel_ax, (cloud, panel_title, title_color) in zip(axes, cloud_panels):
    panel_ax.imshow(cloud, interpolation='bilinear')
    panel_ax.set_title(panel_title, fontsize=16, fontweight='bold', color=title_color)
    # Hide ticks and frame; the image is the whole content of the panel
    panel_ax.axis("off")

plt.tight_layout()
plt.show()

# ===============================
# SENTIMENT POLARITY ANALYSIS
# ===============================

# Count tweets per sentiment label. value_counts() orders the result by
# frequency (largest first), so the row order depends on the data.
polaritas = df['label'].value_counts()

# Label order in which the three-color palettes below are written
# (green-ish, red-ish, blue-ish).
sentiment_order = ('positif', 'negatif', 'netral')


def _colors_by_label(labels, palette, fallback='#999999'):
    """Return one color per entry of *labels*, looked up by label name.

    *palette* lists colors in ``sentiment_order``. Matching by name instead
    of by position keeps each sentiment on its intended color regardless of
    the frequency-based order of ``polaritas``. Labels that are not in
    ``sentiment_order`` get *fallback*.
    """
    lookup = dict(zip(sentiment_order, palette))
    return [lookup.get(label, fallback) for label in labels]


# Number of slices/bars actually present; used to size the explode offsets,
# which must contain exactly one entry per pie slice.
n_slices = len(polaritas)

# Build a figure with three side-by-side subplots
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# 1. Bar chart of sentiment polarity
colors = ['#2E8B57', '#DC143C', '#4682B4']  # green, red, blue
bars = axes[0].bar(polaritas.index, polaritas.values,
                   color=_colors_by_label(polaritas.index, colors),
                   alpha=0.8, edgecolor='black')
axes[0].set_title("Diagram Batang Polaritas Sentimen", fontsize=14, fontweight='bold')
axes[0].set_xlabel("Sentimen", fontsize=12)
axes[0].set_ylabel("Jumlah Tweet", fontsize=12)

# Write the count just above each bar
for bar in bars:
    height = bar.get_height()
    axes[0].text(bar.get_x() + bar.get_width()/2., height + 0.5,
                 f'{int(height)}', ha='center', va='bottom', fontweight='bold')

# 2. Pie chart of sentiment polarity
colors_pie = ['#90EE90', '#FFB6C1', '#87CEEB']  # light green, light pink, light blue
wedges, texts, autotexts = axes[1].pie(polaritas.values, labels=polaritas.index,
                                       colors=_colors_by_label(polaritas.index, colors_pie),
                                       autopct='%1.1f%%',
                                       startangle=90, explode=[0.05] * n_slices,
                                       shadow=True)

# Style the percentage labels of the pie chart
for autotext in autotexts:
    autotext.set_color('black')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(11)

# Style the category labels of the pie chart
for text in texts:
    text.set_fontsize(12)
    text.set_fontweight('bold')

axes[1].set_title("Distribusi Sentimen Tweet (Pie Chart)", fontsize=14, fontweight='bold')

# 3. Donut chart (a pie chart with a hole in the middle)
colors_donut = ['#32CD32', '#FF6347', '#4169E1']  # lime green, tomato, royal blue
wedges, texts, autotexts = axes[2].pie(polaritas.values, labels=polaritas.index,
                                       colors=_colors_by_label(polaritas.index, colors_donut),
                                       autopct='%1.1f%%',
                                       startangle=90, explode=[0.1] * n_slices,
                                       shadow=True, pctdistance=0.85)

# Cover the center with a white disc to turn the pie into a donut
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
axes[2].add_artist(centre_circle)

# Style the percentage labels of the donut chart
for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(10)

# Style the category labels of the donut chart
for text in texts:
    text.set_fontsize(11)
    text.set_fontweight('bold')

axes[2].set_title("Distribusi Sentimen Tweet (Donut Chart)", fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

# ===============================
# SEPARATE DETAILED PIE CHART
# ===============================

# Stand-alone pie chart showing both the percentage and the tweet count
plt.figure(figsize=(10, 8))

colors_detailed = ['#FF9999', '#66B2FF', '#99FF99']
# One offset per slice so every wedge is pulled out; sized from the data
# because pie() requires explode to have exactly one entry per slice.
explode = (0.1,) * len(polaritas)

# Number of labelled tweets, i.e. the total that the pie percentages refer to
total_labeled = int(polaritas.sum())


def _slice_caption(pct):
    """Format a slice label as '<pct>%' plus the tweet count on a second line.

    The count is recovered from the floating-point percentage matplotlib
    passes in, so it is rounded to the nearest integer: plain ``int()``
    truncation would turn e.g. 6.9999999 into 6 and under-report the count.
    """
    return f'{pct:.1f}%\n({int(round(float(pct) / 100 * total_labeled))} tweets)'


# Draw the pie chart
wedges, texts, autotexts = plt.pie(polaritas.values,
                                   labels=polaritas.index,
                                   colors=colors_detailed,
                                   autopct=_slice_caption,
                                   startangle=90,
                                   explode=explode,
                                   shadow=True,
                                   textprops={'fontsize': 12, 'fontweight': 'bold'})

# Style the in-slice captions
for autotext in autotexts:
    autotext.set_color('black')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(11)

plt.title("Distribusi Polaritas Sentimen Tweet", fontsize=16, fontweight='bold', pad=20)

# Legend listing the exact count for each label, placed right of the chart
plt.legend(wedges, [f'{label}: {count} tweets' for label, count in zip(polaritas.index, polaritas.values)],
           title="Kategori Sentimen",
           loc="center left",
           bbox_to_anchor=(1, 0, 0.5, 1))

plt.tight_layout()
plt.show()

# ===============================
# DETAILED STATISTICS
# ===============================

print("\n📈 STATISTIK DETAIL SENTIMEN:")
print("=" * 40)
# Denominator for the percentages below: every row of the loaded frame
total_tweets = df.shape[0]

# One aligned line per sentiment label: upper-cased name, count, share of total
for sentimen, count in polaritas.items():
    percentage = (count / total_tweets) * 100
    print(f"{sentimen.upper():<10}: {count:>4} tweets ({percentage:>5.1f}%)")

print(f"\n{'TOTAL':<10}: {total_tweets:>4} tweets (100.0%)")

# ===============================
# ADDITIONAL VISUALIZATION
# ===============================

# Horizontal bar chart of tweet counts per sentiment label
plt.figure(figsize=(10, 6))
colors_horizontal = ['#FF6B6B', '#4ECDC4', '#45B7D1']
bars = plt.barh(polaritas.index, polaritas.values, color=colors_horizontal, alpha=0.8, edgecolor='black')

plt.title("Distribusi Sentimen Tweet (Horizontal Bar Chart)", fontsize=14, fontweight='bold')
plt.xlabel("Jumlah Tweet", fontsize=12)
plt.ylabel("Sentimen", fontsize=12)

# Annotate the end of each bar with its count and its share of total_tweets.
# The bars are iterated directly; no positional index is needed.
for bar in bars:
    width = bar.get_width()
    plt.text(width + 1, bar.get_y() + bar.get_height()/2,
             f'{int(width)} ({width/total_tweets*100:.1f}%)',
             ha='left', va='center', fontweight='bold')

plt.tight_layout()
plt.show()

# Print the summary header followed by the per-label counts
print(
    "\n🎯 RINGKASAN HASIL:",
    "=" * 30,
    "Jumlah Tweet berdasarkan Polaritas Sentimen:",
    polaritas,
    sep="\n",
)

# Dominant sentiment: the label with the highest count and its share of all tweets
sentimen_dominan = polaritas.idxmax()
persentase_dominan = (polaritas[sentimen_dominan] / total_tweets) * 100
print(f"\n🏆 Sentimen dominan: {sentimen_dominan.upper()} ({persentase_dominan:.1f}%)")