In [None]:
# topic_modeling.ipynb

# This notebook demonstrates how to perform topic modeling on disaster-related tweets
# using Latent Dirichlet Allocation (LDA) from Scikit-learn.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Sample tweets: a tiny illustrative corpus, one short sentence per disaster type.
tweets = [
    "Earthquake hits California!",
    "Floods in Venice have reached alarming levels.",
    "Hurricane causes massive damage in Florida.",
    "Wildfire spreading quickly in Australia.",
    "Tornado warnings issued in Texas."
]

# Tunable settings: how many topics to extract and how many of the
# highest-weighted terms to print for each topic.
N_TOPICS = 3
N_TOP_WORDS = 11

# Vectorize the tweets into a document-term count matrix.
# min_df must be 1 for this sample: once English stop words are removed, no
# term occurs in more than one of the five tweets, so min_df=2 would prune the
# entire vocabulary and fit_transform would raise
# "ValueError: After pruning, no terms remain". Raise min_df again when
# running on a realistically sized corpus.
vectorizer = CountVectorizer(max_df=0.95, min_df=1, stop_words='english')
tweet_matrix = vectorizer.fit_transform(tweets)

# Fit LDA model (random_state is fixed so reruns yield the same topics).
lda = LatentDirichletAllocation(n_components=N_TOPICS, random_state=42)
lda.fit(tweet_matrix)

# Display the top words for each topic
feature_names = vectorizer.get_feature_names_out()
for topic_idx, topic in enumerate(lda.components_):
    # argsort() is ascending, so reverse it and keep the first N_TOP_WORDS
    # indices to get the highest-weighted terms in descending order.
    top_indices = topic.argsort()[::-1][:N_TOP_WORDS]
    print(f"Topic #{topic_idx}:")
    print(" ".join(feature_names[i] for i in top_indices))
