# In [None]:
#event generation
# --- Imports ---
from vertexai.preview.generative_models import GenerativeModel
from google.cloud import firestore
import time

# --- 1. Connect to Firestore ---
# A single client backs all three collection handles: clusters are read from
# the first, posts are looked up in the second, events are written to the third.
db = firestore.Client()
cluster_collection, posts_collection, event_collection = (
    db.collection(collection_name)
    for collection_name in ('cl7', 'instagram_events_trial1', 'event6')
)

# --- 2. Initialize Gemini Model ---
# Model handle used to generate the per-cluster event summaries.
gemini_model = GenerativeModel("gemini-2.5-flash")

# --- 3. Build one event document per cluster ---
# For every cluster document: look up the posts it references, ask Gemini to
# merge their summaries into one event summary, and add the resulting event
# document to `event_collection`. Clusters that list no post ids, or whose ids
# match no existing post, are skipped. Any error outside the Gemini call aborts
# the remaining clusters and is reported by the outer handler.
try:
    cluster_docs = cluster_collection.stream()

    for cluster_doc in cluster_docs:
        cluster_data = cluster_doc.to_dict()
        cluster_id = cluster_doc.id
        document_ids = cluster_data.get('document_ids', [])

        if not document_ids:
            continue

        # --- Fetch posts for this cluster ---
        summaries = []
        media_urls = []
        matched_posts = 0

        for doc_id in document_ids:
            post_doc = posts_collection.document(doc_id).get()
            if not post_doc.exists:
                continue

            matched_posts += 1
            post = post_doc.to_dict()

            # Keep only non-empty text summaries: a None or non-string value
            # would make the join below raise and throw away the valid
            # summaries of the whole cluster.
            summary = post.get('summary')
            if isinstance(summary, str) and summary.strip():
                summaries.append(summary)
            if 'media_url' in post:
                media_urls.append(post['media_url'])

        if matched_posts == 0:
            continue

        # --- Gemini Event Summary ---
        final_summary = "Event summary could not be generated."
        if summaries:
            try:
                combined_text = "\n".join(summaries)
                prompt = f"Summarize the following social media post summaries into one coherent event summary:\n\n{combined_text}"
                response = gemini_model.generate_content(prompt)
                final_summary = response.text.strip()
            except Exception as gemini_error:
                # Best effort: keep the placeholder summary, but report the
                # failure instead of hiding it.
                print(f" Gemini summary failed for cluster {cluster_id}: {gemini_error}")
        else:
            # Without any summary text the prompt would be empty and the model
            # would have nothing real to summarize, so the call is skipped.
            print(f" No post summaries for cluster {cluster_id}; skipping Gemini call")

        # --- Use cluster data directly ---
        avg_time_of_day = cluster_data.get('avg_time_of_day', 'Not available')
        date_only = cluster_data.get('date', 'Unknown')
        location = cluster_data.get('location', 'Unknown')
        confidence_score = cluster_data.get('confidence_score', 0)
        # The sentiment is stored on this same cluster document, so it is read
        # from the data already streamed rather than fetched a second time.
        overall_sentiment = cluster_data.get('overall_sentiment', 'Unknown')

        # --- Insert event document ---
        new_event_document = {
            "cluster_id": cluster_id,
            "event_summary": final_summary,
            "media_urls": media_urls,
            "confidence_score": confidence_score,
            "source_document_count": matched_posts,
            "average_time_of_day": avg_time_of_day,
            "event_date": date_only,
            "location": location,
            "overall_sentiment": overall_sentiment,
        }

        # NOTE(review): add() creates a new auto-id document every run, so
        # re-running this cell presumably duplicates events — confirm whether
        # that is intended.
        event_collection.add(new_event_document)
        print(f" Inserted event for cluster {cluster_id}")

        time.sleep(2)  # Prevent quota issues

except Exception as e:
    print(f" Error: {e}")

finally:
    # Runs on both success and failure; check for an " Error:" line above.
    print(" Firestore processing completed.")
