In [None]:
import pandas as pd
import ipywidgets as widgets
from ipywidgets import Layout
from ipywidgets import HBox, VBox, Layout
from IPython.display import display, clear_output
from ipyleaflet import Map, Marker, MarkerCluster
from textblob import TextBlob
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import os
import time
import io
from tqdm.notebook import tqdm


# === Title ===
# Render the application heading and a short description as an HTML widget.
display(widgets.HTML(value="""
<h1 style='color: #444;'>📌 SocioNarrative: Social Media Event Analyzer</h1>
<p style='color: #666;'>Upload social media data with optional geolocation, perform analysis, and explore results interactively on a map.</p>
"""))

In [None]:
# File upload widget (a single .csv file at a time)
upload = widgets.FileUpload(accept='.csv', multiple=False, description="Upload CSV")
# Output area that update_status() prints progress messages into
status_output = widgets.Output()

# Display area for head of enriched dataframe
df_output = widgets.Output()

# Initialize geocoder and cache
geolocator = Nominatim(user_agent="geo-enrichment-voila")
# Maps a location value to its {"country", "latitude", "longitude"} result so
# a repeated location is only sent to the geocoder once
location_cache = {}

# Sentiment analysis
def get_sentiment(text):
    """Classify *text* as "positive", "negative" or "neutral".

    The label is derived from TextBlob's polarity score: above 0.1 is
    "positive", below -0.1 is "negative", anything in between is "neutral".

    Values that are not strings (for example the NaN pandas produces for an
    empty CSV cell) and blank strings carry no sentiment, so they are
    reported as "neutral" without being handed to TextBlob.

    Args:
        text: The text to analyse; may be a non-string placeholder for a
            missing value.

    Returns:
        One of "positive", "negative" or "neutral".
    """
    if not isinstance(text, str) or not text.strip():
        return "neutral"

    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        return "positive"
    if polarity < -0.1:
        return "negative"
    return "neutral"

# Geocoding
def enrich_geolocation(df):
    """Add "country", "latitude" and "longitude" columns to *df*.

    Every value of ``df['location']`` is resolved through the module-level
    ``geolocator``. Results are remembered in ``location_cache`` so that a
    repeated location only triggers one request, and the loop pauses one
    second after each request.

    Missing (NaN/None) and blank locations are not sent to the geocoder;
    they get ``None`` in all three columns. A lookup that fails with a
    timeout or service error also yields ``None`` values, but is NOT cached,
    so the same location is attempted again the next time it is seen.

    Args:
        df: DataFrame with a ``location`` column. It is modified in place.

    Returns:
        The same DataFrame object, with the three columns added.
    """
    countries, latitudes, longitudes = [], [], []
    for loc in tqdm(df['location'], desc="📍 Geocoding"):
        is_blank = isinstance(loc, str) and not loc.strip()
        if pd.isna(loc) or is_blank:
            # Nothing to look up: avoid querying the service for "nan"/""
            result = {"country": None, "latitude": None, "longitude": None}
        elif loc in location_cache:
            result = location_cache[loc]
        else:
            try:
                location = geolocator.geocode(loc, timeout=10)
            except (GeocoderTimedOut, GeocoderServiceError):
                # Transient failure: report no result for this row but leave
                # the cache untouched so a later occurrence can retry
                result = {"country": None, "latitude": None, "longitude": None}
            else:
                if location:
                    result = {
                        # Last comma-separated component of the address
                        "country": location.address.split(",")[-1].strip(),
                        "latitude": location.latitude,
                        "longitude": location.longitude
                    }
                else:
                    # The service answered but found nothing; safe to cache
                    result = {"country": None, "latitude": None, "longitude": None}
                location_cache[loc] = result
            # Throttle after every request, successful or not
            time.sleep(1)
        countries.append(result["country"])
        latitudes.append(result["latitude"])
        longitudes.append(result["longitude"])

    df["country"] = countries
    df["latitude"] = latitudes
    df["longitude"] = longitudes
    return df

# Status helper and upload callback


def update_status(msg):
    """Replace the contents of ``status_output`` with a single message.

    Args:
        msg: Text to print inside the status output widget.
    """
    # Remove the previous message first. wait=True is forwarded to the
    # widget's clear_output (presumably so the clear is deferred until the
    # new message arrives, avoiding flicker; confirm in the ipywidgets docs).
    status_output.clear_output(wait=True)
    # Print while the output widget is the active context so the text is
    # directed into it
    with status_output:
        print(msg, flush=True)

def _geocode_location(loc):
    """Resolve one location value to a country/latitude/longitude dict.

    Missing (NaN/None) or blank values are never sent to the geocoder.
    Successful and "not found" answers are stored in ``location_cache``;
    timeouts and service errors are not, so the location can be retried.
    """
    if pd.isna(loc) or (isinstance(loc, str) and not loc.strip()):
        return {"country": None, "latitude": None, "longitude": None}
    if loc in location_cache:
        return location_cache[loc]

    try:
        location = geolocator.geocode(loc, timeout=10)
    except (GeocoderTimedOut, GeocoderServiceError):
        # Transient failure: give this row empty values without caching them
        result = {"country": None, "latitude": None, "longitude": None}
    else:
        if location:
            result = {
                # Last comma-separated component of the address
                "country": location.address.split(",")[-1].strip(),
                "latitude": location.latitude,
                "longitude": location.longitude
            }
        else:
            result = {"country": None, "latitude": None, "longitude": None}
        location_cache[loc] = result
    # Throttle after every request, successful or not
    time.sleep(1)
    return result


def handle_upload(change):
    """Enrich the uploaded CSV with sentiment and geolocation columns.

    Registered as the observer of the upload widget's ``value``. Reads the
    uploaded file into a DataFrame, adds a ``sentiment`` column computed from
    ``text`` and ``country``/``latitude``/``longitude`` columns computed from
    ``location``, writes the result to ``../data/enriched_user_data.csv`` and
    shows the first rows in ``df_output``. Progress and errors are reported
    through ``update_status``.

    Args:
        change: Change notification supplied by ``observe``; not used, the
            current upload is read from ``upload.value`` directly.
    """
    status_output.clear_output()
    df_output.clear_output()

    if not upload.value:
        update_status("⚠️ No file uploaded.")
        return

    update_status("📊 processing uploading file...")

    try:
        uploaded_file = upload.value[0]  # ✅ index into the value instead of calling .values()
        content = io.BytesIO(uploaded_file['content'])
        df = pd.read_csv(content)
    except Exception as e:
        # UI boundary: any read/parse problem is shown to the user
        update_status(f"❌ 读取文件失败: {str(e)}")
        return

    # Both columns are read below; report a clear message instead of letting
    # a KeyError escape from the callback
    missing = [col for col in ("text", "location") if col not in df.columns]
    if missing:
        update_status(f"❌ Missing required column(s): {', '.join(missing)}")
        return

    update_status("🔍 正在进行情绪分析...")
    # Empty CSV cells arrive as NaN (not str); treat them as neutral rather
    # than passing them to the sentiment analyser
    df['sentiment'] = df['text'].apply(
        lambda text: get_sentiment(text) if isinstance(text, str) else "neutral"
    )

    update_status("🌍 正在执行地理编码...")
    total = len(df)
    results = []
    for i, loc in enumerate(df['location'], start=1):
        update_status(f"📍 Geocoding {i}/{total}: {loc}")
        results.append(_geocode_location(loc))

    df["country"] = [r["country"] for r in results]
    df["latitude"] = [r["latitude"] for r in results]
    df["longitude"] = [r["longitude"] for r in results]

    output_path = "../data/enriched_user_data.csv"
    try:
        # The target directory may not exist yet on a fresh checkout
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        df.to_csv(output_path, index=False)
    except OSError as e:
        update_status(f"❌ Could not save {output_path}: {e}")
    else:
        update_status("✅ 完成，文件已保存为 enriched_user_data.csv")

    # The preview is shown even if saving failed: the enrichment itself worked
    with df_output:
        display(df.head())

# Run handle_upload every time the upload widget's value changes
upload.observe(handle_upload, names='value')

# Layout for Voila: heading, upload button, status line, dataframe preview
ui = VBox(
    children=[
        widgets.HTML(value="<h2>📥 Upload CSV for Enrichment</h2>"),
        upload,
        status_output,
        df_output,
    ]
)

# Bare expression at the end of the cell so the layout is rendered
ui
