# The dynamics between President Claudia Sheinbaum and journalists in Las Mañaneras del Pueblo

### Goal:
The goal of this project is to explore the interactional dynamics between the president and journalists to understand whether the Mañaneras function as a genuine conversation or as a controlled political monologue. 

In [90]:
import json
import pandas as pd
import altair as alt
import sys
sys.path.append("../src")
from data_processing import * 
import geopandas as gpd
import unidecode

In [91]:
# Folder that receives every exported SVG figure. The value already ends with
# "/", and the saving cells join it as f"{output_folder}/{file_name}".
output_folder = "../static-viz/figures/static_final/"

In [92]:
# Read the processed transcripts (UTF-8 encoded JSON) into memory.
transcripts_path = "../data/processed/article_transcripts.json"
with open(transcripts_path, mode="r", encoding="utf-8") as transcripts_file:
    data = json.load(transcripts_file)

# Convert the loaded JSON into the DataFrame `df` used by the following cells.
df = flatten_data(data)

In [93]:
# Visualization style parameters

# Color palette shared by the figures (color names in the trailing comments).
color_president = "#9B2915"   # Rufous
color_journalist = "#063A35"  # Dark slate gray
color_officials = "#BD8B9C"   # Puce
color_others = "#1EA896"      # Persian green
color_more = "#E9B872"        # Earth yellow
color_gray = "#4f4f4d"        # Medium gray

# Start from Altair's stock theme; the custom theme registered at the end of
# this cell is enabled afterwards and becomes the active one.
alt.themes.enable("default")

# Base chart dimensions.
# NOTE(review): not referenced by any cell visible in this notebook export —
# confirm it is still needed.
base_props = dict(width=600, height=900)

# Global text properties
TITLE_FONT = "Libre Franklin"
LABEL_FONT = "Libre Franklin"


def _mananeras_theme():
    """Return the Vega-Lite config dict behind the 'mananeras_theme' theme."""
    title_config = {
        "fontSize": 22,
        "font": TITLE_FONT,
        "anchor": "middle",
        "color": "#313030",
    }
    axis_config = {
        "labelFont": LABEL_FONT,
        "titleFont": LABEL_FONT,
        "labelColor": "#545151",
    }
    # strokeWidth 0 removes the border drawn around each chart view.
    view_config = {"strokeWidth": 0}
    return {"config": {"title": title_config, "axis": axis_config, "view": view_config}}


alt.themes.register("mananeras_theme", _mananeras_theme)
alt.themes.enable("mananeras_theme")

ThemeRegistry.enable('mananeras_theme')

### 1. Context: When and how much are they speaking?

In [94]:
# Data behind the section-1 heatmap, built by the project helper
# get_daily_lengths_by_actor (presumably one row per date and speaker group —
# confirm in data_processing). The heatmap cell reads the columns
# speaker_group, day_of_week, yearweek, date and total_words.
df_1 = get_daily_lengths_by_actor(df)

In [95]:
# Weekday order for the x-axis, so columns run Monday -> Sunday instead of
# alphabetically.
cat_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]


def _daily_words_heatmap(speaker_group, light_color, dark_color, title):
    """Build the weekday-by-week heatmap of words per day for one speaker group.

    Parameters
    ----------
    speaker_group : str
        Value of ``df_1["speaker_group"]`` to keep.
    light_color, dark_color : str
        Ends of the color range; the smallest ``total_words`` maps to
        *light_color* and the largest to *dark_color*.
    title : str
        Title shown above this panel.

    Returns
    -------
    alt.Chart
        One rectangle per (day_of_week, yearweek) cell, 500x800 px.
    """
    group_rows = df_1[df_1["speaker_group"] == speaker_group]
    return (
        alt.Chart(group_rows)
        .mark_rect(stroke="white")
        .encode(
            # labelAngle=0 keeps weekday labels horizontal (the previous value,
            # 360, is the same angle after a full turn).
            x=alt.X("day_of_week:N", title=" ", sort=cat_order, axis=alt.Axis(labelAngle=0)),
            # sort=None keeps the weeks in the order they appear in the data.
            y=alt.Y("yearweek:N", title="Week (Year)", sort=None),
            color=alt.Color(
                "total_words:Q",
                scale=alt.Scale(range=[light_color, dark_color]),
                title="Words per day",
            ),
            tooltip=["date:T", "total_words:Q", "yearweek:N"],
        )
        .properties(title=title, width=500, height=800)
    )


# Step 1. Chart for the president/official
chart_president = _daily_words_heatmap(
    "President/Official", "#E1CBD3", color_president, "Presidenta Sheinbaum/Official"
)

# Step 2. Chart for the journalists
chart_journalists = _daily_words_heatmap(
    "Journalist", "#CDE1CB", color_journalist, "Journalists"
)

# Step 3. Combine side by side, almost like a facet. The color scales are
# resolved independently so each panel keeps its own palette and legend.
combined_chart = (
    (chart_president | chart_journalists)
    .resolve_scale(color="independent")
    .properties(title="Length of Mañaneras (Words per Day, by Speaker Type)")
)

# Step 4. Save figure
file_name = "01_heatmap_daily_lengths_by_actor.svg"
full_path = f"{output_folder}/{file_name}"
combined_chart.save(full_path)
combined_chart

### 2. Who speaks and how much?

In [96]:
# Keep Monday-Friday rows only (pandas dayofweek: Monday=0 ... Sunday=6).
weekday_mask = pd.to_datetime(df["date"]).dt.dayofweek < 5


def _add_smooth_ratio(stats, phase_label):
    """Parse dates, add the smoothed ratio and tag the conference phase (in place).

    NOTE(review): ``rolling(7, min_periods=1)`` averages the last 7 *rows* in
    the order the helper returned them, not 7 calendar days — confirm the
    frame is sorted by date.
    """
    stats["date"] = pd.to_datetime(stats["date"], errors="coerce")
    ratio = stats["ratio_president_journalist"]
    stats["ratio_smooth"] = ratio.rolling(7, min_periods=1).mean()
    stats["phase"] = phase_label
    return stats


# Turn-taking statistics over the whole conference ...
df_2 = _add_smooth_ratio(get_turn_taking_stats(df[weekday_mask]), "Whole Conference")
# ... and from the `_interact` variant of the helper, labelled as the part
# after journalists start.
df_3 = _add_smooth_ratio(
    get_turn_taking_stats_interact(df[weekday_mask]), "After Journalists Start"
)

# Long format: one frame with both phases stacked, for the line chart.
df_combined = pd.concat([df_2, df_3], ignore_index=True)

# Compute the overall mean ratio after conversation starts
mean_ratio = df_3["ratio_smooth"].mean()

In [97]:
# Step 1. Base line chart (rolling averages), one line per phase of the conference
phase_color = alt.Color(
    "phase:N",
    title="Section of Conference",
    scale=alt.Scale(
        domain=["Whole Conference", "After Journalists Start"],
        range=[color_president, color_more],
    ),
)
ratio_tooltip = [
    alt.Tooltip("date:T", title="Date"),
    alt.Tooltip("phase:N", title="Section"),
    alt.Tooltip("ratio_smooth:Q", title="7-Day Avg Ratio", format=".2f"),
]
chart_combined = (
    alt.Chart(df_combined)
    .mark_line(strokeWidth=3)
    .encode(
        x=alt.X("date:T", title="Date"),
        y=alt.Y("ratio_smooth:Q", title="President to Journalist Turn Ratio (7-Day Average)"),
        color=phase_color,
        tooltip=ratio_tooltip,
    )
    .properties(title="President vs Journalist Turn Ratio", width=1000, height=500)
)


def _reference_rule(level, caption):
    """Return (rule, label): a dashed horizontal line at *level* and its caption."""
    level_frame = pd.DataFrame({"y": [level]})
    rule = (
        alt.Chart(level_frame)
        .mark_rule(color="black", strokeDash=[3, 3])
        .encode(y="y:Q")
    )
    label = (
        alt.Chart(level_frame)
        .mark_text(align="left", dx=10, dy=-5, color="black", fontSize=12)
        .encode(y="y:Q", text=alt.value(caption))
    )
    return rule, label


# Step 2. Horizontal reference lines. The upper level (8) is a fixed value
# written into the cell; the lower one is the rounded mean computed earlier.
line_8, text_8 = _reference_rule(
    8, "Sometimes, the president talks 8 times more than the journalists"
)

mean_level = round(mean_ratio)
line_mean, text_mean = _reference_rule(
    mean_level, f"In average, she talks {mean_level} times more than the journalists"
)

# Step 3. Combine all layers (lines first, annotations drawn on top)
final_chart = chart_combined + line_8 + text_8 + line_mean + text_mean

file_name = "02_turn_taking_ratio.svg"
full_path = f"{output_folder}/{file_name}"
final_chart.save(full_path)
final_chart

### 3. Topic occurrence by week. What do they talk about?

In [98]:
# Define topics and associated keywords.
# Each key is a topic label (the same labels are listed in `topic_order` for
# the faceted chart); each value is the list of Spanish keywords/phrases that
# is passed to get_topics_by_week_by_group.
# NOTE(review): most keywords are lowercase without accents ("educacion"), but
# some keep accents or capitals ("capacitación", "SEP"). Whether those can
# match depends on how get_topics_by_week_by_group normalizes text — confirm.
# NOTE(review): some keywords are listed under two topics ("igualdad" in
# Poverty and Gender, "feminicidio" in Security and Gender), so a single
# mention may count toward both — confirm this is intended.
topics = {
    "Education": [
        "educacion", "educativo", "educativa", "educar", "aprendizaje", "ensenanza",
        "docente", "docentes", "maestro", "maestros", "profesor", "profesores",
        "estudiante", "estudiantes", "alumno", "alumnos", "beca", "becas",
        "universidad", "universidades", "campus", "facultad", "facultades",
        "instituto", "institutos", "escuela", "escuelas", "colegio", "colegios",
        "politecnico", "ipn", "unam", "conalep", "tecnologico", "normal", "educadora",
        "formacion", "capacitación", "literatura", "lectura", "alfabetizacion",
        "Secretaría de Educación", "SEP", "escuelas normales", "estudios superiores"
    ],
    
    "Migration": [
        "migracion", "migrante", "migrantes", "inmigrante", "inmigrantes", "emigrante",
        "refugio", "refugiado", "refugiados", "asilo", "deportacion", "deportado",
        "caravana", "caravanas", "movilidad humana", "cruce", "cruzar", "cruzando",
        "regularizacion", "documentacion", "estatus migratorio", "tránsito", "retorno",
        "frontera", "fronteras", "sur", "norte", "tapachula", "tijuana", "ciudad juarez",
        "estados unidos", "eeuu", "ee uu", "mexico-estados unidos", "centroamerica",
        "haitiano", "venezolano", "hondureno", "guatemalteco", "migratorio",
        "INM", "Instituto Nacional de Migración", "Comar", "crisis migratoria",
        "migración irregular", "protección a migrantes", "visado humanitario"
    ],
    
    "Poverty": [
        "pobreza", "pobre", "pobres", "carencia", "vulnerabilidad", "marginacion",
        "desigualdad", "exclusion social", "bienestar", "ayuda", "ayudas", "subsidio",
        "subsidios", "transferencia", "transferencias", "programa social",
        "programas sociales", "apoyo social", "prospera", "oportunidades",
        "pension", "pensiones", "adultos mayores", "familias", "hogares",
        "ingreso", "ingresos", "salario", "salarios", "empleo", "trabajo",
        "trabajador", "trabajadores", "economia popular", "comunidad", "marginalidad",
        "Sembrando Vida", "Jóvenes Construyendo el Futuro", "Banco del Bienestar",
        "Secretaría del Bienestar", "igualdad", "pobreza extrema", "zona rural",
        "desarrollo social", "redistribucion", "nivel de vida"
    ],
    
    "Health": [
        "salud", "salud publica", "hospital", "hospitales", "clinica", "clinicas",
        "centro de salud", "imss", "issste", "insabi", "imss bienestar",
        "medico", "medicos", "doctor", "doctora", "enfermero", "enfermera",
        "vacuna", "vacunas", "campaña de vacunacion", "covid", "covid19",
        "pandemia", "epidemia", "enfermedad", "enfermedades", "cancer", "diabetes",
        "salubridad", "medicamento", "medicamentos", "atencion medica", "consultorio",
        "prevencion", "rehabilitacion", "hospitalizacion", "servicios medicos",
        "sistema de salud", "cirugia", "medicina", "IMSS-Bienestar", "Salud Digna"
    ],
    
    "Security": [
        "seguridad", "seguridad publica", "violencia", "violento", "delincuencia",
        "delito", "delitos", "crimen", "crimen organizado", "criminal", "criminales",
        "policia", "policias", "guardia nacional", "gn", "ejercito", "marina",
        "sedena", "defensa", "militar", "militares", "fuerzas armadas", "operativo",
        "detencion", "captura", "combate", "armas", "armamento", "tiroteo",
        "homicidio", "asesinato", "feminicidio", "extorsion", "secuestro",
        "narcotrafico", "narco", "cartel", "carteles", "civiles", "justicia",
        "Ministerio Público", "Fiscalía", "seguridad nacional", "CNI", "SSPC"
    ],
    
    "Environment": [
        "medio ambiente", "ambiente", "ecologia", "ecologico", "ambiental",
        "sustentable", "sostenible", "sustentabilidad", "sostenibilidad",
        "agua", "rio", "rios", "laguna", "lagunas", "cuenca", "bosque", "bosques",
        "selva", "selvas", "reforestacion", "deforestacion", "manglar", "manglares",
        "energia", "energias", "renovable", "solar", "eolica", "hidroelectrica",
        "cambio climatico", "crisis climatica", "climatico", "clima", "temperatura",
        "calentamiento global", "contaminacion", "contaminante", "reciclaje",
        "biodiversidad", "naturaleza", "flora", "fauna", "aire limpio", "agua limpia",
        "medioambiental", "sembrando vida", "protección ambiental", "conanp", "conagua",
        "Secretaría del Medio Ambiente", "SEMARNAT"
    ],

    "Gender": [
    "genero", "igualdad", "igualdad de genero", "equidad", "equidad de genero",
    "mujer", "mujeres", "feminismo", "feminista", "feministas",
    "violencia de genero", "violencia contra las mujeres", "violencia familiar",
    "violencia domestica", "feminicidio", "feminicidios", "acoso", "hostigamiento",
    "hostigamiento sexual", "abuso sexual", "discriminacion", "patriarcado",
    "machismo", "machista", "empoderamiento", "empoderar", "cuidados",
    "brecha salarial", "igualdad sustantiva", "perspectiva de genero",
    "Instituto Nacional de las Mujeres", "INMUJERES",
    "Secretaría de las Mujeres", "mujeres indígenas", "mujeres trabajadoras",
    "derechos de las mujeres", "libertad sexual", "autonomía", "paridad de género",
    "inclusión", "no discriminación", "alerta de género"
    ],

    "Corruption": [
    "corrupcion", "corrupto", "corruptos", "anticorrupcion", "anticorrupción",
    "transparencia", "rendicion de cuentas", "rendición de cuentas",
    "honestidad", "honesto", "honestos", "impunidad", "impune", "impunes",
    "nepotismo", "soborno", "sobornos", "malversacion", "peculado",
    "trafico de influencias", "conflicto de interes", "clientelismo", "cohecho",
    "enriquecimiento ilícito", "fraude", "desvio de recursos", "auditoria", "auditorias",
    "contraloria", "contraloría", "SFP", "Secretaría de la Función Pública",
    "Fiscalía Anticorrupción", "investigacion", "denuncia", "denuncias",
    "castigo", "sancion", "sanciones", "transparente", "honradez", "ética pública",
    "moralidad", "integridad", "responsabilidad administrativa"
    ]
}

# Topic shares per week and speaker group, computed by the project helper; the
# chart cell reads the columns topic, yearweek, speaker_group and share_smooth.
df_4 = get_topics_by_week_by_group(df, topics)

In [99]:
# Express the smoothed share as a percentage so axis and tooltip values read naturally
df_4["share_pct"] = df_4["share_smooth"] * 100

# Color assigned to each speaker group
speaker_colors = {
    "President/Official": color_president,
    "Journalist": color_journalist
}

# Facet order (laid out in 2 columns × 4 rows)
topic_order = ["Education", "Migration", "Poverty", "Health", "Security", "Environment", "Gender", "Corruption"]

# Step 1. Create the faceted chart
topic_rows = df_4[df_4["topic"].isin(topic_order)]

speaker_group_scale = alt.Scale(
    domain=list(speaker_colors),
    range=list(speaker_colors.values()),
)
speaker_group_legend = alt.Legend(
    title="Speaker Group",
    symbolStrokeWidth=4,
    symbolSize=150,
    labelFontSize=12,
    titleFontSize=13,
)
topic_tooltip = [
    alt.Tooltip("yearweek:N", title="Week"),
    alt.Tooltip("topic:N", title="Topic"),
    alt.Tooltip("speaker_group:N", title="Speaker Group"),
    alt.Tooltip("share_pct:Q", title="Share (%)", format=".1f"),
]

chart_topics_by_actor = (
    alt.Chart(topic_rows)
    .mark_line(strokeWidth=3)
    .encode(
        x=alt.X("yearweek:N", title="Week"),
        y=alt.Y("share_pct:Q", title="Share of Speech (%)"),
        color=alt.Color(
            "speaker_group:N",
            title="Speaker Group",
            scale=speaker_group_scale,
            legend=speaker_group_legend,
        ),
        # One small panel per topic, in the order given by topic_order
        facet=alt.Facet("topic:N", title=None, columns=2, sort=topic_order),
        tooltip=topic_tooltip,
    )
    .properties(title="Weekly Topic Mentions by Speaker Group", width=500, height=250)
)

# Step 2. Save chart
file_name = "03_topic_trends_by_actor.svg"
full_path = f"{output_folder}/{file_name}"
chart_topics_by_actor.save(full_path)
chart_topics_by_actor

### 4. State occurrence by week. Which states do they talk about?

In [100]:
# State mention counts per speaker group, from the project helper.
df_5 = count_state_mentions_by_group(df)

# "Estado de Mexico" is left out of the chart because it skews the visualization.
# TODO: figure out why it is counted so much more often than the other states.
is_estado_de_mexico = df_5["state"] == "Estado de Mexico"
df_5 = df_5[~is_estado_de_mexico]


In [101]:
# Step 1. Load and normalize geographic data
# The state polygons are read from a GitHub URL each time this cell runs, so
# it needs network access.
geo_url = "https://raw.githubusercontent.com/angelnmara/geojson/master/mexicoHigh.json"
mexico_gdf = gpd.read_file(geo_url)
mexico_gdf["state_norm"] = mexico_gdf["name"].apply(normalize_name)

# Step 2. Prepare mentions data so it is "mergeable" (same normalized key as the map)
df_states = df_5.copy()
df_states["state_norm"] = df_states["state"].apply(normalize_name)


def _mention_shares(mentions_by_state, speaker_group):
    """Return one group's rows with ``pct``: each state's share of that group's mentions (%).

    If the group has no mentions at all, ``pct`` is set to 0 instead of
    dividing by zero (which would yield NaN shares).
    """
    group_rows = mentions_by_state[mentions_by_state["speaker_group"] == speaker_group].copy()
    total_mentions = group_rows["mentions"].sum()
    if total_mentions:
        group_rows["pct"] = (group_rows["mentions"] / total_mentions) * 100
    else:
        group_rows["pct"] = 0.0
    return group_rows


def _merge_with_map(shares):
    """Left-join *shares* onto the state polygons; states without a match get 0 mentions and 0 %."""
    return (
        mexico_gdf
        .merge(shares, on="state_norm", how="left")
        .fillna({"mentions": 0, "pct": 0})
    )


def _state_map(geo_values, title, light_color, dark_color):
    """Build a choropleth of ``pct`` per state, shaded from *light_color* to *dark_color*."""
    return (
        alt.Chart(geo_values)
        .mark_geoshape(stroke="white", strokeWidth=0.5)
        .encode(
            # Feature attributes live under "properties" in GeoJSON features.
            color=alt.Color(
                "properties.pct:Q",
                title="% of Mentions",
                scale=alt.Scale(range=[light_color, dark_color]),
            ),
            tooltip=[
                alt.Tooltip("properties.name:N", title="State"),
                alt.Tooltip("properties.pct:Q", title="% of Mentions", format=".2f"),
            ],
        )
        .properties(title=title, width=500, height=350)
        .project("mercator")
    )


def _top_states_bars(top_rows, bar_color):
    """Build the small horizontal bar chart of the given top states, largest share first."""
    return (
        alt.Chart(top_rows)
        .mark_bar(color=bar_color, cornerRadius=3)
        .encode(
            x=alt.X("pct:Q", title="% of Mentions"),
            y=alt.Y("state:N", sort="-x", title=None),
            tooltip=["state:N", alt.Tooltip("pct:Q", format=".2f")],
        )
        .properties(width=200, height=120, title="Top 5 States")
    )


# Steps 3-4. Separate data by speaker group and compute share of total mentions per group
df_pres = _mention_shares(df_states, "President/Official")
df_jour = _mention_shares(df_states, "Journalist")

# Step 5. Merge with maps information for each group
merged_pres = _merge_with_map(df_pres)
merged_jour = _merge_with_map(df_jour)

# Step 6. Convert to inline GeoJSON features for Altair
geo_pres = {"values": merged_pres.__geo_interface__["features"]}
geo_jour = {"values": merged_jour.__geo_interface__["features"]}

# Step 7. Map: President / Officials
chart_pres_map = _state_map(
    geo_pres, "Mentions of Mexican States — President/Officials", "#E1CBD3", color_president
)

# Step 8. Map: Journalists
chart_jour_map = _state_map(
    geo_jour, "Mentions of Mexican States — Journalists", "#C5CAC0", color_journalist
)

# Step 9. Mini bar charts with Top 5 States per group
top5_pres = df_pres.nlargest(5, "pct")
top5_jour = df_jour.nlargest(5, "pct")

bars_pres = _top_states_bars(top5_pres, color_president)
bars_jour = _top_states_bars(top5_jour, color_journalist)

# Step 10. Combine maps and bars vertically
pres_column = alt.vconcat(chart_pres_map, bars_pres).resolve_scale(color="independent")
jour_column = alt.vconcat(chart_jour_map, bars_jour).resolve_scale(color="independent")

# Step 11. Combine side by side both president and journalists info
chart_final = (
    alt.hconcat(pres_column, jour_column)
    .configure_view(stroke=None)
    .properties(
        title="Percentage of Mentions of Mexican States by Speaker Type"
    )
)

# Step 12. Save chart
file_name = "04_most_mentioned_states.svg"
full_path = f"{output_folder}/{file_name}"
chart_final.save(full_path)
chart_final

### 5. How does the sentiment of the discussion evolve?

### Date 1: Fight against violence in the country

In [102]:
# Step 1. Choose the conference date and compute its sentiment data via the project helper
target_date = "2025-03-13"
conf_sent = compute_sentiment_for_date(df, target_date)

# If a column name is repeated, keep only its first occurrence
first_occurrence = ~conf_sent.columns.duplicated()
conf_sent = conf_sent.loc[:, first_occurrence]


In [103]:
# Step 2. Compute average sentiment per group for subtitle
avg_sentiments = (
    conf_sent.groupby("speaker_group")["sentiment_score"]
    .mean()
    .round(2)
    .to_dict()
)
# A group that is absent from this conference falls back to 0.
avg_pres = avg_sentiments.get("President/Official", 0)
avg_jour = avg_sentiments.get("Journalist", 0)

# Step 3. Build readable summary for subtitle
subtitle_text = (
    f"Average Sentiment — President/Officials: {avg_pres:+.2f} | "
    f"Journalists: {avg_jour:+.2f}  "
    "(Scale: -1 = Negative, 0 = Neutral, +1 = Positive)"
)

# Step 4. Sentiment zones: background bands between y0 and y1
zones = pd.DataFrame({
    "zone": ["Positive", "Neutral", "Negative"],
    "y0": [0.1, -0.1, -1],
    "y1": [1, 0.1, -0.1],
    "color": ["#E3F2FD", "#F5F5F5", "#FFEBEE"]  # light blue, gray, light red
})

# Step 5. Build a layer with the sentiment zones
zone_chart = (
    alt.Chart(zones)
    .mark_rect()
    .encode(
        y="y0:Q",
        y2="y1:Q",
        # scale=None uses the hex codes in the column as literal colors
        color=alt.Color("color:N", scale=None, legend=None)
    )
)

# Speaker -> color mapping, defined once so the points and the trend lines
# always use the same colors.
sentiment_speaker_scale = alt.Scale(
    domain=["President/Official", "Journalist"],
    range=[color_president, color_journalist]
)

# Step 6. Points with individual interventions classified
points = (
    alt.Chart(conf_sent)
    .mark_circle(size=55, opacity=1)
    .encode(
        x=alt.X("intervention_order:Q", title="Intervention Order"),
        y=alt.Y("sentiment_score:Q", title="Sentiment Score (-1 = Negative, +1 = Positive)"),
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, title="Speaker"),
        tooltip=[
            "intervention_order:Q",
            "speaker_group:N",
            "sentiment_label:N",
            alt.Tooltip("p_pos:Q", title="P(Pos)", format=".2f"),
            alt.Tooltip("p_neu:Q", title="P(Neu)", format=".2f"),
            alt.Tooltip("p_neg:Q", title="P(Neg)", format=".2f"),
            alt.Tooltip("text:N", title="Excerpt")
        ]
    )
)

# Step 7. Smooth trends per group (LOESS fitted separately for each speaker group)
smooth_lines = (
    alt.Chart(conf_sent)
    .transform_loess("intervention_order", "sentiment_score", groupby=["speaker_group"])
    .mark_line(size=3)
    .encode(
        x="intervention_order:Q",
        y="sentiment_score:Q",
        # The legend is already drawn by the points layer
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, legend=None)
    )
)

# Step 8. Neutral line
neutral_line = (
    alt.Chart(pd.DataFrame({"y": [0]}))
    .mark_rule(color="black", strokeDash=[4, 2])
    .encode(y="y:Q")
)

# Step 9. Combine all layers (background zones first, trend lines on top)
conference_day = pd.to_datetime(target_date)
chart_sentiment = (
    zone_chart + neutral_line + points + smooth_lines
).properties(
    title={
        "text": [f"Evolution of Sentiment during the Mañanera — {conference_day.date()}"],
        "subtitle": [subtitle_text],
        "anchor": "middle",
        "fontSize": 18,
        "subtitleFontSize": 13,
        "subtitleColor": "#555"
    },
    width=1100,
    height=450
).configure_view(stroke=None)

# Step 10. Save chart
# The file name is derived from target_date so that reusing this cell with
# another date cannot overwrite the figure of a different conference.
file_name = f"05_sentiment_{conference_day.strftime('%Y_%m_%d')}.svg"
full_path = f"{output_folder}/{file_name}"
chart_sentiment.save(full_path)
chart_sentiment

### Date 2: Term start date

In [104]:
# Step 1. Choose the conference date and compute its sentiment data via the project helper
target_date = "2024-10-02"
conf_sent = compute_sentiment_for_date(df, target_date)

# If a column name is repeated, keep only its first occurrence
first_occurrence = ~conf_sent.columns.duplicated()
conf_sent = conf_sent.loc[:, first_occurrence]


In [105]:
# Step 2. Compute average sentiment per group for subtitle
avg_sentiments = (
    conf_sent.groupby("speaker_group")["sentiment_score"]
    .mean()
    .round(2)
    .to_dict()
)

# A group that is absent from this conference falls back to 0.
avg_pres = avg_sentiments.get("President/Official", 0)
avg_jour = avg_sentiments.get("Journalist", 0)

# Step 3. Build readable summary for subtitle
subtitle_text = (
    f"Average Sentiment — President/Officials: {avg_pres:+.2f} | "
    f"Journalists: {avg_jour:+.2f}  "
    "(Scale: -1 = Negative, 0 = Neutral, +1 = Positive)"
)

# Step 4. Sentiment zones: background bands between y0 and y1
zones = pd.DataFrame({
    "zone": ["Positive", "Neutral", "Negative"],
    "y0": [0.1, -0.1, -1],
    "y1": [1, 0.1, -0.1],
    "color": ["#E3F2FD", "#F5F5F5", "#FFEBEE"]  # light blue, gray, light red
})

# Step 5. Build a layer with the sentiment zones
zone_chart = (
    alt.Chart(zones)
    .mark_rect()
    .encode(
        y="y0:Q",
        y2="y1:Q",
        # scale=None uses the hex codes in the column as literal colors
        color=alt.Color("color:N", scale=None, legend=None)
    )
)

# Speaker -> color mapping, defined once so the points and the trend lines
# always use the same colors.
sentiment_speaker_scale = alt.Scale(
    domain=["President/Official", "Journalist"],
    range=[color_president, color_journalist]
)

# Step 6. Points with individual interventions classified
points = (
    alt.Chart(conf_sent)
    .mark_circle(size=55, opacity=1)
    .encode(
        x=alt.X("intervention_order:Q", title="Intervention Order"),
        y=alt.Y("sentiment_score:Q", title="Sentiment Score (-1 = Negative, +1 = Positive)"),
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, title="Speaker"),
        tooltip=[
            "intervention_order:Q",
            "speaker_group:N",
            "sentiment_label:N",
            alt.Tooltip("p_pos:Q", title="P(Pos)", format=".2f"),
            alt.Tooltip("p_neu:Q", title="P(Neu)", format=".2f"),
            alt.Tooltip("p_neg:Q", title="P(Neg)", format=".2f"),
            alt.Tooltip("text:N", title="Excerpt")
        ]
    )
)

# Step 7. Smooth trends per group (LOESS fitted separately for each speaker group)
smooth_lines = (
    alt.Chart(conf_sent)
    .transform_loess("intervention_order", "sentiment_score", groupby=["speaker_group"])
    .mark_line(size=3)
    .encode(
        x="intervention_order:Q",
        y="sentiment_score:Q",
        # The legend is already drawn by the points layer
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, legend=None)
    )
)

# Step 8. Neutral line
neutral_line = (
    alt.Chart(pd.DataFrame({"y": [0]}))
    .mark_rule(color="black", strokeDash=[4, 2])
    .encode(y="y:Q")
)

# Step 9. Combine all layers (background zones first, trend lines on top)
conference_day = pd.to_datetime(target_date)
chart_sentiment = (
    zone_chart + neutral_line + points + smooth_lines
).properties(
    title={
        "text": [f"Evolution of Sentiment during the Mañanera — {conference_day.date()}"],
        "subtitle": [subtitle_text],
        "anchor": "middle",
        "fontSize": 18,
        "subtitleFontSize": 13,
        "subtitleColor": "#555"
    },
    width=1100,
    height=450
).configure_view(stroke=None)

# Step 10. Save chart
# The file name is derived from target_date so that reusing this cell with
# another date cannot overwrite the figure of a different conference.
file_name = f"05_sentiment_{conference_day.strftime('%Y_%m_%d')}.svg"
full_path = f"{output_folder}/{file_name}"
chart_sentiment.save(full_path)
chart_sentiment

### Date 3: Last conference

In [106]:
# Step 1. Choose the conference date and compute its sentiment data via the project helper
target_date = "2025-08-13"
conf_sent = compute_sentiment_for_date(df, target_date)

# If a column name is repeated, keep only its first occurrence
first_occurrence = ~conf_sent.columns.duplicated()
conf_sent = conf_sent.loc[:, first_occurrence]


In [107]:
# Step 2. Compute average sentiment per group for subtitle
avg_sentiments = (
    conf_sent.groupby("speaker_group")["sentiment_score"]
    .mean()
    .round(2)
    .to_dict()
)

# A group that is absent from this conference falls back to 0.
avg_pres = avg_sentiments.get("President/Official", 0)
avg_jour = avg_sentiments.get("Journalist", 0)

# Step 3. Build readable summary for subtitle
subtitle_text = (
    f"Average Sentiment — President/Officials: {avg_pres:+.2f} | "
    f"Journalists: {avg_jour:+.2f}  "
    "(Scale: -1 = Negative, 0 = Neutral, +1 = Positive)"
)

# Step 4. Sentiment zones: background bands between y0 and y1
zones = pd.DataFrame({
    "zone": ["Positive", "Neutral", "Negative"],
    "y0": [0.1, -0.1, -1],
    "y1": [1, 0.1, -0.1],
    "color": ["#E3F2FD", "#F5F5F5", "#FFEBEE"]  # light blue, gray, light red
})

# Step 5. Build a layer with the sentiment zones
zone_chart = (
    alt.Chart(zones)
    .mark_rect()
    .encode(
        y="y0:Q",
        y2="y1:Q",
        # scale=None uses the hex codes in the column as literal colors
        color=alt.Color("color:N", scale=None, legend=None)
    )
)

# Speaker -> color mapping, defined once so the points and the trend lines
# always use the same colors.
sentiment_speaker_scale = alt.Scale(
    domain=["President/Official", "Journalist"],
    range=[color_president, color_journalist]
)

# Step 6. Points with individual interventions classified
points = (
    alt.Chart(conf_sent)
    .mark_circle(size=55, opacity=1)
    .encode(
        x=alt.X("intervention_order:Q", title="Intervention Order"),
        y=alt.Y("sentiment_score:Q", title="Sentiment Score (-1 = Negative, +1 = Positive)"),
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, title="Speaker"),
        tooltip=[
            "intervention_order:Q",
            "speaker_group:N",
            "sentiment_label:N",
            alt.Tooltip("p_pos:Q", title="P(Pos)", format=".2f"),
            alt.Tooltip("p_neu:Q", title="P(Neu)", format=".2f"),
            alt.Tooltip("p_neg:Q", title="P(Neg)", format=".2f"),
            alt.Tooltip("text:N", title="Excerpt")
        ]
    )
)

# Step 7. Smooth trends per group (LOESS fitted separately for each speaker group)
smooth_lines = (
    alt.Chart(conf_sent)
    .transform_loess("intervention_order", "sentiment_score", groupby=["speaker_group"])
    .mark_line(size=3)
    .encode(
        x="intervention_order:Q",
        y="sentiment_score:Q",
        # The legend is already drawn by the points layer
        color=alt.Color("speaker_group:N", scale=sentiment_speaker_scale, legend=None)
    )
)

# Step 8. Neutral line
neutral_line = (
    alt.Chart(pd.DataFrame({"y": [0]}))
    .mark_rule(color="black", strokeDash=[4, 2])
    .encode(y="y:Q")
)

# Step 9. Combine all layers (background zones first, trend lines on top)
conference_day = pd.to_datetime(target_date)
chart_sentiment = (
    zone_chart + neutral_line + points + smooth_lines
).properties(
    title={
        "text": [f"Evolution of Sentiment during the Mañanera — {conference_day.date()}"],
        "subtitle": [subtitle_text],
        "anchor": "middle",
        "fontSize": 18,
        "subtitleFontSize": 13,
        "subtitleColor": "#555"
    },
    width=1100,
    height=450
).configure_view(stroke=None)

# Step 10. Save chart
# The file name is derived from target_date so that reusing this cell with
# another date cannot overwrite the figure of a different conference.
file_name = f"05_sentiment_{conference_day.strftime('%Y_%m_%d')}.svg"
full_path = f"{output_folder}/{file_name}"
chart_sentiment.save(full_path)
chart_sentiment