# Information visualization - system A

In [3]:
import pandas as pd
import altair as alt
import panel as pn
import itertools

In [4]:
# Make Altair unlimited by the number of lines
alt.data_transformers.disable_max_rows()

######################
# 1. Read & preprocess data
######################
CSV_FILE = "dataset/weather_prediction_dataset.csv"
df_raw = pd.read_csv(CSV_FILE)
df_raw['DATE'] = pd.to_datetime(df_raw['DATE'], format='%Y%m%d')
weather_remove = {'cloud_cover', 'pressure', 'global_radiation', 'sunshine', 'wind_speed', 'wind_gust'}
df_raw = df_raw.drop(columns=[col for col in df_raw.columns if any(keyword in col for keyword in weather_remove)])

###########################
# 1.1 Correction of parsing city and attribute functions
###########################
def extract_cities_and_attrs(df):
    exclude_cols = ['DATE']  # Columns to exclude
    weather_cols = [c for c in df.columns if c not in exclude_cols]

    cities = set()
    attrs = set()

    for col in weather_cols:
        if col.startswith("DE_BILT_"):
            city = "DE_BILT"
            attribute = col[len("DE_BILT_"):]
        else:
            idx = col.find('_')
            if idx == -1:
                continue
            city = col[:idx]
            attribute = col[idx+1:]

        cities.add(city)
        attrs.add(attribute)

    return sorted(cities), sorted(attrs)

all_cities, all_attrs = extract_cities_and_attrs(df_raw)

In [5]:
##################
# 2. Define the Panel control
##################
pn.extension()

# 2.1 Select a city (multiple choices)
city_select = pn.widgets.MultiChoice(
    name="Select Cities",
    options=all_cities,
    value=["DE_BILT"],
    placeholder="Add or remove cities...",
    solid=True,
    width=515
)

# 2.2 Select attribute (single choice)
attr_select = pn.widgets.Select(
    name="Select Attribute",
    options=all_attrs,
    value="temp_mean",
    width=320
)

# 2.3 Time Range Slider
date_start = df_raw['DATE'].min()
date_end = df_raw['DATE'].max()
date_range = pn.widgets.DateRangeSlider(
    name="Date Range",
    start=date_start,
    end=date_end,
    value=(date_start, date_end),
    step=1,
    width=850
)

# 2.4 Data Table
data_table = pn.widgets.DataFrame(df_raw, width=900, height=400)

# 2.5 Color selection control
default_colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
                  "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
color_cycle = itertools.cycle(default_colors)

city_colors = {city: next(color_cycle) for city in all_cities}

selected_city = pn.widgets.Select(name="Choose City", options=all_cities, value="DE_BILT")
selected_color = pn.widgets.ColorPicker(name="Pick Color", value=city_colors["DE_BILT"], width=150)

def update_color_picker(event):
    selected_color.value = city_colors[selected_city.value]

selected_city.param.watch(update_color_picker, 'value')

def update_city_color(event):
    city_colors[selected_city.value] = selected_color.value
    update_plot()

selected_color.param.watch(update_city_color, 'value')

Watcher(inst=ColorPicker(name='Pick Color', value='#2ca02c', width=150), cls=<class 'panel.widgets.input.ColorPicker'>, fn=<function update_city_color at 0x000002554A098C20>, mode='args', onlychanged=True, parameter_names=('value',), what='value', queued=False, precedence=0)

In [6]:
###############
# 3. Pie chart function
###############
def make_pie_chart(attribute, date_tuple):
    # Global objects used in outer layers
    global df_raw, city_select, city_colors, all_cities

    start_date, end_date = date_tuple

    # 1) Filter time range
    df_filtered = df_raw[
        (df_raw["DATE"] >= pd.to_datetime(start_date)) & 
        (df_raw["DATE"] <= pd.to_datetime(end_date))
    ].copy()

    # 2) Calculate the sum of the corresponding attributes of "all cities" (or change to average value, etc.)
    data_list = []
    for city in all_cities:
        col_name = f"{city}_{attribute}"
        if col_name in df_filtered.columns:
            total_val = df_filtered[col_name].sum()
            data_list.append({"city": city, "value": total_val})
    df_pie = pd.DataFrame(data_list)

    # If there is no data, return to the prompt image
    if df_pie.empty:
        return alt.Chart(pd.DataFrame({'note': ['No data available.']})) \
                  .mark_text().encode(text='note:N') \
                  .properties(width=300, height=300)

    # 3) Calculate the proportion, which will be used later when marking
    total_value = df_pie["value"].sum()
    df_pie["pct"] = df_pie["value"] / total_value

    # 4) Define color and highlight logic
    color_scale = alt.Scale(
        domain=list(city_colors.keys()),
        range=list(city_colors.values())
    )
    # Determine whether the city is in city_select.value
    highlight_condition = alt.FieldOneOfPredicate(field='city', oneOf=city_select.value)

    # 5) The color of the pie chart body uses the color corresponding to the city, and the opacity is controlled by the condition
    pie_chart = alt.Chart(df_pie).mark_arc().encode(
        theta=alt.Theta("value:Q", stack=True),
        color=alt.Color("city:N", scale=color_scale),
        opacity=alt.condition(
            highlight_condition,
            alt.value(1.0),    # Selected city: opaque
            alt.value(0.3)     # Unselected cities: semi-transparent
        ),
        tooltip=[
            alt.Tooltip("city:N", title="城市"),
            alt.Tooltip("value:Q", title="值"),
            alt.Tooltip("pct:Q", title="占比", format=".1%")
        ]
    ).properties(width=400, height=300)

    # 6) The radius, size and other parameters of the mark_text that displays percentages on the pie chart can be fine-tuned as needed
    text_labels = alt.Chart(df_pie).mark_text(radius=130, size=12).encode(
        theta=alt.Theta("value:Q", stack=True),
        text=alt.Text("pct:Q", format=".1%"),
        color=alt.value("white")
    )

    return pie_chart

In [7]:
###############
# 4. Core drawing functions
###############
def make_charts(cities, attribute, date_tuple):
    """
    返回一个 Altair HConcat：左侧是 vconcat(大图 + 小图)，右侧是饼图
    """
    start_date, end_date = date_tuple

    # 1) Filter time range
    df_filtered = df_raw[
        (df_raw["DATE"] >= pd.to_datetime(start_date)) &
        (df_raw["DATE"] <= pd.to_datetime(end_date))
    ].copy()

    # 2) Melt the required fields into a table format
    melt_records = []
    for city in cities:
        col_name = f"{city}_{attribute}"
        if col_name in df_filtered.columns:
            for _, row in df_filtered.iterrows():
                melt_records.append({
                    "DATE": row["DATE"],
                    "city": city,
                    "value": row[col_name]
                })

    df_melt = pd.DataFrame(melt_records)
    if df_melt.empty:
        return alt.Chart(pd.DataFrame({'note': ['No data available.']})).mark_text().encode(text='note:N')

    # 3) Define the "nearest point" selection (single selection, based on DATE)
    hover = alt.selection_single(
        fields=["DATE"],
        nearest=True,
        on="mouseover",
        empty="none",
        clear="mouseout"
    )

    color_scale = alt.Scale(
        domain=list(city_colors.keys()), 
        range=list(city_colors.values())
    )

    # Big picture: multi-layer writing
    lines = alt.Chart(df_melt).mark_line().encode(
        x=alt.X("DATE:T", title="Date"),
        y=alt.Y("value:Q", title=f"{attribute} Value"),
        color=alt.Color("city:N", scale=color_scale, legend=alt.Legend(title="City"))
    )

    selectors = alt.Chart(df_melt).mark_point().encode(
        x="DATE:T",
        opacity=alt.value(0)
    ).add_selection(hover)

    points = lines.mark_point().encode(
        opacity=alt.condition(hover, alt.value(1), alt.value(0))
    )

    rule = alt.Chart(df_melt).mark_rule(color='gray').encode(
        x="DATE:T",
        tooltip=[
            alt.Tooltip("DATE:T", title="Date"),
            alt.Tooltip("city:N", title="City"),
            alt.Tooltip("value:Q", title=f"{attribute} Value")
        ]
    ).transform_filter(hover)

    big_chart = alt.layer(
        lines, selectors, points, rule
    ).properties(
        width=600,
        height=300
    ).interactive()

    # Small picture: stacked bar chart
    small_chart = alt.Chart(df_melt).mark_bar(size=2).encode(
        x=alt.X("DATE:T", title="Date"),
        y=alt.Y("value:Q", aggregate="sum", title=f"{attribute} Cumulative Value"),
        color=alt.Color("city:N", scale=color_scale),
        opacity=alt.condition(hover, alt.value(1), alt.value(0.5)),
        stroke=alt.condition(hover, alt.value('black'), alt.value(None)),
        strokeWidth=alt.condition(hover, alt.value(1), alt.value(0)),
        tooltip=[
            alt.Tooltip("DATE:T", title="Date"),
            alt.Tooltip("city:N", title="City"),
            alt.Tooltip("value:Q", aggregate="sum", title=f"{attribute} Cumulative Value")
        ]
    ).properties(
        width=600,
        height=150
    ).add_selection(hover)

    # Pie Chart
    pie_chart = make_pie_chart(attribute, (start_date, end_date))

    # Combination: vconcat (large picture + small picture) on the left, pie chart on the right
    return alt.hconcat(
        alt.vconcat(big_chart, small_chart, spacing=20),
        pie_chart
    )

In [8]:
##############
# 5. Statistical summary
##############
summary_pane = pn.pane.Markdown(sizing_mode="stretch_width")

def update_summary():
    """
    Calculate the "average" (or other statistical indicators) for the currently selected city, attribute, and date range, and update it to summary_pane.
    """
    cities = city_select.value
    attribute = attr_select.value
    start_date, end_date = date_range.value
    
    # 1) Filter time range
    df_filtered = df_raw[
        (df_raw["DATE"] >= pd.to_datetime(start_date)) &
        (df_raw["DATE"] <= pd.to_datetime(end_date))
    ].copy()

    # 2) Calculate the average for each city
    lines = []
    for city in cities:
        col_name = f"{city}_{attribute}"
        if col_name in df_filtered.columns:
            avg_val = df_filtered[col_name].mean()
            if pd.notna(avg_val):
                lines.append(f"- **{city}** average {attribute}：`{avg_val:.2f}` on the selected days.")

    if lines:
        md_text = "### Statistical Results\n" + "\n".join(lines)
    else:
        md_text = "### Statistical Results\n*No data available for current selection.*"
    
    summary_pane.object = md_text

In [9]:
##############
# 6. Callback function
##############
plot_pane = pn.pane.Vega(sizing_mode="stretch_width")

def update_plot(event=None):
    sel_cities = city_select.value
    sel_attr = attr_select.value
    sel_dates = date_range.value
    chart = make_charts(sel_cities, sel_attr, sel_dates)
    plot_pane.object = chart

    # When the drawing is updated, the statistics are also updated
    update_summary()

# Register the control's callback
city_select.param.watch(update_plot, 'value')
attr_select.param.watch(update_plot, 'value')
date_range.param.watch(update_plot, 'value')

# init
update_plot()

Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)


In [10]:
#########
# 7. Layout
#########
widgets = pn.Column(
    "<h3>Selection conditions</h3>",
    pn.Row(city_select, attr_select, width=300),
    date_range,
    width=300
)

tabs = pn.Tabs(
    ("Visualization", pn.Column(
        widgets,
        #Insert summary statistics panel above the chart
        summary_pane,
        pn.Spacer(height=10),
        pn.Column("<h3>Visualization results</h3>", plot_pane, sizing_mode="stretch_both"),
        width=600
    )),
    ("Data Sheet", pn.Column("<h3>Raw Data Table</h3>", data_table))
)

layout = pn.Column("<h2>Weather Data Analysis Panel</h2>", tabs)

def main():
    return layout

if __name__ == '__main__':
    pn.serve(layout, show=True)

Launching server at http://localhost:54557


Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(hover)
Deprecated since `altair=5.0.0`. Use add_params in