# Information visualization - system C

In [7]:
import pandas as pd
import altair as alt
import panel as pn
from vega_datasets import data
import numpy as np
from typing import List, Dict, Optional, Union
import logging
import ipywidgets as widgets

# log
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

pn.extension("vega")

In [8]:
# select weather features and cities
WEATHER_FEATURES = ["humidity", "precipitation", "temp_mean", "temp_min", "temp_max"]
CITIES = [
    "BASEL",
    "BUDAPEST",
    "DE_BILT",
    "DRESDEN",
    "DUSSELDORF",
    "HEATHROW",
    "KASSEL",
    "LJUBLJANA",
    "MAASTRICHT",
    "MALMO",
    "MONTELIMAR",
    "MUENCHEN",
    "OSLO",
    "PERPIGNAN",
    "ROMA",
    "SONNBLICK",
    "STOCKHOLM",
    "TOURS",
]

# city coordinates (for map)
CITY_COORDINATES = {
    "BASEL": [7.587708367346356, 47.55864674084195],
    "BUDAPEST": [19.033473479848716, 47.502437727500194],
    "DE_BILT": [5.182315082417806, 52.10908465797479],
    "DRESDEN": [13.757467830589755, 51.04996305973192],
    "DUSSELDORF": [6.779319650956459, 51.22329021198473],
    "HEATHROW": [-0.4548854457137284, 51.46898948710798],
    "KASSEL": [9.489794353462973, 51.31163822333451],
    "LJUBLJANA": [14.491583792721777, 46.057273386073],
    "MAASTRICHT": [5.685110598679014, 50.8510488887487],
    "MALMO": [12.997430301600138, 55.605259724404085],
    "MONTELIMAR": [4.750217189048996, 44.55781892343568],
    "MUENCHEN": [11.567915922004671, 48.132800647098136],
    "OSLO": [10.761074484519954, 59.91544378923583],
    "PERPIGNAN": [2.897298172031163, 42.687392709153016],
    "ROMA": [12.483112486306219, 41.914552114205705],
    "SONNBLICK": [12.957204503640597, 47.05496669914596],
    "STOCKHOLM": [18.077905872662523, 59.330046148192885],
    "TOURS": [0.44567552606764826, 48.14459973023859],
}

# map feature colors
FEATURE_COLORS = {
    "humidity": "rgba(127, 192, 127, 0.8)",  # Soft green
    "precipitation": "rgba(70, 130, 180, 0.8)",  # Dark blue
    "temp_mean": "rgba(244, 162, 97, 0.8)",  # orange color
    "temp_min": "rgba(233, 196, 106, 0.8)",  # Golden
    "temp_max": "rgba(231, 111, 81, 0.8)",  # Terracotta
}

# map feature shapes
FEATURE_SHAPES = {
    "humidity": "circle",
    "precipitation": "square",
    "temp_mean": "triangle",
    "temp_min": "diamond",
    "temp_max": "cross",
}

# Month Name to Number Mapping
MONTH_MAP = {
    1: "January", 2: "February", 3: "March", 4: "April", 5: "May", 6: "June",
    7: "July", 8: "August", 9: "September", 10: "October", 11: "November", 12: "December"
}

In [9]:
class WeatherDataProcessor:
    def __init__(self, file_path: str):
        """Initialize data processor"""
        try:
            self.df = pd.read_csv(file_path)
            self._preprocess_data()
        except Exception as e:
            logger.error(f"Data loading failed: {str(e)}")
            raise

    def _preprocess_data(self) -> None:
        """Data preprocessing"""
        try:
            self.df["DATE"] = pd.to_datetime(self.df["DATE"], format="%Y%m%d")
            self.df["YEAR"] = self.df["DATE"].dt.year
            self.df["MONTH"] = self.df["DATE"].dt.month
        except Exception as e:
            logger.error(f"Data preprocessing failed: {str(e)}")
            raise

    @staticmethod
    def extract_city_feature(col_name: str) -> tuple:
        """Extract city and feature information from column names"""
        try:
            # Handling city names containing underscores
            parts = col_name.split("_")
            if len(parts) < 2:
                return None, None

            # Process city names
            if parts[0] == "DE":
                city = "DE_BILT"
                feature = "_".join(parts[2:])
            else:
                city = parts[0]
                feature = "_".join(parts[1:])

            # Check if feature is in predefined list
            if feature in WEATHER_FEATURES:
                return city, feature
            return None, None
        except Exception as e:
            logger.error(f"Feature extraction failed: {str(e)}")
            return None, None

    def prepare_data(
        self, selected_year: int, selected_months: List[int], selected_feature: str
    ) -> pd.DataFrame:
        """Preparing data for visualization"""
        try:
            filtered_df = self.df[self.df["YEAR"] == selected_year]

            if not selected_months:
                selected_months = list(range(1, 13))
            filtered_df = filtered_df[filtered_df["MONTH"].isin(selected_months)]

            feature_columns = [
                col for col in self.df.columns if col.endswith(f"_{selected_feature}")
            ]

            if not feature_columns:
                logger.warning(f"Feature not found '{selected_feature}' Data")
                return pd.DataFrame()

            # Select the required columns
            filtered_df = filtered_df[["YEAR", "MONTH"] + feature_columns]
            # Handling missing values
            filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")

            melted_df = filtered_df.melt(
                id_vars=["YEAR", "MONTH"],
                value_vars=feature_columns,
                var_name="City_Feature",
                value_name="Value",
            )

            melted_df[["City", "Feature"]] = melted_df["City_Feature"].apply(
                lambda x: pd.Series(self.extract_city_feature(x))
            )
            melted_df.dropna(subset=["City", "Feature", "Value"], inplace=True)

            agg_func = "sum" if selected_feature == "precipitation" else "mean"
            # Aggregate data by city and month
            result_df = (
                melted_df.groupby(["City", "MONTH"])["Value"]
                .agg(agg_func)
                .reset_index()
            )

            # Ensure all cities have data
            all_cities = pd.DataFrame({"City": CITIES})
            result_df = all_cities.merge(result_df, on="City", how="left")

            # Make sure the month is not empty
            result_df["MONTH"] = result_df["MONTH"].fillna(1)

            # Add feature columns
            result_df["Feature"] = selected_feature

            return result_df

        except Exception as e:
            logger.error(f"Data preparation failed: {str(e)}")
            return pd.DataFrame()

In [10]:
class WeatherVisualizer:
    
    def __init__(self, data_processor: WeatherDataProcessor):
        """Initialize the visualizer"""
        self.data_processor = data_processor

    def generate_charts(self, selected_year: int, selected_months: List[int],
                    selected_feature: str, selected_cities: List[str]) -> alt.Chart:
        """Generate map visualization"""
        try:
            data = self.data_processor.prepare_data( selected_year, selected_months, selected_feature )
            data = data[data['Feature']==selected_feature]

            if data.empty:
                return alt.Chart().mark_text(text="No data available").properties( width=800, height=500 )

            # Add coordinate information
            data['Longitude'] = data['City'].map(lambda x: CITY_COORDINATES.get(x, [None, None])[0])
            data['Latitude'] = data['City'].map(lambda x: CITY_COORDINATES.get(x, [None, None])[1])
            data = data.dropna(subset=['Latitude', 'Longitude'])

            # Print debug information
            logger.info(f"Map Data Shape: {data.shape}")
            print("Map Data Preview:")
            print(data.head())  # Check if data exists and is structured correctly

            # Interaction Settings
            hover = alt.selection_point(on='pointerover', nearest=True, fields=['Longitude', 'Latitude'])
            brush = alt.selection_interval(encodings=['longitude', 'latitude'], name='brush', empty=True)

            # Mapping
            points = alt.Chart(data).mark_circle().encode(
                longitude='Longitude:Q',
                latitude='Latitude:Q',
                size=alt.Size('Value:Q', scale=alt.Scale(range=[50, 800]), title='value'),
                color=alt.Color('Feature:N',
                    scale=alt.Scale(domain=list(FEATURE_COLORS.keys()),
                    range=list(FEATURE_COLORS.values())),
                    legend=alt.Legend(title="feature")),
                tooltip=['City:N', 'Longitude:Q', 'Latitude:Q', 'Feature:N', 'Value:Q'],
                opacity=alt.condition(hover | brush, alt.value(1), alt.value(0.1))
            ).add_params(hover, brush)

            # Background Map
            europe_url = "https://raw.githubusercontent.com/leakyMirror/map-of-europe/master/TopoJSON/europe.topojson"
            europe = alt.topo_feature(europe_url, "europe")
            background = alt.Chart(europe).mark_geoshape(
                fill='lightgray', stroke='white'
            ).properties(width=800, height=850).project(type='mercator')

            # brush chart connect to map
            brush_chart = alt.Chart(data).mark_bar().encode(
                x=alt.X('mean(Value):Q' if selected_feature in ['humidity', 'temp_mean', 'temp_min', 'temp_max'] else 'sum(Value):Q',
                        title="mean" if selected_feature in ['humidity', 'temp_mean', 'temp_min', 'temp_max'] else "sum" ),
                y=alt.Y('City:N', title="locations", sort="-x"),
                color=alt.Color('MONTH', title="month", sort=list(range(1, 13)), scale=alt.Scale(scheme='category20b')),
                order=alt.Order('MONTH:O', sort='ascending'),
                tooltip=['City:N', 'MONTH:O', 'mean(Value):Q' if selected_feature in ['humidity', 'temp_mean', 'temp_min', 'temp_max'] else 'sum(Value):Q'],
            ).properties( width=530, height=350 ).transform_filter(brush)

            # City filter
            if selected_cities:
                points = points.encode( opacity=alt.condition(
                    alt.FieldOneOfPredicate(field='City', oneOf=selected_cities),
                    alt.value(1), alt.value(0.1)
                ))
                brush_chart = brush_chart.encode( opacity=alt.condition(
                    alt.FieldOneOfPredicate(field='City', oneOf=selected_cities),
                    alt.value(1), alt.value(0.1)
                ))
                data = data[data['City'].isin(selected_cities)]

            # Background Year
            background_year = alt.Chart(pd.DataFrame({'Year': [selected_year]})).mark_text(
                baseline='middle',
                fontSize=96,
                opacity=0.15,
                color='#222222'
            ).encode(text=alt.Text('Year:O'))

            # Scatter plot
            scatter = alt.Chart(data).mark_point(size=100).encode(
                x=alt.X('MONTH:O', title='month'),
                y=alt.Y('Value:Q', title='feature'),
                color=alt.Color('City:N', scale=alt.Scale(scheme='category20'), title='city'),
                tooltip=['City:N', 'MONTH:O', 'Feature:N', 'Value:Q'],
                opacity=alt.condition(hover, alt.value(1), alt.value(0.5))
            ).properties(width=480, height=350).add_params(hover)

            # Hover Connection
            hover_line = alt.layer(
                alt.Chart(data).mark_line().encode(
                    x='MONTH:O',
                    y='Value:Q',
                    color='City:N',
                    opacity=alt.condition(hover, alt.value(0.8), alt.value(0))
                ),
                alt.Chart(data).mark_point(size=50).encode(
                    opacity=alt.condition(hover, alt.value(0.8), alt.value(0))
                )
            )

            return background + points | brush_chart & background_year + scatter + hover_line
        except Exception as e:
            logger.error(f"Map generation failed: {str(e)}")
            return alt.Chart().mark_text(text="fail").properties(width=800, height=500)

In [11]:

# Define color themes
ACCENT_COLOR = "#007bff"
DARK_BG = "#f8f9fa"
TEXT_COLOR = "#333"
BORDER_COLOR = "#ccc"




def create_dashboard():
    """Create a dashboard"""
    try:
        # Initialize the data processor
        data_processor = WeatherDataProcessor("dataset/weather_prediction_dataset.csv")
        visualizer = WeatherVisualizer(data_processor)

        # UI
        year_selector = pn.widgets.IntSlider(
            name="Year",
            start=data_processor.df["YEAR"].min(),
            end=data_processor.df["YEAR"].max(),
            step=1,
            value=data_processor.df["YEAR"].min(),
        )
        year_selector.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "background-color": "#ffffff",
                "padding": "5px",
                "border-radius": "5px",
                "flex-grow": "1",  # Makes it expand within the container
                "width": "90%",  # Ensures full width
            }
        )
        # Month Selector (Displays names but sends numbers)
        month_selector = pn.widgets.CheckBoxGroup(
            name="Month",
            options=list(range(1, 13)),
            inline=False,
        )
        month_selector.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "background-color": "#ffffff",
                "padding": "5px",
                "border-radius": "5px",
                "flex-grow": "1",  # Makes it expand within the container
                "width": "90%",  # Ensures full width
            }
        )

        feature_selector = pn.widgets.Select(
            name="feature",
            options=WEATHER_FEATURES,
            value="temp_mean",
            width=250,
        )
        feature_selector.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "background-color": "#ffffff",
                "padding": "5px",
                "border-radius": "5px",
                "flex-grow": "1",  # Makes it expand within the container
                "width": "90%",  # Ensures full width
            }
        )

        city_selector = pn.widgets.CheckBoxGroup(
            name="locations",
            options=CITIES,
            inline=False,
        )
        city_selector.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "background-color": "#ffffff",
                "margin-top:": "10px",
                "padding": "5px",
                "border-radius": "5px",
                "flex-grow": "1",  # Makes it expand within the container
                "width": "90%",  # Ensures full width
            }
        )

        # Create a chart
        charts = pn.pane.Vega(
            visualizer.generate_charts(
                selected_year=year_selector.value,
                selected_months=month_selector.value,
                selected_feature=feature_selector.value,
                selected_cities=city_selector.value,
            ),
        )
        charts.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "padding": "5px",
                "border-radius": "5px",
            }
        )
        
        # Interactive update function
        def update_charts(event):
            charts.object = visualizer.generate_charts(
                selected_year=year_selector.value,
                selected_months=month_selector.value,
                selected_feature=feature_selector.value,
                selected_cities=city_selector.value,
            )

        # Adding event listeners
        year_selector.param.watch(update_charts, "value")
        month_selector.param.watch(update_charts, "value")
        feature_selector.param.watch(update_charts, "value")
        city_selector.param.watch(update_charts, "value")

        # Bottom Control Bar
        right_controller = pn.Column(
                year_selector,
                feature_selector,
                pn.pane.Markdown("month"),
                month_selector,
                pn.pane.Markdown("location"),
                city_selector,
        )
        right_controller.styles.update(
            {
                "border": f"2px solid {BORDER_COLOR}",
                "background-color": DARK_BG,
                "color": TEXT_COLOR,
                "padding": "5px",
                "margin": "5px",
                "border-radius": "8px",
                "display": "flex",
                "justify-content": "center",
                "align-items": "center",
            }
        )

        # Top Title
        header = pn.pane.Markdown("# 🌍 Interactive Weather Dashboard")
        header.styles.update(
            {
                "font-size": "10px",
                "font-weight": "bold",
                "text-align": "center",
                "color": ACCENT_COLOR,
                "padding": "10px",
                "position": "absolute",  # Floating above the chart
                "top": "10px",  # Position from top
                "z-index": "1000",  # Ensure it stays above other elements
            }
        )


        # Dashboard layout
        dashboard = pn.Row(
            header,
            pn.Column(charts, sizing_mode="stretch_both"),
            pn.Column(right_controller),
            sizing_mode="stretch_both",
            align="center",
        )

        return dashboard

    except Exception as e:
        logger.error(f"Dashboard creation failed: {str(e)}")
        return pn.Column(pn.pane.Markdown("# ❌ Dashboard creation failed"))

In [None]:
# Create and display a dashboard
dashboard = create_dashboard()
dashboard.show(threaded=True)

  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (216, 6)
INFO:bokeh.server.server:Starting Bokeh server version 3.6.0 (running on Tornado 6.4.1)


Map Data Preview:
    City  MONTH      Value    Feature  Longitude   Latitude
0  BASEL      1   2.045161  temp_mean   7.587708  47.558647
1  BASEL      2   5.662069  temp_mean   7.587708  47.558647
2  BASEL      3   7.609677  temp_mean   7.587708  47.558647
3  BASEL      4  11.263333  temp_mean   7.587708  47.558647
4  BASEL      5  16.238710  temp_mean   7.587708  47.558647


<StoppableThread(Thread-6 (get_server), started 8084)>

INFO:bokeh.server.tornado:User authentication hooks NOT provided (default user enabled)


Launching server at http://localhost:54642


INFO:tornado.access:200 GET / (::1) 206.73ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/reactiveesm/es-module-shims@%5E1.10.0/dist/es-module-shims.min.js (::1) 2.00ms
INFO:tornado.access:200 GET /static/js/bokeh-gl.min.js?v=674dcea77483f8996a069d63e07657e6f744a37d10997e93f172179a5ba5d1c9d7621815eeb5bd909aa463644476c1382f6254b9f387002e9ccfa6e640ddf0bb (::1) 2.43ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/vegaplot/vega-lite@5 (::1) 7.44ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/vegaplot/vega-embed@6 (::1) 10.45ms
INFO:tornado.access:200 GET /static/js/bokeh-tables.min.js?v=676e60ffbaef467a0f8e2321dd663a259b7d45834cb8f2b27597650b8bb80ec0288acae28a58fffa5d504d54d3a99ae27a03b20ad03eec4aa18bf8c8cfb4ec4b (::1) 0.00ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/vegaplot/vega@5 (::1) 19.64ms
INFO:tornado.access:200 GET /static/js/bokeh-widgets.min.js?v=fbdc924ac6c91fc9327f170e493da343090822b9951b2eaec18e0a2a4811fc596091

Map Data Preview:
         City  MONTH     Value    Feature  Longitude   Latitude
0       BASEL     11  7.700000  temp_mean   7.587708  47.558647
1    BUDAPEST     11  8.976667  temp_mean  19.033473  47.502438
2     DE_BILT     11  7.790000  temp_mean   5.182315  52.109085
3     DRESDEN     11  6.796667  temp_mean  13.757468  51.049963
4  DUSSELDORF     11  8.316667  temp_mean   6.779320  51.223290


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (36, 6)


Map Data Preview:
       City  MONTH     Value    Feature  Longitude   Latitude
0     BASEL      2  5.662069  temp_mean   7.587708  47.558647
1     BASEL     11  7.700000  temp_mean   7.587708  47.558647
2  BUDAPEST      2  4.562069  temp_mean  19.033473  47.502438
3  BUDAPEST     11  8.976667  temp_mean  19.033473  47.502438
4   DE_BILT      2  5.855172  temp_mean   5.182315  52.109085


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (54, 6)


Map Data Preview:
       City  MONTH      Value    Feature  Longitude   Latitude
0     BASEL      2   5.662069  temp_mean   7.587708  47.558647
1     BASEL     10  11.941935  temp_mean   7.587708  47.558647
2     BASEL     11   7.700000  temp_mean   7.587708  47.558647
3  BUDAPEST      2   4.562069  temp_mean  19.033473  47.502438
4  BUDAPEST     10  14.209677  temp_mean  19.033473  47.502438


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (72, 6)


Map Data Preview:
       City  MONTH      Value    Feature  Longitude   Latitude
0     BASEL      2   5.662069  temp_mean   7.587708  47.558647
1     BASEL      6  18.953333  temp_mean   7.587708  47.558647
2     BASEL     10  11.941935  temp_mean   7.587708  47.558647
3     BASEL     11   7.700000  temp_mean   7.587708  47.558647
4  BUDAPEST      2   4.562069  temp_mean  19.033473  47.502438


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (108, 6)


Map Data Preview:
    City  MONTH      Value    Feature  Longitude   Latitude
0  BASEL      2   5.662069  temp_mean   7.587708  47.558647
1  BASEL      6  18.953333  temp_mean   7.587708  47.558647
2  BASEL      7  17.445161  temp_mean   7.587708  47.558647
3  BASEL      8  20.235484  temp_mean   7.587708  47.558647
4  BASEL     10  11.941935  temp_mean   7.587708  47.558647


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (108, 6)


Map Data Preview:
    City  MONTH      Value    Feature  Longitude   Latitude
0  BASEL      2   5.662069  temp_mean   7.587708  47.558647
1  BASEL      6  18.953333  temp_mean   7.587708  47.558647
2  BASEL      7  17.445161  temp_mean   7.587708  47.558647
3  BASEL      8  20.235484  temp_mean   7.587708  47.558647
4  BASEL     10  11.941935  temp_mean   7.587708  47.558647


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (108, 6)


Map Data Preview:
    City  MONTH      Value    Feature  Longitude   Latitude
0  BASEL      2   5.662069  temp_mean   7.587708  47.558647
1  BASEL      6  18.953333  temp_mean   7.587708  47.558647
2  BASEL      7  17.445161  temp_mean   7.587708  47.558647
3  BASEL      8  20.235484  temp_mean   7.587708  47.558647
4  BASEL     10  11.941935  temp_mean   7.587708  47.558647


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (108, 6)


Map Data Preview:
    City  MONTH      Value    Feature  Longitude   Latitude
0  BASEL      2   3.096552  temp_mean   7.587708  47.558647
1  BASEL      6  17.603333  temp_mean   7.587708  47.558647
2  BASEL      7  19.396774  temp_mean   7.587708  47.558647
3  BASEL      8  19.783871  temp_mean   7.587708  47.558647
4  BASEL     10  12.190323  temp_mean   7.587708  47.558647


  filtered_df = filtered_df.fillna(method="ffill").fillna(method="bfill")
INFO:__main__:Map Data Shape: (98, 6)


Map Data Preview:
    City  MONTH     Value   Feature  Longitude   Latitude
0  BASEL    2.0  0.729655  humidity   7.587708  47.558647
1  BASEL    6.0  0.684000  humidity   7.587708  47.558647
2  BASEL    7.0  0.674516  humidity   7.587708  47.558647
3  BASEL    8.0  0.726129  humidity   7.587708  47.558647
4  BASEL   10.0  0.844194  humidity   7.587708  47.558647


ERROR:bokeh.server.protocol_handler:error handling message
 message: Message 'PATCH-DOC' content: {'events': [{'kind': 'MessageSent', 'msg_type': 'bokeh_event', 'msg_data': {'type': 'event', 'name': 'vega_event', 'values': {'type': 'map', 'entries': [['model', {'id': 'bf698711-4ded-4334-a562-f875461db0cc'}], ['data', {'type': 'map', 'entries': [['type', 'param_10'], ['value', [{'type': 'map', 'entries': [['Longitude', 18.077905872662523]]}, {'type': 'map', 'entries': [['Latitude', 59.330046148192885]]}]]]}]]}}}]} 
 error: ValueError("'param_10' is not a parameter of Selection")
Traceback (most recent call last):
  File "c:\Users\54062\anaconda3\Lib\site-packages\bokeh\server\protocol_handler.py", line 94, in handle
    work = await handler(message, connection)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\54062\anaconda3\Lib\site-packages\bokeh\server\session.py", line 94, in _needs_document_lock_wrapper
    result = func(self, *args, **kwargs)
             ^^^^^^^^^^^