In [1]:
import altair as alt
import pandas as pd
import numpy as np
import re
import datetime as dt


alt.data_transformers.disable_max_rows()

# Fixed seed so the jittered coordinates (and therefore the rendered chart)
# are identical on every Restart & Run All.
JITTER_SEED = 42
JITTER_HALF_WIDTH = 0.5

# One regex-safe character per 1-based temporal value: a-z, then A-Z, then 0-9.
# The codes are later used as regular-expression patterns by `alt.expr.test`,
# so they must be (a) a single character, so a substring match is an exact
# match, and (b) free of regex metacharacters. The previous `chr(val - 1 + 97)`
# mapping produced '|' for the value 28, a pattern that matches every row.
CODE_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'


def code_map(val):
    """Map a 1-based day/week/month number to a unique, regex-safe character.

    Values 1-26 map to 'a'-'z' (unchanged from the earlier mapping), 27-52 to
    'A'-'Z' and 53-62 to '0'-'9'.

    Raises:
        ValueError: if `val` is outside 1..len(CODE_ALPHABET), instead of
            silently producing a colliding or unsafe code.
    """
    idx = int(val) - 1
    if not 0 <= idx < len(CODE_ALPHABET):
        raise ValueError(
            f'temporal value {val!r} is outside the supported range '
            f'1..{len(CODE_ALPHABET)}'
        )
    return CODE_ALPHABET[idx]


df = pd.read_csv('EU-UK-Weather-Data.csv')
df['date'] = pd.to_datetime(df['date'])
# Sequential 1-based row identifier used by the point selection on the scatter plot.
df['id'] = range(1, len(df) + 1)

# map temporal columns to a unique code
df['_day'] = df['day'].apply(code_map)
df['_week'] = df['week'].apply(code_map)
df['_month'] = df['month'].apply(code_map)

# add some jitter so that points don't overlap at a particular location
rng = np.random.default_rng(JITTER_SEED)
df['latitude_jitter'] = df['latitude'] + rng.uniform(-JITTER_HALF_WIDTH, JITTER_HALF_WIDTH, size=len(df))
df['longitude_jitter'] = df['longitude'] + rng.uniform(-JITTER_HALF_WIDTH, JITTER_HALF_WIDTH, size=len(df))

print(df.columns)
df.head()

Index(['country', 'location_name', 'latitude', 'longitude',
       'last_updated_epoch', 'temperature_celsius', 'humidity', 'date', 'day',
       'month', 'year', 'week', 'time', 'season', 'condition', 'id', '_day',
       '_week', '_month', 'latitude_jitter', 'longitude_jitter'],
      dtype='object')


Unnamed: 0,country,location_name,latitude,longitude,last_updated_epoch,temperature_celsius,humidity,date,day,month,...,week,time,season,condition,id,_day,_week,_month,latitude_jitter,longitude_jitter
0,Netherlands,Amsterdam,52.37,4.89,1715868000,20.0,68,2024-05-16,16,5,...,1,14:00:00,Spring,Clear/Sunny,1,p,a,e,51.998259,5.054819
1,Netherlands,Amsterdam,52.37,4.89,1715961600,16.0,88,2024-05-17,17,5,...,1,16:00:00,Spring,Clear/Sunny,2,q,a,e,51.956674,4.983213
2,Netherlands,Amsterdam,52.37,4.89,1716042600,22.0,50,2024-05-18,18,5,...,1,14:30:00,Spring,Rainy,3,r,a,e,52.126277,4.627543
3,Netherlands,Amsterdam,52.37,4.89,1716127200,22.0,57,2024-05-19,19,5,...,1,14:00:00,Spring,Clear/Sunny,4,s,a,e,52.628127,4.583572
4,Netherlands,Amsterdam,52.37,4.89,1716216300,19.0,78,2024-05-20,20,5,...,2,14:45:00,Spring,Rainy,5,t,b,e,52.660464,4.866866


In [2]:
# Overall canvas size; the individual views below take fractions of it.
WIDTH, HEIGHT = 800, 500

# Date selection: an x-axis brush on the timeline, pre-set to an initial range.
date_range = (dt.date(2024, 5, 20), dt.date(2024, 5, 27))
date_selection = alt.selection_interval(encodings=['x'], value={'x': date_range})

# Metric dropdown: chooses which column drives the point size.
dropdown = alt.binding_select(
    options=['temperature_celsius', 'humidity'],
    labels=['Temperature °C', 'Humidity %'],
    name='Metric '
)
size_param = alt.param(value='temperature_celsius', bind=dropdown)

# Scaling exponent slider: exponent of the 'pow' size scale.
exp_slider = alt.binding_range(min=1, max=10, step=1, name='Metric Scaling Exponent:')
exp_selection = alt.param(value=5, bind=exp_slider)

# Condition selection, driven by clicking entries in the legend.
condition_selection = alt.selection_point(fields=['condition'], bind='legend')

# ID point selection: the clicked point(s) on the scatter plot.
# Highlighting uses this selection directly as a predicate (exact match on
# `id`, and an empty selection matches every row). A regex `test` on the
# numeric id would also match ids that merely contain the selected digits
# (selecting 5 would light up 15, 50, 51, ...).
id_point = alt.selection_point(fields=['id'])

# Day, week, month, season selections.
# Each `*_matches` predicate is a regular-expression test of the clicked
# point's value against the row's value; the `_day`/`_week`/`_month` columns
# hold codes prepared for this purpose. Each checkbox switches the
# corresponding predicate on.
day_point = alt.selection_point(fields=['_day', '_week', '_month'])
day_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same day'))
day_matches = (
    alt.expr.test(day_point._day, alt.datum._day)
    & alt.expr.test(day_point._week, alt.datum._week)
    & alt.expr.test(day_point._month, alt.datum._month)
)

week_point = alt.selection_point(fields=['_week'])
week_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same week #'))
week_matches = alt.expr.test(week_point._week, alt.datum._week)

month_point = alt.selection_point(fields=['_month'])
month_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same month'))
month_matches = alt.expr.test(month_point._month, alt.datum._month)

season_point = alt.selection_point(fields=['season'])
season_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same season'))
season_matches = alt.expr.test(season_point.season, alt.datum.season)

# A row passes when no checkbox is ticked, or when it satisfies at least one
# of the ticked "same day/week/month/season" predicates.
not_checkbox = (~day_checkbox & ~week_checkbox & ~month_checkbox & ~season_checkbox)
selection_boolean = (
    not_checkbox
    | (day_checkbox & day_matches)
    | (week_checkbox & week_matches)
    | (month_checkbox & month_matches)
    | (season_checkbox & season_matches)
)

# Zoom/pan on the scatter plot.
scale_bind = alt.selection_interval(bind='scales')

# Temperature range sliders (min and max), spanning the observed data range.
temp_lo, temp_hi = df['temperature_celsius'].min(), df['temperature_celsius'].max()
temp_min_slider = alt.binding_range(min=temp_lo, max=temp_hi, step=0.1, name='Temperature (°C) Min:')
temp_max_slider = alt.binding_range(min=temp_lo, max=temp_hi, step=0.1, name='Temperature (°C) Max:')
temp_min = alt.param(value=temp_lo, bind=temp_min_slider)
temp_max = alt.param(value=temp_hi, bind=temp_max_slider)

# Humidity range sliders (min and max), spanning the observed data range.
humidity_lo, humidity_hi = df['humidity'].min(), df['humidity'].max()
humidity_min_slider = alt.binding_range(min=humidity_lo, max=humidity_hi, step=0.1, name='Humidity (%) Min:')
humidity_max_slider = alt.binding_range(min=humidity_lo, max=humidity_hi, step=0.1, name='Humidity (%) Max:')
humidity_min = alt.param(value=humidity_lo, bind=humidity_min_slider)
humidity_max = alt.param(value=humidity_hi, bind=humidity_max_slider)

# Case-insensitive city search; the typed text is used as a regular expression.
search_box = alt.param(
    value='',
    bind=alt.binding(input='search', placeholder='City', name='Search ')
)
search_matches = alt.expr.test(alt.expr.regexp(search_box, 'i'), alt.datum.location_name)

# Minimap: points inside the `scale_bind` interval keep their city colour,
# the rest are drawn light gray.
minimap = alt.Chart(df).mark_point().encode(
    x=alt.X('longitude_jitter:Q', title=None, axis=None),
    y=alt.Y('latitude_jitter:Q', title=None, axis=None),
    color=alt.when(scale_bind).then(alt.Color('location_name:N', legend=None)).otherwise(alt.value('lightgray')),
).transform_filter(
    date_selection
).transform_filter(
    selection_boolean
).transform_filter(
    search_matches,
).properties(
    width=0.1 * WIDTH,
    height=0.1 * HEIGHT,
)

# Scatter plot
scatter = alt.Chart(df).mark_point().encode(
    x=alt.X('longitude_jitter:Q', title='Longitude (Approx)'),
    y=alt.Y('latitude_jitter:Q', title='Latitude (Approx)'),
    shape=alt.Shape('condition:N', title='Condition', legend=alt.Legend(orient='left', columns=2)),
    color=alt.Color('location_name:N', title='City', legend=alt.Legend(orient='left', columns=2)),
    size=alt.Size('size:Q', title='Metric', scale=alt.Scale(type='pow', exponent=exp_selection), legend=alt.Legend(orient='left', columns=2)),
    # Rows failing `selection_boolean` are already removed by the
    # transform_filter below, so only the id selection decides the opacity.
    opacity=alt.condition(id_point, alt.value(1.0), alt.value(0.2)),
    tooltip=[alt.Tooltip('location_name', title='City'),
             alt.Tooltip('condition', title='Condition'),
             alt.Tooltip('date', title='Date'),
             alt.Tooltip('week', title='Week #'),
             alt.Tooltip('season', title='Season'),
             alt.Tooltip('temperature_celsius', title='Temperature (°C)'),
             alt.Tooltip('humidity', title='Humidity (%)')],
).transform_filter(
    date_selection
).transform_filter(
    scale_bind
).transform_filter(
    condition_selection
).transform_filter(
    alt.datum.temperature_celsius >= temp_min
).transform_filter(
    alt.datum.temperature_celsius <= temp_max
).transform_filter(
    alt.datum.humidity >= humidity_min
).transform_filter(
    alt.datum.humidity <= humidity_max
).transform_filter(
    selection_boolean
).transform_filter(
    search_matches,
).add_params(
    search_box,
    scale_bind,
    size_param,
    exp_selection,
    id_point,
    day_point, day_checkbox,
    week_point, week_checkbox,
    month_point, month_checkbox,
    season_point, season_checkbox,
    temp_min,
    temp_max,
    humidity_min,
    humidity_max,
).transform_calculate(
    # Look up the column named by the metric dropdown for each row.
    size=f'datum[{size_param.name}]',
).properties(
    width=0.9 * WIDTH,
    height=0.7 * HEIGHT,
)

# Timeline: per-date row counts stacked by condition; hosts the date brush
# and the condition legend filter.
timeline = alt.Chart(df).mark_bar().encode(
    x=alt.X('date:T', title='Date'),
    y=alt.Y('count():Q', title='Number of Cities'),
    color=alt.Color('condition:N', title='Condition', legend=alt.Legend(orient='left', columns=2)),
    opacity=alt.condition(condition_selection, alt.value(1), alt.value(0.2)),
    tooltip=[alt.Tooltip('date:T', title='Date'),
             alt.Tooltip('week', title='Week #'),
             alt.Tooltip('season', title='Season'),
             alt.Tooltip('condition', title='Condition'),
             alt.Tooltip('count():Q', title='No. of Cities')]
).add_params(
    date_selection,
    condition_selection
).properties(
    title='Timeline',
    width=WIDTH + 30,
    height=0.3 * HEIGHT,
).transform_filter(
    selection_boolean
).transform_filter(
    search_matches,
)

# Combine charts: timeline on top, scatter and minimap side by side below.
# Scales are independent so each view keeps its own colour/shape/size legend.
systemB = alt.vconcat(timeline, (scatter | minimap)).resolve_scale(
    color='independent',
    shape='independent',
    size='independent',
)
systemB.save('SystemB.html')
systemB

In [3]:
# Post-process the saved chart page: insert usage instructions and the task
# list right after the opening <body> tag.
HTML_PATH = 'SystemB.html'
BODY_TAG = '<body>'
# Present only once the instructions have been injected; used to keep this
# cell idempotent when it is re-run without re-saving the chart first.
BANNER_MARKER = '<h2>System B</h2>'
INSTRUCTIONS_HTML = '''
    <body>
    <h2>System B</h2>
    <h3>Usage</h3>
    <ul>
    <li>The timeline chart offers a range selection of date, with a default selection of week 2.</li>
    <li>The condition legend can be used as a filter.</li>
    <li>The scatter plot supports zooming and panning, with a minimap indicating your relative position.</li>
    <li>The city search box enables filtering by location.</li>
    <li>The metric dropdown determines the quantitative attribute encoded by size.</li>
    <li>The metric scaling exponent can be used to magnify larger values.</li>
    <li>Selecting a point, allows query relaxation by time - selecting the checkbox according to the traversal policy.</li>
    <li>The temperature and humidity sliders allow filtering of data.</li>
    <li>Refresh the page to reset the system.</li>
    </ul>
    <h3>Tasks</h3>
    <ol>
    <li>Determine the week # when the first occurrence of a “Snowy” weather condition took place.</li>
    <li>Find three cities that had humidity levels between 40% and 50% in week 28.</li>
    <li>Find the date when London recorded its highest temperature, and determine the lowest temperature recorded across Europe on the same day.</li>
    <li>Rank the cities - Luxembourg, London, and Brussels based on the number of “Fog / Mist” days experienced in November 2024.</li>
    </ol>
    '''

with open(HTML_PATH, 'r', encoding='utf-8') as f:
    html_content = f.read()

if BANNER_MARKER in html_content:
    # Already injected by an earlier run of this cell: leave the page as is
    # rather than stacking a second copy of the instructions.
    modified_html = html_content
elif BODY_TAG not in html_content:
    # Fail loudly instead of silently writing the page back without the text.
    raise ValueError(f'{HTML_PATH} contains no {BODY_TAG} tag; cannot insert the instructions')
else:
    # Plain string replacement of the first occurrence only: the tag is a
    # literal, and the replacement text must not be parsed as a regex template.
    modified_html = html_content.replace(BODY_TAG, INSTRUCTIONS_HTML, 1)

with open(HTML_PATH, 'w', encoding='utf-8') as f:
    f.write(modified_html)