In [10]:
import altair as alt
import pandas as pd
import numpy as np
import datetime as dt


# Turn off Altair's max-rows check so the full DataFrame can be passed to charts.
alt.data_transformers.disable_max_rows()

# Read the weather records and convert the 'date' column to datetimes.
df = pd.read_csv('EU-UK-Weather-Data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
print(df.columns)
df.head()

Index(['country', 'location_name', 'latitude', 'longitude',
       'temperature_celsius', 'humidity', 'air_quality_Ozone', 'date', 'day',
       'month', 'year', 'week', 'time', 'season', 'condition'],
      dtype='object')


Unnamed: 0,country,location_name,latitude,longitude,temperature_celsius,humidity,air_quality_Ozone,date,day,month,year,week,time,season,condition
0,Netherlands,Amsterdam,52.37,4.89,20.0,68,29.0,2024-05-16,16,5,2024,1,16:00:00,Spring,Clear/Sunny
1,Netherlands,Amsterdam,52.37,4.89,16.0,88,77.3,2024-05-17,17,5,2024,1,18:00:00,Spring,Clear/Sunny
2,Netherlands,Amsterdam,52.37,4.89,22.0,50,57.9,2024-05-18,18,5,2024,1,16:30:00,Spring,Rainy
3,Netherlands,Amsterdam,52.37,4.89,22.0,57,110.2,2024-05-19,19,5,2024,1,16:00:00,Spring,Clear/Sunny
4,Netherlands,Amsterdam,52.37,4.89,19.0,78,103.0,2024-05-20,20,5,2024,2,16:45:00,Spring,Rainy


In [11]:
# Add a small random offset to each record's coordinates so that points
# recorded at the same location don't sit exactly on top of each other.
# A seeded generator is used so the jitter (and therefore the rendered map)
# is identical after a kernel restart or when this cell is re-run.
JITTER_SEED = 0
JITTER_HALF_WIDTH = 0.01  # offsets are drawn uniformly from [-0.01, 0.01)
jitter_rng = np.random.default_rng(JITTER_SEED)
df['latitude_jitter'] = df['latitude'] + jitter_rng.uniform(
    -JITTER_HALF_WIDTH, JITTER_HALF_WIDTH, size=len(df)
)
df['longitude_jitter'] = df['longitude'] + jitter_rng.uniform(
    -JITTER_HALF_WIDTH, JITTER_HALF_WIDTH, size=len(df)
)

In [16]:
# Pixel dimensions of the combined view; each of the two stacked charts
# takes a fraction of HEIGHT.
WIDTH, HEIGHT = 800, 500


def _selection_contains(selected_values, datum_value):
    """Build a Vega expression that is true when a datum value is selected.

    The previous implementation used Vega's ``test(regexp, string)``, which
    treats its first argument as a regular expression. A selected day, week
    or month of ``1`` therefore also matched 10, 11, 21, ..., and location
    names matched as substrings. This helper performs an exact membership
    test instead.

    Parameters
    ----------
    selected_values : expression
        Attribute access on a point-selection parameter, e.g.
        ``location_point.location_name``.
        NOTE(review): assumes Vega-Lite exposes the selected values of a
        projected field as an array - confirm against the compiled spec.
    datum_value : expression
        The field of the current datum to look up, e.g.
        ``alt.datum.location_name``.

    Returns
    -------
    expression
        True when ``selected_values`` is not an array (treated as "nothing
        selected", so every datum matches) or when it contains
        ``datum_value``.
    """
    nothing_selected = ~alt.expr.isArray(selected_values)
    return nothing_selected | (alt.expr.indexof(selected_values, datum_value) >= 0)


# Interval brush over the x encoding, initialised to a one-week window.
date_range = (dt.date(2024, 5, 20), dt.date(2024, 5, 27))
date_selection = alt.selection_interval(
    encodings=['x'],
    value={'x': date_range}
)

# Dropdown choosing which column's values are used for the point size.
dropdown = alt.binding_select(
    options=['temperature_celsius', 'humidity', 'air_quality_Ozone'],
    labels=['Temperature °C', 'Humidity %', 'Air Quality Ozone'],
    name='Metric '
)
size_param = alt.param(
    value='temperature_celsius',
    bind=dropdown
)

# For each of location / day / week / month there is:
#   * a point selection projected onto that field,
#   * a checkbox parameter (initially unchecked), and
#   * a "*_matches" expression comparing the selection with the datum's field.
location_point = alt.selection_point(fields=['location_name'])
location_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same locations'))
location_matches = _selection_contains(location_point.location_name, alt.datum.location_name)
day_point = alt.selection_point(fields=['day'])
day_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same days'))
day_matches = _selection_contains(day_point.day, alt.datum.day)
week_point = alt.selection_point(fields=['week'])
week_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same week #'))
week_matches = _selection_contains(week_point.week, alt.datum.week)
month_point = alt.selection_point(fields=['month'])
month_checkbox = alt.param(value=False, bind=alt.binding_checkbox(name='Select same month'))
month_matches = _selection_contains(month_point.month, alt.datum.month)


# A point is drawn at full opacity when no checkbox is ticked, or when at
# least one ticked checkbox's corresponding *_matches expression is true.
# Every other point is dimmed.
no_filter_enabled = ~location_checkbox & ~day_checkbox & ~week_checkbox & ~month_checkbox
is_highlighted = (
    no_filter_enabled
    | (location_checkbox & location_matches)
    | (day_checkbox & day_matches)
    | (week_checkbox & week_matches)
    | (month_checkbox & month_matches)
)
opacity = alt.when(is_highlighted).then(alt.value(1.0)).otherwise(alt.value(0.1))

# Point chart of the records at their jittered coordinates, restricted to
# the dates brushed with `date_selection`. Shape encodes the condition,
# colour the city, and size the column currently chosen via `size_param`.
scatter_tooltip = [
    'location_name',
    'condition',
    'date',
    'week',
    'temperature_celsius',
    'humidity',
    'air_quality_Ozone',
]
scatter = (
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X('longitude_jitter:Q', title='Longitude'),
        alt.Y('latitude_jitter:Q', title='Latitude'),
        alt.Shape('condition:N', title='Shape', legend=alt.Legend(orient='bottom')),
        alt.Color('location_name:N', title='Cities', legend=alt.Legend(orient='right')),
        alt.Size('size:Q', title='Metric', legend=alt.Legend(orient='bottom')),
        opacity=opacity,
        tooltip=scatter_tooltip,
    )
    .transform_filter(date_selection)
    .transform_calculate(
        # Look up, per datum, the column named by the dropdown's current value.
        size=f'datum[{size_param.name}]',
        # Not referenced by any encoding of this chart.
        size_title=size_param.name,
    )
    .add_params(
        size_param,
        location_point, location_checkbox,
        day_point, day_checkbox,
        week_point, week_checkbox,
        month_point, month_checkbox,
    )
    .properties(width=WIDTH, height=0.7 * HEIGHT)
    .interactive()
)

# Bar chart of record counts per date, coloured by weather condition.
# It hosts the `date_selection` brush.
timeline = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('date:T', title='Date'),
        alt.Y('count():Q', title='Number of Records'),
        alt.Color('condition:N', title='Weather Condition', legend=alt.Legend(orient='top')),
        tooltip=['date:T', 'count():Q'],
    )
    .add_params(date_selection)
    .properties(width=WIDTH, height=0.3 * HEIGHT)
)

# Stack the timeline above the scatter plot. Colour scales are resolved
# independently because the two charts colour by different fields.
system2 = (timeline & scatter).resolve_scale(color='independent')
system2

### When did it first start snowing?

In [94]:
# Earliest date among the records whose condition is 'Snowy'.
is_snowy = df['condition'] == 'Snowy'
df.loc[is_snowy, 'date'].min()

Timestamp('2024-09-12 00:00:00')