In [18]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [3]:
df = pd.read_csv("weather.csv")
df.head()

Unnamed: 0,station_id,name,state,latitude,longitude,height,date,temperature_air_max_2m,temperature_air_min_2m,temperature_air_mean_2m,precipitation_form,precipitation_height,sunshine_duration,wind_speed,wind_gust_max,cloud_cover_total,snow_depth_new,snow_depth,humidity,pressure_air_site
0,427,Berlin Brandenburg,Brandenburg,52.3807,13.5306,46.0,2019-01-01,8.8,3.3,6.7,rain and snow,2.0,0.6,10.1,23.8,7.2,0.0,0.0,81.0,1010.7
1,427,Berlin Brandenburg,Brandenburg,52.3807,13.5306,46.0,2019-01-02,3.8,-1.8,2.1,only snow,0.4,1.8,8.1,16.3,4.4,0.0,0.0,62.0,1017.2
2,427,Berlin Brandenburg,Brandenburg,52.3807,13.5306,46.0,2019-01-03,2.2,-1.9,0.1,only snow,0.0,0.0,4.6,15.8,3.3,0.0,0.0,78.0,1026.1
3,427,Berlin Brandenburg,Brandenburg,52.3807,13.5306,46.0,2019-01-04,4.9,-1.3,1.8,only rain,1.2,0.0,5.1,11.3,7.7,0.0,0.0,94.0,1021.1
4,427,Berlin Brandenburg,Brandenburg,52.3807,13.5306,46.0,2019-01-05,8.1,2.5,6.1,only rain,0.0,0.6,5.5,11.1,7.0,0.0,0.0,89.0,1013.5


In [6]:
df.columns

Index(['station_id', 'name', 'state', 'latitude', 'longitude', 'height',
       'date', 'temperature_air_max_2m', 'temperature_air_min_2m',
       'temperature_air_mean_2m', 'precipitation_form', 'precipitation_height',
       'sunshine_duration', 'wind_speed', 'wind_gust_max', 'cloud_cover_total',
       'snow_depth_new', 'snow_depth', 'humidity', 'pressure_air_site'],
      dtype='object')

In [53]:
stations = df.groupby('name')['temperature_air_mean_2m'].mean().reset_index()
px.bar(stations, x='name', y='temperature_air_mean_2m', title="Average Temperature by Station")

In [5]:
px.line(df, x="date", y="temperature_air_mean_2m", title="Daily Temperature Over Time")

In [33]:
px.line(df, x='date', 
        y=['temperature_air_max_2m', 'temperature_air_min_2m'],
        title="Daily Max/Min Temperatures")


In [8]:
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['day_of_year'] = df['date'].dt.dayofyear
df['week'] = df['date'].dt.isocalendar().week

In [31]:
daily_sun = df.groupby('day_of_year')['sunshine_duration'].mean().reset_index()
daily_temp = df.groupby('day_of_year')['temperature_air_mean_2m'].mean().reset_index()
monthly_temp = df.groupby('month')['temperature_air_mean_2m'].mean().reset_index()

In [36]:
px.line(daily_sun, x="day_of_year", y="sunshine_duration", title="Daily Sunshine Duration")

In [37]:
px.bar(monthly_temp, x="month", y="temperature_air_mean_2m", title="Average Monthly Temperature")

In [None]:
fig = px.bar(
    monthly_temp,
    x="month",
    y="temperature_air_mean_2m",
    title="Daily sunshine and monthly temperature",
)

fig.add_traces(
    px.line(
        daily_sun,
        x="day_of_year",
        y="sunshine_duration",
    ).data
)

fig.show()


In [11]:
monthly_rain = df.groupby('month')['precipitation_height'].sum().reset_index()
px.bar(monthly_rain, x='month', y='precipitation_height',
       title="Total Monthly Precipitation")


In [40]:
temp_pivot = df.pivot_table(
    index='month', columns='day', values='temperature_air_mean_2m', aggfunc='mean'
)

px.imshow(temp_pivot,
          labels=dict(x="Day", y="Month", color="Mean Temp"),
          title="Temperature Heatmap by Month/Day")


In [41]:
corr = df[['temperature_air_mean_2m', 'humidity', 'pressure_air_site',
           'sunshine_duration', 'precipitation_height',
           'wind_speed', 'cloud_cover_total']].corr()

fig = px.imshow(corr, title='Correlation Heatmap of Weather Variables')
fig.show()


In [None]:
precip_events = df[df['precipitation_height'] > 0]
px.histogram(precip_events, x='date', nbins=50,
             title="Frequency of Precipitation Events")

In [54]:
df.columns

Index(['station_id', 'name', 'state', 'latitude', 'longitude', 'height',
       'date', 'temperature_air_max_2m', 'temperature_air_min_2m',
       'temperature_air_mean_2m', 'precipitation_form', 'precipitation_height',
       'sunshine_duration', 'wind_speed', 'wind_gust_max', 'cloud_cover_total',
       'snow_depth_new', 'snow_depth', 'humidity', 'pressure_air_site', 'year',
       'month', 'day', 'day_of_year', 'week', 'hour'],
      dtype='object')

In [57]:
snow_events = df[df['precipitation_form'] == 'only snow']
px.histogram(snow_events, x='date', nbins=50,
             title="Frequency of Snowfall")

In [58]:
snow_kiel = snow_events[snow_events['name'] == 'Kiel-Holtenau']
px.histogram(snow_kiel, x='date', nbins=20, title="Snowfall Events in Kiel")

In [64]:
px.histogram(snow_events, x='date', color='name', nbins=20, 
             title="Snowfall Events by Station")

In [65]:
# Extract month from date
snow_events['month'] = snow_events['date'].dt.to_period('M').astype(str)

# Count events by month and station
snow_by_month = snow_events.groupby(['month', 'name']).size().reset_index(name='count')

# Create bar plot
px.bar(snow_by_month, x='month', y='count', color='name', 
       barmode='group',
       title="Snowfall Events by Month and Station",
       labels={'count': 'Number of Events', 'month': 'Month', 'name': 'Station'})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [67]:
# Extract month name only
snow_events['month'] = snow_events['date'].dt.month_name()

# Count events by month and station
snow_by_month = snow_events.groupby(['month', 'name']).size().reset_index(name='count')

# Create bar plot with proper month order
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 
               'July', 'August', 'September', 'October', 'November', 'December']

px.bar(snow_by_month, x='month', y='count', color='name', 
       barmode='group',
       category_orders={'month': month_order},
       title="Snowfall Events by Month and Station (2019-2023)",
       labels={'count': 'Number of Events', 'month': 'Month', 'name': 'Station'})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

