In [41]:
# imports 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime as dt
from haversine import haversine
import plotly.express as px
from keplergl import KeplerGl
import pandas as pd
import numpy as np
import swifter
from tqdm.auto import tqdm
import geopandas as gpd
# Render every column of wide frames instead of truncating the display.
pd.options.display.max_columns = None

# Seaborn appearance: 'white' style with the 'notebook' scaling context.
sns.set_style('white')
sns.set_context('notebook')

In [42]:
# Load the compact weather table; pandas is asked to parse the 'from'/'to'
# interval bounds as dates while reading.
weather = pd.read_csv(
    '../data/processed/weather_compact.csv',
    parse_dates=['from', 'to'],
)

In [43]:
# Preview the first rows of the freshly loaded table (all columns still present).
weather.head()

Unnamed: 0,longitude,latitude,from,to,municipalityName,acc_precip,bright_sunshine,max_precip_30m,max_temp_w_date,max_wind_speed_10min,mean_cloud_cover,mean_relative_hum,mean_temp,mean_wind_dir,mean_wind_speed,min_temp,snow_depth,temp_grass
0,12.352321,55.684977,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Albertslund,0.6,0.0,,13.8,2.4,93.0,98.7,13.6,15.0,2.0,13.4,,13.5
1,12.315178,55.851931,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Allerød,0.2,0.0,,13.6,1.6,99.0,99.3,13.4,8.0,1.0,13.3,,13.2
2,12.368402,55.727751,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Ballerup,0.3,0.0,,13.8,2.0,94.0,99.2,13.5,13.0,1.6,13.3,,13.4
3,12.404382,55.645037,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Brøndby,0.9,0.0,,14.0,3.3,93.0,98.8,13.7,14.0,2.2,13.5,,13.6
4,12.650228,55.593807,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Dragør,2.1,0.0,,14.1,4.1,100.0,96.9,13.8,11.0,3.7,13.7,,13.7


In [44]:
# Discard the measurement columns that the daily aggregation further down does
# not use, keeping location, time window, municipality, precipitation,
# mean/min temperature and mean wind speed.
#
# `weather` is rebound instead of mutated with inplace=True, and
# errors='ignore' is passed, so re-running this cell is a no-op rather than a
# KeyError once the columns are already gone.
weather = weather.drop(
    columns=[
        'bright_sunshine',
        'max_precip_30m',
        'max_temp_w_date',
        'max_wind_speed_10min',
        'mean_cloud_cover',
        'mean_relative_hum',
        'mean_wind_dir',
        'snow_depth',
        'temp_grass',
    ],
    errors='ignore',
)

In [45]:
# Preview the table again to confirm which columns remain after the drop.
weather.head()

Unnamed: 0,longitude,latitude,from,to,municipalityName,acc_precip,mean_temp,mean_wind_speed,min_temp
0,12.352321,55.684977,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Albertslund,0.6,13.6,2.0,13.4
1,12.315178,55.851931,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Allerød,0.2,13.4,1.0,13.3
2,12.368402,55.727751,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Ballerup,0.3,13.5,1.6,13.3
3,12.404382,55.645037,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Brøndby,0.9,13.7,2.2,13.5
4,12.650228,55.593807,2017-07-01 00:00:00+00:00,2017-07-01 01:00:00+00:00,Dragør,2.1,13.8,3.7,13.7


In [46]:
# Inspect dtypes and non-null counts per column of the trimmed table.
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274720 entries, 0 to 274719
Data columns (total 9 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   longitude         274720 non-null  float64
 1   latitude          274720 non-null  float64
 2   from              274720 non-null  object 
 3   to                274720 non-null  object 
 4   municipalityName  274720 non-null  object 
 5   acc_precip        263670 non-null  float64
 6   mean_temp         263670 non-null  float64
 7   mean_wind_speed   263670 non-null  float64
 8   min_temp          263670 non-null  float64
dtypes: float64(6), object(3)
memory usage: 18.9+ MB


In [47]:
# Convert both interval bounds to timezone-aware UTC timestamps.
for bound in ('from', 'to'):
    weather[bound] = pd.to_datetime(weather[bound], utc=True)

# Calendar day of the interval start; used as the grouping key for the daily
# aggregation.
# NOTE(review): this is the UTC calendar day, not a local-time day — confirm
# that is the intended day boundary.
weather['date'] = weather['from'].dt.date

In [48]:
# Count missing values per column to see which measurements have gaps.
weather.isna().sum()

longitude               0
latitude                0
from                    0
to                      0
municipalityName        0
acc_precip          11050
mean_temp           11050
mean_wind_speed     11050
min_temp            11050
date                    0
dtype: int64

In [55]:
# Keep only rows in which every column has a value (any NaN removes the row).
weather = weather.dropna()

In [61]:
# Collapse the interval-level rows into one row per municipality and date.
daily_weather = (
    weather
    .groupby(['municipalityName', 'date'])
    .agg(
        latitude=('latitude', 'first'),               # first latitude seen that day
        longitude=('longitude', 'first'),             # first longitude seen that day
        acc_precip=('acc_precip', 'sum'),             # daily accumulated precipitation
        mean_temp=('mean_temp', 'mean'),              # average of the mean temperatures
        mean_wind_speed=('mean_wind_speed', 'mean'),  # average of the mean wind speeds
        min_temp=('min_temp', 'min'),                 # lowest minimum temperature of the day
    )
    .reset_index()
)

In [62]:
# Preview the first rows of the per-municipality, per-date aggregate.
daily_weather.head()

Unnamed: 0,municipalityName,date,latitude,longitude,acc_precip,mean_temp,mean_wind_speed,min_temp
0,Albertslund,2017-07-01,55.684977,12.352321,2.5,14.841667,2.508333,11.9
1,Albertslund,2017-07-02,55.684977,12.352321,0.0,15.0875,5.858333,12.0
2,Albertslund,2017-07-03,55.684977,12.352321,6.0,14.483333,5.483333,10.2
3,Albertslund,2017-07-04,55.684977,12.352321,0.9,14.775,4.479167,9.8
4,Albertslund,2017-07-05,55.684977,12.352321,0.0,15.066667,2.8375,9.5


In [63]:
# Number of (municipality, date) rows in the aggregate.
len(daily_weather)

10676

In [64]:
# Show the earliest and latest date covered by the daily aggregate as a (min, max) tuple.
daily_weather['date'].min(), daily_weather['date'].max()

(datetime.date(2017, 7, 1), datetime.date(2018, 5, 10))

In [65]:
# Persist the daily aggregate as CSV; the positional row index is not written.
daily_weather.to_csv(
    '../data/processed/daily_weather.csv',
    index=False,
)