### Loading required packages

In [36]:
import pandas as pd
import boto3
from datetime import datetime
import plotly.graph_objects as go

### Loading and combining data
#### Meteo data

In [37]:
# loading meteo data
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
years = ['2022']
base_url_meteo = 'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Meteo+data/LC_{}{}.csv'

dfs = []

for year in years:
    for quarter in quarters:
        url = base_url_meteo.format(year, quarter)
        df = pd.read_csv(url)
        dfs.append(df)

# Concatenate all the dataframes into a single dataframe
meteo_combined_df = pd.concat(dfs, ignore_index=True)
meteo_combined_df.head()


MemoryError: Unable to allocate 140. MiB for an array with shape (13, 1415232) and data type float64

In [38]:
# Drop the columns we won't use
columns_to_keep = ['LC_RAININ', 'LC_DAILYRAIN', 'LC_WINDDIR', 'LC_WINDSPEED', 'LC_TEMP_QCL3', 'Month', 'Day', 'Hour']  #there's less columns we keep than drop
columns_to_drop = set(meteo_combined_df.columns) - set(columns_to_keep)
meteo_combined_df.drop(columns=columns_to_drop, inplace=True)
meteo_combined_df.head(1050)

Unnamed: 0,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,Month,Day,Hour,LC_TEMP_QCL3
0,0.0,0.0,-169.0,0.43,1,1,0,13.048027
1,0.0,0.0,-170.0,0.33,1,1,0,12.985849
2,0.0,0.0,-167.0,0.46,1,1,0,12.950322
3,0.0,0.0,-160.0,0.52,1,1,0,12.949550
4,0.0,0.0,-166.0,0.51,1,1,0,12.952268
...,...,...,...,...,...,...,...,...
1045,0.0,0.0,-150.0,0.06,1,8,6,1.024274
1046,0.0,0.0,-162.0,0.45,1,8,6,0.734263
1047,0.0,0.0,-165.0,0.39,1,8,6,0.756097
1048,0.0,0.0,179.0,0.37,1,8,6,0.840851


In [None]:
# check for percentage of missing values in each column
print(meteo_combined_df.isnull().sum() / len(meteo_combined_df))

In [None]:
# imputation is not a good idea in this case, because weather variables will closely reflect the 2 observations around it rather than the mean of the entire column/day/hour

In [None]:
# aggregate meteo data by day
meteo_combined_df = meteo_combined_df.groupby(['Year','Month', 'Day']).mean()
meteo_combined_df = meteo_combined_df.reset_index()
meteo_combined_df.head()

del dfs # deleting the individual meteo datasets

In [None]:
# check for missing values in each column
print(meteo_combined_df.isnull().sum())

In [None]:
# export dataframe (only needs to be ran once so comment it out)
# meteo_combined_df.to_csv('combined_weatherdata_2022.csv', index=False)

#### Noise data

- January 

In [None]:
# Define a list of URLs
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

In [None]:
# Combining the datasets for January
combined_jan = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

In [None]:
# extract the date, month, hour, minute of "result_timestamp"
combined_jan['result_timestamp'] = pd.to_datetime(combined_jan['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_jan['result_date'] = combined_jan['result_timestamp'].dt.date
combined_jan['result_month'] = combined_jan['result_timestamp'].dt.month
combined_jan['result_day'] = combined_jan['result_timestamp'].dt.day
combined_jan['result_hour'] = combined_jan['result_timestamp'].dt.hour
combined_jan['result_minute'] = combined_jan['result_timestamp'].dt.minute

combined_jan.head()

In [None]:
# aggregate the data by day
combined_jan = combined_jan.groupby(['result_date','description']).mean()
combined_jan = combined_jan.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_jan.drop(columns_to_drop, axis=1, inplace=True)
combined_jan.head(150)

In [None]:
# check for missing values in each column
print(combined_jan.isnull().sum())

- February

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

In [None]:
# Combining the datasets for February
combined_feb = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

In [None]:
# extract the date, month, hour, minute of "result_timestamp"
combined_feb['result_timestamp'] = pd.to_datetime(combined_feb['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_feb['result_date'] = combined_feb['result_timestamp'].dt.date
combined_feb['result_month'] = combined_feb['result_timestamp'].dt.month
combined_feb['result_day'] = combined_feb['result_timestamp'].dt.day
combined_feb['result_hour'] = combined_feb['result_timestamp'].dt.hour
combined_feb['result_minute'] = combined_feb['result_timestamp'].dt.minute

combined_feb.head(1000)

In [None]:
# aggregate the data by day
combined_feb = combined_feb.groupby(['result_date','description']).mean()
combined_feb = combined_feb.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_feb.drop(columns_to_drop, axis=1, inplace=True)
combined_feb.head(150)

In [None]:
# check for missing values in each column
print(combined_feb.isnull().sum())

- March

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for March
combined_mar = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_mar['result_timestamp'] = pd.to_datetime(combined_mar['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_mar['result_date'] = combined_mar['result_timestamp'].dt.date
combined_mar['result_month'] = combined_mar['result_timestamp'].dt.month
combined_mar['result_day'] = combined_mar['result_timestamp'].dt.day
combined_mar['result_hour'] = combined_mar['result_timestamp'].dt.hour
combined_mar['result_minute'] = combined_mar['result_timestamp'].dt.minute

combined_mar.head()

# aggregate the data by day
combined_mar = combined_mar.groupby(['result_date','description']).mean()
combined_mar = combined_mar.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_mar.drop(columns_to_drop, axis=1, inplace=True)
combined_mar.head(150)

# check for missing values in each column
print(combined_mar.isnull().sum())

- April

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for April
combined_apr = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_apr['result_timestamp'] = pd.to_datetime(combined_apr['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_apr['result_date'] = combined_apr['result_timestamp'].dt.date
combined_apr['result_month'] = combined_apr['result_timestamp'].dt.month
combined_apr['result_day'] = combined_apr['result_timestamp'].dt.day
combined_apr['result_hour'] = combined_apr['result_timestamp'].dt.hour
combined_apr['result_minute'] = combined_apr['result_timestamp'].dt.minute

combined_apr.head()

# aggregate the data by day
combined_apr = combined_apr.groupby(['result_date','description']).mean()
combined_apr = combined_apr.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_apr.drop(columns_to_drop, axis=1, inplace=True)
combined_apr.head(150)

# check for missing values in each column
print(combined_apr.isnull().sum())

- May

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for May
combined_may = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_may['result_timestamp'] = pd.to_datetime(combined_may['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_may['result_date'] = combined_may['result_timestamp'].dt.date
combined_may['result_month'] = combined_may['result_timestamp'].dt.month
combined_may['result_day'] = combined_may['result_timestamp'].dt.day
combined_may['result_hour'] = combined_may['result_timestamp'].dt.hour
combined_may['result_minute'] = combined_may['result_timestamp'].dt.minute

combined_may.head()

# aggregate the data by day
combined_may = combined_may.groupby(['result_date','description']).mean()
combined_may = combined_may.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_may.drop(columns_to_drop, axis=1, inplace=True)
combined_may.head(150)

# check for missing values in each column
print(combined_may.isnull().sum())

- June

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for June
combined_jun = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_jun['result_timestamp'] = pd.to_datetime(combined_jun['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_jun['result_date'] = combined_jun['result_timestamp'].dt.date
combined_jun['result_month'] = combined_jun['result_timestamp'].dt.month
combined_jun['result_day'] = combined_jun['result_timestamp'].dt.day
combined_jun['result_hour'] = combined_jun['result_timestamp'].dt.hour
combined_jun['result_minute'] = combined_jun['result_timestamp'].dt.minute

combined_jun.head()

# aggregate the data by day
combined_jun = combined_jun.groupby(['result_date','description']).mean()
combined_jun = combined_jun.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_jun.drop(columns_to_drop, axis=1, inplace=True)
combined_jun.head(150)

# check for missing values in each column
print(combined_jun.isnull().sum())

- July

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for July
combined_jul = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_jul['result_timestamp'] = pd.to_datetime(combined_jul['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_jul['result_date'] = combined_jul['result_timestamp'].dt.date
combined_jul['result_month'] = combined_jul['result_timestamp'].dt.month
combined_jul['result_day'] = combined_jul['result_timestamp'].dt.day
combined_jul['result_hour'] = combined_jul['result_timestamp'].dt.hour
combined_jul['result_minute'] = combined_jul['result_timestamp'].dt.minute

combined_jul.head()

# aggregate the data by day
combined_jul = combined_jul.groupby(['result_date','description']).mean()
combined_jul = combined_jul.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_jul.drop(columns_to_drop, axis=1, inplace=True)
combined_jul.head(150)

# check for missing values in each column
print(combined_jul.isnull().sum())

- August

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for August
combined_aug = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_aug['result_timestamp'] = pd.to_datetime(combined_aug['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_aug['result_date'] = combined_aug['result_timestamp'].dt.date
combined_aug['result_month'] = combined_aug['result_timestamp'].dt.month
combined_aug['result_day'] = combined_aug['result_timestamp'].dt.day
combined_aug['result_hour'] = combined_aug['result_timestamp'].dt.hour
combined_aug['result_minute'] = combined_aug['result_timestamp'].dt.minute

combined_aug.head()

# aggregate the data by day
combined_aug = combined_aug.groupby(['result_date','description']).mean()
combined_aug = combined_aug.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_aug.drop(columns_to_drop, axis=1, inplace=True)
combined_aug.head(150)

# check for missing values in each column
print(combined_aug.isnull().sum())

- September

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for September
combined_sep = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_sep['result_timestamp'] = pd.to_datetime(combined_sep['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_sep['result_date'] = combined_sep['result_timestamp'].dt.date
combined_sep['result_month'] = combined_sep['result_timestamp'].dt.month
combined_sep['result_day'] = combined_sep['result_timestamp'].dt.day
combined_sep['result_hour'] = combined_sep['result_timestamp'].dt.hour
combined_sep['result_minute'] = combined_sep['result_timestamp'].dt.minute

combined_sep.head()

# aggregate the data by day
combined_sep = combined_sep.groupby(['result_date','description']).mean()
combined_sep = combined_sep.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_sep.drop(columns_to_drop, axis=1, inplace=True)
combined_sep.head(150)

# check for missing values in each column
print(combined_sep.isnull().sum())

- October

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for October
combined_oct = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_oct['result_timestamp'] = pd.to_datetime(combined_oct['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_oct['result_date'] = combined_oct['result_timestamp'].dt.date
combined_oct['result_month'] = combined_oct['result_timestamp'].dt.month
combined_oct['result_day'] = combined_oct['result_timestamp'].dt.day
combined_oct['result_hour'] = combined_oct['result_timestamp'].dt.hour
combined_oct['result_minute'] = combined_oct['result_timestamp'].dt.minute

combined_oct.head()

# aggregate the data by day
combined_oct = combined_oct.groupby(['result_date','description']).mean()
combined_oct = combined_oct.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_oct.drop(columns_to_drop, axis=1, inplace=True)
combined_oct.head(150)

# check for missing values in each column
print(combined_oct.isnull().sum())

- November

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for November
combined_nov = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_nov['result_timestamp'] = pd.to_datetime(combined_nov['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_nov['result_date'] = combined_nov['result_timestamp'].dt.date
combined_nov['result_month'] = combined_nov['result_timestamp'].dt.month
combined_nov['result_day'] = combined_nov['result_timestamp'].dt.day
combined_nov['result_hour'] = combined_nov['result_timestamp'].dt.hour
combined_nov['result_minute'] = combined_nov['result_timestamp'].dt.minute

combined_nov.head()

# aggregate the data by day
combined_nov = combined_nov.groupby(['result_date','description']).mean()
combined_nov = combined_nov.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_nov.drop(columns_to_drop, axis=1, inplace=True)
combined_nov.head(150)

# check for missing values in each column
print(combined_nov.isnull().sum())

- December

In [None]:
# Define a list of URLs 
urls = [
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255439_mp-01-naamsestraat-35-maxim.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255440_mp-02-naamsestraat-57-xior.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255441_mp-03-naamsestraat-62-taste.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255442_mp-05-calvariekapel-ku-leuven.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255443_mp-06-parkstraat-2-la-filosovia.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255444_mp-07-naamsestraat-81.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255445_mp-08-kiosk-stadspark.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_280324_mp08bis---vrijthof.csv',
    'https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_303910_mp-04-his-hears.csv'
   ]

# Create an empty list to store the DataFrames
dfs = []

# Loop through each URL and read the CSV into a DataFrame
for url in urls:
    df = pd.read_csv(url, header=0, sep=';')
    dfs.append(df)

# Now we have a list of DataFrames for each URL

# Combining the datasets for December
combined_dec = pd.concat(dfs, ignore_index=True)
del dfs # deleting the separate dataframes to minimize memory usage

# extract the date, month, hour, minute of "result_timestamp"
combined_dec['result_timestamp'] = pd.to_datetime(combined_dec['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')

combined_dec['result_date'] = combined_dec['result_timestamp'].dt.date
combined_dec['result_month'] = combined_dec['result_timestamp'].dt.month
combined_dec['result_day'] = combined_dec['result_timestamp'].dt.day
combined_dec['result_hour'] = combined_dec['result_timestamp'].dt.hour
combined_dec['result_minute'] = combined_dec['result_timestamp'].dt.minute

combined_dec.head()

# aggregate the data by day
combined_dec = combined_dec.groupby(['result_date','description']).mean()
combined_dec = combined_dec.reset_index()
columns_to_drop = ['result_hour', 'result_minute']
combined_dec.drop(columns_to_drop, axis=1, inplace=True)
combined_dec.head(150)

# check for missing values in each column
print(combined_dec.isnull().sum())

#### Yearly noise data

In [None]:
# List of the monthly datasets
datasets = [combined_jan, combined_feb, combined_mar, combined_apr, combined_may, combined_jun, combined_jul, combined_aug, combined_sep, combined_oct, combined_nov, combined_dec]

# Concatenate the datasets vertically
combined_year = pd.concat(datasets, ignore_index=True)
del datasets

# Reset the index of the combined dataset
combined_year = combined_year.reset_index()

# Display the combined and sorted yearly dataset

combined_year.head(2000)


In [None]:
# exporting file (only needs to be ran one time so comment it out)
# combined_year.to_csv('combined_noisedata_2022.csv', index=False)  


Now that we have exported the preprocessed dataframes for the noise and weather data of 2022, we can just use these files instead of loading all 112 files from the S3 bucket each time, as this takes a lot of time.

In [None]:
data_noise = pd.read_csv('combined_noisedata_2022.csv', header=0, sep=',')
data_noise.head()

In [None]:
data_weather = pd.read_csv('combined_weatherdata_2022.csv', header=0, sep=',')
data_weather.head()

## OLD PREPROCESSING

### Reading in the data from the S3 bucket (don't forget to pip install boto3)

In [None]:
# # meteo data
# Q1_2022 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Meteo+data/LC_2022Q1.csv')
# Q2_2022 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Meteo+data/LC_2022Q2.csv')
# Q3_2022 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Meteo+data/LC_2022Q3.csv')
# Q4_2022 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Meteo+data/LC_2022Q4.csv')

In [None]:
# REMARK: this is the 'old' noise data, don't run this

# noise data
# exp40_naamse35 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# exp40_naamse57 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# exp40_naamse62 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# exp40_calvarie = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# exp40_naamse81 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# exp40_park = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# exp40_kiosk = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# exp40_vrijt = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# exp40_his = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_40/csv_results_40_303910_mp-04-his-hears.csv', header=0, sep=';')

# exp41_naamse35 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# exp41_naamse57 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# exp41_naamse62 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# exp41_calvarie = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# exp41_naamse81 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# exp41_park = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# exp41_kiosk = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# exp41_vrijt = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# exp41_his = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_41/csv_results_41_303910_mp-04-his-hears.csv', header=0, sep=';')

# exp42_naamse35 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# exp42_naamse57 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# exp42_naamse62 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# exp42_calvarie = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# exp42_naamse81 = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# exp42_park = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# exp42_kiosk = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# exp42_vrijt = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# exp42_his = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/export_42/csv_results_42_303910_mp-04-his-hears.csv', header=0, sep=';')


In [None]:
# noise_columns = ["#object_id", "description", "result_timestamp", "lamax", "laeq"]
# naamse35_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';', usecols=noise_columns)

In [None]:
# # updated noise data - January
# naamse35_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_jan = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jan/Jan/csv_results_42_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - February
# naamse35_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_feb = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Feb/Feb/csv_results_42_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - March
# naamse35_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_mar = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/March/March/csv_results_44_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - April
# naamse35_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_apr = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/April/April/csv_results_45_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - May
# naamse35_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_may = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/May/May/csv_results_46_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - June
# naamse35_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_jun = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/June/June/csv_results_47_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - July
# naamse35_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_jul = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Jul/Jul/csv_results_48_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - August
# naamse35_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_aug = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Aug/Aug/csv_results_49_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - September
# naamse35_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_sep = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Sep/Sep/csv_results_50_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - October
# naamse35_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_oct = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Oct/Oct/csv_results_51_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - November
# naamse35_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_nov = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Nov/Nov/csv_results_52_303910_mp-04-his-hears.csv', header=0, sep=';')

In [None]:
# # updated noise data - December
# naamse35_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255439_mp-01-naamsestraat-35-maxim.csv', header=0, sep=';')
# naamse57_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255440_mp-02-naamsestraat-57-xior.csv', header=0, sep=';')
# naamse62_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255441_mp-03-naamsestraat-62-taste.csv', header=0, sep=';')
# calvarie_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255442_mp-05-calvariekapel-ku-leuven.csv', header=0, sep=';')
# park_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255443_mp-06-parkstraat-2-la-filosovia.csv', header=0, sep=';')
# naamse81_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255444_mp-07-naamsestraat-81.csv', header=0, sep=';')
# kiosk_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_255445_mp-08-kiosk-stadspark.csv', header=0, sep=';')
# vrijt_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_280324_mp08bis---vrijthof.csv', header=0, sep=';')
# his_dec = pd.read_csv('https://mda-georgia-bucket.s3.eu-central-1.amazonaws.com/Noise+data/Dec/Dec/csv_results_53_303910_mp-04-his-hears.csv', header=0, sep=';')

### Combining and aggregating the meteo data

In [None]:
# # combine meteo dataset 
# meteocombined = pd.concat([Q1_2022, Q2_2022, Q3_2022, Q4_2022], axis=0)
# meteocombined.head()

In [None]:
# check for missing values in each column
# print(meteocombined.isnull().sum())

In [None]:
# # aggregate meteo data by day
# avg_meteo_combined = meteocombined.groupby(['Year','Month', 'Day']).mean()
# avg_meteo_combined = avg_meteo_combined.reset_index()
# avg_meteo_combined.head()


In [None]:
# month_max_value = avg_meteo_combined['Month'].max()
# print(f"This combined meteo dataset contains the weather data for all {month_max_value} months.")

### Combining and aggregating the noise data

- January

In [None]:
# # combine noise data for January together
# noise_jan_combined = pd.concat([naamse35_jan, naamse57_jan, naamse62_jan, calvarie_jan, park_jan, naamse81_jan, kiosk_jan, vrijt_jan, his_jan], axis=0)
# noise_jan_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_jan_combined['result_timestamp'] = pd.to_datetime(noise_jan_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_jan_combined['result_date'] = noise_jan_combined['result_timestamp'].dt.date
# noise_jan_combined['result_month'] = noise_jan_combined['result_timestamp'].dt.month
# noise_jan_combined['result_day'] = noise_jan_combined['result_timestamp'].dt.day
# noise_jan_combined['result_hour'] = noise_jan_combined['result_timestamp'].dt.hour
# noise_jan_combined['result_minute'] = noise_jan_combined['result_timestamp'].dt.minute

# noise_jan_combined.head()

In [None]:
# # aggregate the data by day
# avg_jan_combined = noise_jan_combined.groupby(['result_date','description']).mean()
# avg_jan_combined = avg_jan_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_jan_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_jan_combined.head(150)

In [None]:
# check for missing values in each column
# print(avg_jan_combined.isnull().sum())

- February

In [None]:
# # combine noise data for February together
# noise_feb_combined = pd.concat([naamse35_feb, naamse57_feb, naamse62_feb, calvarie_feb, park_feb, naamse81_feb, kiosk_feb, vrijt_feb, his_feb], axis=0)
# noise_feb_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_feb_combined['result_timestamp'] = pd.to_datetime(noise_feb_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_feb_combined['result_date'] = noise_feb_combined['result_timestamp'].dt.date
# noise_feb_combined['result_month'] = noise_feb_combined['result_timestamp'].dt.month
# noise_feb_combined['result_day'] = noise_feb_combined['result_timestamp'].dt.day
# noise_feb_combined['result_hour'] = noise_feb_combined['result_timestamp'].dt.hour
# noise_feb_combined['result_minute'] = noise_feb_combined['result_timestamp'].dt.minute

# noise_feb_combined.head()

In [None]:
# # aggregate the data by day
# avg_feb_combined = noise_feb_combined.groupby(['result_date','description']).mean()
# avg_feb_combined = avg_feb_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_feb_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_feb_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_feb_combined.isnull().sum())

- March

In [None]:
# # combine noise data for March together
# noise_mar_combined = pd.concat([naamse35_mar, naamse57_mar, naamse62_mar, calvarie_mar, park_mar, naamse81_mar, kiosk_mar, vrijt_mar, his_mar], axis=0)
# noise_mar_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_mar_combined['result_timestamp'] = pd.to_datetime(noise_mar_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_mar_combined['result_date'] = noise_mar_combined['result_timestamp'].dt.date
# noise_mar_combined['result_month'] = noise_mar_combined['result_timestamp'].dt.month
# noise_mar_combined['result_day'] = noise_mar_combined['result_timestamp'].dt.day
# noise_mar_combined['result_hour'] = noise_mar_combined['result_timestamp'].dt.hour
# noise_mar_combined['result_minute'] = noise_mar_combined['result_timestamp'].dt.minute

# noise_mar_combined.head()

In [None]:
# # aggregate the data by day
# avg_mar_combined = noise_mar_combined.groupby(['result_date','description']).mean()
# avg_mar_combined = avg_mar_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_mar_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_mar_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_mar_combined.isnull().sum())

- April 

In [None]:
# # combine noise data for April together
# noise_apr_combined = pd.concat([naamse35_apr, naamse57_apr, naamse62_apr, calvarie_apr, park_apr, naamse81_apr, kiosk_apr, vrijt_apr, his_apr], axis=0)
# noise_apr_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_apr_combined['result_timestamp'] = pd.to_datetime(noise_apr_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_apr_combined['result_date'] = noise_apr_combined['result_timestamp'].dt.date
# noise_apr_combined['result_month'] = noise_apr_combined['result_timestamp'].dt.month
# noise_apr_combined['result_day'] = noise_apr_combined['result_timestamp'].dt.day
# noise_apr_combined['result_hour'] = noise_apr_combined['result_timestamp'].dt.hour
# noise_apr_combined['result_minute'] = noise_apr_combined['result_timestamp'].dt.minute

# noise_apr_combined.head()

In [None]:
# # aggregate the data by day
# avg_apr_combined = noise_apr_combined.groupby(['result_date','description']).mean()
# avg_apr_combined = avg_apr_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_apr_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_apr_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_apr_combined.isnull().sum())

- May 

In [None]:
# # combine noise data for May together
# noise_may_combined = pd.concat([naamse35_may, naamse57_may, naamse62_may, calvarie_may, park_may, naamse81_may, kiosk_may, vrijt_may, his_may], axis=0)
# noise_may_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_may_combined['result_timestamp'] = pd.to_datetime(noise_may_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_may_combined['result_date'] = noise_may_combined['result_timestamp'].dt.date
# noise_may_combined['result_month'] = noise_may_combined['result_timestamp'].dt.month
# noise_may_combined['result_day'] = noise_may_combined['result_timestamp'].dt.day
# noise_may_combined['result_hour'] = noise_may_combined['result_timestamp'].dt.hour
# noise_may_combined['result_minute'] = noise_may_combined['result_timestamp'].dt.minute

# noise_may_combined.head()

In [None]:
# # aggregate the data by day
# avg_may_combined = noise_may_combined.groupby(['result_date','description']).mean()
# avg_may_combined = avg_may_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_may_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_may_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_may_combined.isnull().sum())

- June

In [None]:
# # combine noise data for June together
# noise_jun_combined = pd.concat([naamse35_jun, naamse57_jun, naamse62_jun, calvarie_jun, park_jun, naamse81_jun, kiosk_jun, vrijt_jun, his_jun], axis=0)
# noise_jun_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_jun_combined['result_timestamp'] = pd.to_datetime(noise_jun_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_jun_combined['result_date'] = noise_jun_combined['result_timestamp'].dt.date
# noise_jun_combined['result_month'] = noise_jun_combined['result_timestamp'].dt.month
# noise_jun_combined['result_day'] = noise_jun_combined['result_timestamp'].dt.day
# noise_jun_combined['result_hour'] = noise_jun_combined['result_timestamp'].dt.hour
# noise_jun_combined['result_minute'] = noise_jun_combined['result_timestamp'].dt.minute

# noise_jun_combined.head()

In [None]:
# aggregate the data by day
# avg_jun_combined = noise_jun_combined.groupby(['result_date','description']).mean()
# avg_jun_combined = avg_jun_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_jun_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_jun_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_jun_combined.isnull().sum())

- July

In [None]:
# # combine noise data for July together
# noise_jul_combined = pd.concat([naamse35_jul, naamse57_jul, naamse62_jul, calvarie_jul, park_jul, naamse81_jul, kiosk_jul, vrijt_jul, his_jul], axis=0)
# noise_jul_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_jul_combined['result_timestamp'] = pd.to_datetime(noise_jul_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_jul_combined['result_date'] = noise_jul_combined['result_timestamp'].dt.date
# noise_jul_combined['result_month'] = noise_jul_combined['result_timestamp'].dt.month
# noise_jul_combined['result_day'] = noise_jul_combined['result_timestamp'].dt.day
# noise_jul_combined['result_hour'] = noise_jul_combined['result_timestamp'].dt.hour
# noise_jul_combined['result_minute'] = noise_jul_combined['result_timestamp'].dt.minute

# noise_jul_combined.head()

In [None]:
# # aggregate the data by day
# avg_jul_combined = noise_jul_combined.groupby(['result_date','description']).mean()
# avg_jul_combined = avg_jul_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_jul_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_jul_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_jul_combined.isnull().sum())

- August

In [None]:
# # combine noise data for August together
# noise_aug_combined = pd.concat([naamse35_aug, naamse57_aug, naamse62_aug, calvarie_aug, park_aug, naamse81_aug, kiosk_aug, vrijt_aug, his_aug], axis=0)
# noise_aug_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_aug_combined['result_timestamp'] = pd.to_datetime(noise_aug_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_aug_combined['result_date'] = noise_aug_combined['result_timestamp'].dt.date
# noise_aug_combined['result_month'] = noise_aug_combined['result_timestamp'].dt.month
# noise_aug_combined['result_day'] = noise_aug_combined['result_timestamp'].dt.day
# noise_aug_combined['result_hour'] = noise_aug_combined['result_timestamp'].dt.hour
# noise_aug_combined['result_minute'] = noise_aug_combined['result_timestamp'].dt.minute

# noise_aug_combined.head()

In [None]:
# # aggregate the data by day
# avg_aug_combined = noise_aug_combined.groupby(['result_date','description']).mean()
# avg_aug_combined = avg_aug_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_aug_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_aug_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_aug_combined.isnull().sum())

- September

In [None]:
# # combine noise data for September together
# noise_sep_combined = pd.concat([naamse35_sep, naamse57_sep, naamse62_sep, calvarie_sep, park_sep, naamse81_sep, kiosk_sep, vrijt_sep, his_sep], axis=0)
# noise_sep_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_sep_combined['result_timestamp'] = pd.to_datetime(noise_sep_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_sep_combined['result_date'] = noise_sep_combined['result_timestamp'].dt.date
# noise_sep_combined['result_month'] = noise_sep_combined['result_timestamp'].dt.month
# noise_sep_combined['result_day'] = noise_sep_combined['result_timestamp'].dt.day
# noise_sep_combined['result_hour'] = noise_sep_combined['result_timestamp'].dt.hour
# noise_sep_combined['result_minute'] = noise_sep_combined['result_timestamp'].dt.minute

# noise_sep_combined.head()

In [None]:
# # aggregate the data by day
# avg_sep_combined = noise_sep_combined.groupby(['result_date','description']).mean()
# avg_sep_combined = avg_sep_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_sep_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_sep_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_sep_combined.isnull().sum())

- October

In [None]:
# # combine noise data for Octber together
# noise_oct_combined = pd.concat([naamse35_oct, naamse57_oct, naamse62_oct, calvarie_oct, park_oct, naamse81_oct, kiosk_oct, vrijt_oct, his_oct], axis=0)
# noise_oct_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_oct_combined['result_timestamp'] = pd.to_datetime(noise_oct_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_oct_combined['result_date'] = noise_oct_combined['result_timestamp'].dt.date
# noise_oct_combined['result_month'] = noise_oct_combined['result_timestamp'].dt.month
# noise_oct_combined['result_day'] = noise_oct_combined['result_timestamp'].dt.day
# noise_oct_combined['result_hour'] = noise_oct_combined['result_timestamp'].dt.hour
# noise_oct_combined['result_minute'] = noise_oct_combined['result_timestamp'].dt.minute

# noise_oct_combined.head()

In [None]:
# # aggregate the data by day
# avg_oct_combined = noise_oct_combined.groupby(['result_date','description']).mean()
# avg_oct_combined = avg_oct_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_oct_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_oct_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_oct_combined.isnull().sum())

- November

In [None]:
# # combine noise data for November together
# noise_nov_combined = pd.concat([naamse35_nov, naamse57_nov, naamse62_nov, calvarie_nov, park_nov, naamse81_nov, kiosk_nov, vrijt_nov, his_nov], axis=0)
# noise_nov_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_nov_combined['result_timestamp'] = pd.to_datetime(noise_nov_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_nov_combined['result_date'] = noise_nov_combined['result_timestamp'].dt.date
# noise_nov_combined['result_month'] = noise_nov_combined['result_timestamp'].dt.month
# noise_nov_combined['result_day'] = noise_nov_combined['result_timestamp'].dt.day
# noise_nov_combined['result_hour'] = noise_nov_combined['result_timestamp'].dt.hour
# noise_nov_combined['result_minute'] = noise_nov_combined['result_timestamp'].dt.minute

# noise_nov_combined.head()

In [None]:
# # aggregate the data by day
# avg_nov_combined = noise_nov_combined.groupby(['result_date','description']).mean()
# avg_nov_combined = avg_nov_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_nov_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_nov_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_nov_combined.isnull().sum())

- December

In [None]:
# # combine noise data for December together
# noise_dec_combined = pd.concat([naamse35_dec, naamse57_dec, naamse62_dec, calvarie_dec, park_dec, naamse81_dec, kiosk_dec, vrijt_dec, his_dec], axis=0)
# noise_dec_combined.head()

In [None]:
# # extract the date, month, hour, minute of "result_timestamp"
# noise_dec_combined['result_timestamp'] = pd.to_datetime(noise_dec_combined['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f')


# noise_dec_combined['result_date'] = noise_dec_combined['result_timestamp'].dt.date
# noise_dec_combined['result_month'] = noise_dec_combined['result_timestamp'].dt.month
# noise_dec_combined['result_day'] = noise_dec_combined['result_timestamp'].dt.day
# noise_dec_combined['result_hour'] = noise_dec_combined['result_timestamp'].dt.hour
# noise_dec_combined['result_minute'] = noise_dec_combined['result_timestamp'].dt.minute

# noise_dec_combined.head()

In [None]:
# # aggregate the data by day
# avg_dec_combined = noise_dec_combined.groupby(['result_date','description']).mean()
# avg_dec_combined = avg_dec_combined.reset_index()
# columns_to_drop = ['result_hour', 'result_minute']
# avg_dec_combined.drop(columns_to_drop, axis=1, inplace=True)
# avg_dec_combined.head(150)

In [None]:
# # check for missing values in each column
# print(avg_dec_combined.isnull().sum())

Combining monthly noise level datasets into a yearly dataset

In [None]:
# List of the monthly datasets
datasets = [avg_jan_combined, avg_feb_combined, avg_mar_combined, avg_apr_combined, avg_may_combined, avg_jun_combined, avg_jul_combined, avg_aug_combined, avg_sep_combined, avg_oct_combined, avg_nov_combined, avg_dec_combined]

# Concatenate the datasets vertically
avg_year_combined = pd.concat(datasets, ignore_index=True)

# Sort the combined dataset by 'result_date' in ascending order
avg_year_combined.sort_values(by='result_date', inplace=True)

# Reset the index of the combined dataset
avg_year_combined.reset_index(drop=True, inplace=True)

# Display the combined and sorted yearly dataset
avg_year_combined.head(2000)
