### Import libraries

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.svm import OneClassSVM
from sklearn import preprocessing
import matplotlib.pyplot as plt
from datetime import timedelta
import seaborn as sns
import xgboost as xgb
import datetime as dt
import pandas as pd
import numpy as np
import warnings
import glob

In [12]:
# Report each distinct warning only the first time it is raised.
warnings.filterwarnings(action='once')
# Apply the matplotlib 'seaborn-poster' style sheet first, then seaborn's "poster" context.
# NOTE(review): recent matplotlib releases appear to have renamed this style to
# 'seaborn-v0_8-poster' — confirm against the installed matplotlib version.
plt.style.use('seaborn-poster')
sns.set_context("poster") 

### Preprocess, merge and clean functions

In [18]:
def preprocessWeather(path_url):
    '''
    Loads one hourly weather file and trims it down to the columns used later on
    
    :path_url: Location of the weather file (anything pd.read_csv accepts)
    
    Returns a Dataframe indexed by 'date' (the day plus HH hours) in which
    FH, T and RH have been divided by 10 and the unused columns are removed
    '''

    unused_columns = ['#STN', 'DD', 'FF', 'FX', 'T10N', 'TD', 'Q',
                      'P', 'VV', 'U', 'WW', 'IX', 'HH', 'YYYYMMDD']
    tenths_columns = ['FH', 'T', 'RH']

    raw = pd.read_csv(path_url)
    # Header names can carry padding spaces; strip every space out of them
    raw.columns = raw.columns.str.replace(' ', '')

    raw[tenths_columns] = raw[tenths_columns] / 10

    # Timestamp = calendar day + hour offset taken from the HH column
    day = pd.to_datetime(raw['YYYYMMDD'], format='%Y%m%d')
    hour_offset = pd.to_timedelta(raw['HH'], unit='h')
    raw['date'] = day + hour_offset

    weather = raw.drop(columns=unused_columns)
    return weather.set_index('date')

def preprocessResono(path_url):
    '''
    Reads in and preprocesses the resono data
    
    :path_url: The path_url to the resono data
    
    Returns a preprocessed Dataframe indexed by 'Datetime' that only holds rows
    from 2020-10-01 onwards and leaves out the 'Vondelpark Oost' location
    '''
    
    df = pd.read_csv(path_url)
    # The saved-index column only exists when the CSV was written with its index,
    # so its absence is not treated as an error
    df = df.drop(columns = ["Unnamed: 0"], errors='ignore')
    
    df['End'] = pd.to_datetime(df['End'])
    # Round trip through a fixed text format: the format carries no UTC offset and no
    # fractional seconds, so the re-parsed values are timezone-naive whole seconds
    df['End'] = pd.to_datetime(df['End'].dt.strftime("%Y-%m-%d %H:%M:%S"))
    
    df = df.rename(columns = {'End' : 'Datetime',
                              'End_Dates' : 'Date',
                              'End_Time' : 'Time'})
    df = df.set_index('Datetime')

    # The index is not guaranteed to be sorted (rows may be grouped per location).
    # Label slicing such as .loc['2020-10':] on an unsorted DatetimeIndex is
    # deprecated and can raise KeyError, so select the rows with a boolean mask.
    df = df[df.index >= pd.Timestamp('2020-10-01')]
    
    df_resono = df[df.Location != 'Vondelpark Oost']

    return df_resono

def mergeWeatherFiles(df_Weather2020, df_Weather2021):
    '''
    Merges the weather data and resamples it to 15 minute steps
    
    :df_Weather2020: Weather data from 2020
    :df_Weather2021: Weather data from 2021
    
    Both inputs need a DatetimeIndex and the columns FH, T, N, RH, DR, SQ,
    M, R, S, O and Y. The inputs themselves are not modified.
    
    Returns a merged weather Dataframe from 2020-10-01 onwards at a 15 minute
    frequency: FH, T and N are linearly interpolated, the other columns are
    back-filled from the next available row
    '''
    
    df_weather = pd.concat([df_Weather2020, df_Weather2021], axis=0)
    # Explicit copy: the columns are reassigned below, which would otherwise be a
    # chained assignment on a slice of the concatenated frame
    df_weather = df_weather[df_weather.index >= pd.Timestamp('2020-10-01')].copy()

    cols_int = ['SQ', 'DR', 'N', 'M', 'R', 'S', 'O', 'Y']
    cols_float = ['FH', 'T']

    # Convert column by column; anything that is not a number becomes NaN
    numeric_cols = cols_float + cols_int
    df_weather[numeric_cols] = df_weather[numeric_cols].apply(pd.to_numeric, errors='coerce')

    # An RH of exactly -0.1 is replaced by 0.05
    # NOTE(review): presumably -0.1 is the source's marker for "trace amount" — confirm
    rain = df_weather['RH']
    df_weather['RH'] = rain.where(rain != -0.1, 0.05)
    
    interpolated = df_weather[['FH', 'T', 'N']].resample('15min').interpolate(method='linear')
    backfilled = df_weather[['RH', 'DR', 'SQ', 'M', 'R', 'S', 'O', 'Y']].resample('15min').bfill()
    df_weather_resample = pd.concat([interpolated, backfilled], axis=1)
    
    # NOTE(review): these factors presumably rescale hourly totals to 15 minute
    # values (durations in tenths of an hour -> minutes per quarter, rain amount
    # split over four quarters) — confirm against the column definitions
    df_weather_resample[['DR', 'SQ']] = df_weather_resample[['DR', 'SQ']] * 1.5
    df_weather_resample['RH'] = df_weather_resample['RH'] / 4
    
    return df_weather_resample 

def mergeWeatherResono(df_resono, df_weather):
    '''
    Attaches the weather columns to the resono rows and gives them readable names
    
    :df_resono: All resono data
    :df_weather: All weather data
    
    Every resono row is kept (left join on the index); rows without a matching
    weather index get missing values in the weather columns
    
    Returns the combined Dataframe
    '''

    readable_names = {'T': 'Temperature', 'N': 'Clouds', 'FH': 'Windspeed',
                      'RH': 'Rain amount', 'DR': 'Rain duration' , 'SQ': 'Sun duration',
                      'M': 'Fog', 'R': 'Rain', 'S': 'Snow', 'O': 'Thunder', 'Y': 'Ice'}

    combined = df_resono.merge(df_weather, how='left', left_index=True, right_index=True)
    return combined.rename(columns=readable_names)

def clean_resono(df, merge=False):
    '''
    ~~Probably defunct once we merge the datasets~~
    Quick cleaning of the Resono data
    
    :df: Dataframe to clean (needs the columns Location and End); it is left unchanged
    :merge: True if Noord/Zuid and Oost/West need to be merged (default = False)
    
    Returns a cleaned Dataframe indexed by 'Datetime', with 'Westergasfabriek'
    renamed to 'Westerpark' and End_Dates/End_Time renamed to Date/Time
    '''
    # Work on a copy so the caller's Dataframe is not modified as a side effect
    df = df.copy()

    df.loc[(df.Location == 'Westergasfabriek'),'Location'] = 'Westerpark'
    df['End'] = pd.to_datetime(df['End'])
    
    if merge:
        # Merge Noord-Zuid & Oost-West into one location
        for park in ('Rembrandtpark', 'Vondelpark'):
            # Literal match; rows without a location never match instead of breaking the mask
            is_part_of_park = df.Location.str.contains(park, regex=False, na=False)
            df.loc[is_part_of_park, 'Location'] = park
    
    df = df.rename(columns = {'End' : 'Datetime',
                              'End_Dates' : 'Date',
                              'End_Time' : 'Time'})
    df = df.set_index('Datetime')
    return df
    

### Reading in the data files

In [19]:
# Load the two hourly weather files and the resono visitor counts from disk.
df_Weather2020 = preprocessWeather("KNMI (Weather) 2020-2021/uurgeg_240_2011-2020.txt")
df_Weather2021 = preprocessWeather("KNMI (Weather) 2020-2021/uurgeg_240_2021-2030_new.txt")
df_resono = preprocessResono("resono_2020_2022.csv")

# Combine both weather files into one 15 minute series, then attach it to the resono rows.
df_weather = mergeWeatherFiles(df_Weather2020, df_Weather2021)
df_resono_weather = mergeWeatherResono(df_resono, df_weather)

  """Entry point for launching an IPython kernel.


In [20]:
df_resono

Unnamed: 0_level_0,Location,Visits,Date,Time
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-10-01 00:15:00,Erasmuspark,243,2020-10-01,00:15:00
2020-10-01 00:30:00,Erasmuspark,228,2020-10-01,00:30:00
2020-10-01 00:45:00,Erasmuspark,175,2020-10-01,00:45:00
2020-10-01 01:00:00,Erasmuspark,190,2020-10-01,01:00:00
2020-10-01 01:15:00,Erasmuspark,128,2020-10-01,01:15:00
...,...,...,...,...
2022-01-06 13:45:00,Westerpark West,116,2022-01-06,13:45:00
2022-01-06 14:00:00,Westerpark West,107,2022-01-06,14:00:00
2022-01-06 14:15:00,Westerpark West,112,2022-01-06,14:15:00
2022-01-06 14:30:00,Westerpark West,114,2022-01-06,14:30:00
