In [1]:
from sklearn.model_selection import train_test_split
from dateutil.relativedelta import relativedelta
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import radians, cos, sin, asin, sqrt
from sklearn.svm import OneClassSVM
#from functionsPredictions import *
from sklearn import preprocessing
import matplotlib.pyplot as plt
import category_encoders as ce
from datetime import timedelta
from scipy import stats
import seaborn as sns
import xgboost as xgb
import datetime as dt
import pandas as pd
import numpy as np
import warnings
import glob

warnings.filterwarnings('ignore')

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     display(df_holiday2)

In [2]:
vondelpark_west = [{'lat': 52.356496, 'lng': 4.861447}]
vondelpark_oost_3 = [{'lng': 4.869217, 'lat': 52.358252}]
vondelpark_oost_2 = [{'lng': 4.874692, 'lat': 52.359798}]
vondelpark_oost_1 = [{'lng': 4.879652, 'lat': 52.360991}]
oosterpark = [{'lng': 4.920558, 'lat': 52.360098}]
sarphatipark = [{'lng': 4.896375, 'lat': 52.354364}]
westerpark_west = [{'lng': 4.867128, 'lat': 52.387099}]
westerpark_centrum = [{'lng': 4.873268, 'lat': 52.387374}]
westerpark_oost = [{'lng': 4.878379, 'lat': 52.386379}]
westergasfabriek = [{'lng': 4.869769, 'lat': 52.385920}]
rembrandtpark_noord = [{'lng': 4.846573, 'lat': 52.366664}]
rembrandtpark_zuid = [{'lng': 4.846932, 'lat': 52.361161}]
erasmuspark = [{'lng': 4.851909, 'lat': 52.374808}]
amstelpark = [{'lng': 4.894404, 'lat': 52.330409}]
park_frankendael = [{'lng': 4.929839, 'lat': 52.350703}]
beatrixpark = [{'lng': 4.881352, 'lat': 52.342471}]
flevopark = [{'lng': 4.947881, 'lat': 52.360087}]
gaasperpark = [{'lng': 4.992192, 'lat': 52.310420}]
nelson_mandelapark = [{'lng': 4.963691, 'lat': 52.312204}]
noorderpark = [{'lng': 4.919606, 'lat': 52.392651}]
sloterpark = [{'lng': 4.811894, 'lat': 52.366219}]
wh_vliegenbos = [{'lng': 4.931495, 'lat': 52.388802}]


In [18]:
def preprocessResono(path_url):
    '''
    Reads in and preprocesses the resono data
    
    :path_url: The path_url to the resono data
    
    Returns a preprocessed Dataframe
    '''
    
    df = pd.read_csv(path_url)
    df = df.drop(columns = ["Unnamed: 0"])
    
    df['End'] = pd.to_datetime(df['End'])
    df['End'] = pd.to_datetime(df['End'].dt.strftime("%Y-%m-%d %H:%M:%S"))
    
    df = df.rename(columns = {'End' : 'Datetime',
                              'End_Dates' : 'Date',
                              'End_Time' : 'Time'})
    df = df.set_index('Datetime')
    df = df.loc['2020-10':]
    
    df = df[df.Location != 'Vondelpark Oost']
    df = df[df.Location != 'Westerpark']
    df = df[df.Location != 'Rembrandtpark Noord']
    df = df[df.Location != 'Rembrandtpark Zuid']

    return df

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

def ceil_dt(dt, delta):
    return dt + (dt.min - dt) % delta

def getParkSuggestion(df, date, lat, lng, pred):
    resono = df.reset_index()

    prediction_date_4_months = date + relativedelta(months=-4, days=-1)
    df_4months = resono[(resono['Datetime'] >= prediction_date_4_months) & (resono['Datetime'] < date)]
    
    df_4months['Weekday'] = df_4months['Datetime'].apply(lambda x: x.weekday())
    df_4months_groupby = df_4months.groupby(['Time', 'Weekday', 'Location']).mean()
    
    df_4months_groupby = df_4months_groupby.reset_index()
    df_baseline = df_4months_groupby[(df_4months_groupby['Weekday'] == date.weekday()) & 
                                  (df_4months_groupby['Time'] == ceil_dt(prediction_time + timedelta(minutes=15), timedelta(minutes=15)).strftime("%H:%M:%S"))]
    
    df_baseline['Longitude'] = df_baseline['Location'].apply(lambda x: globals()["_".join(f"{x.lower()}".split())][0]['lng'])
    df_baseline['Latitude'] = df_baseline['Location'].apply(lambda x: globals()["_".join(f"{x.lower()}".split())][0]['lat'])
    
    df_baseline['Distance'] = [haversine(lat, lng, df_baseline.iloc[x]['Latitude'], df_baseline.iloc[x]['Longitude']) 
                         for x in range(df_baseline.shape[0])]

    df_baseline['Predictions'] = pred
    df_baseline['Crowdedness factor'] = (df_baseline['Predictions'] - df_baseline['Visits']) / df_baseline['Visits'] #(baseline - values) / values
    df_baseline['Park suggestion'] = df_baseline['Distance'] + (df_baseline['Crowdedness factor']*5)
    df_baseline['Distance'] = round(df_baseline['Distance'],2)
    
    df_baseline = df_baseline.reset_index(drop=True)
    df_baseline.index += 1
    
    if(df_baseline['Distance'] <= 3).sum() >= 3: 
        df_baseline = df_baseline[df_baseline['Distance'] <= 3]
        return df_baseline.sort_values(by='Park suggestion')[['Location', 'Distance', 'Park suggestion']].iloc[:3]
    else:
        df_park_suggestion = df_baseline[df_baseline['Distance'] <= 3].sort_values(by='Park suggestion')
        return pd.concat([df_park_suggestion, df_baseline.sort_values(by='Distance')[df_park_suggestion.shape[0]:]], axis=0)

In [5]:
resono = preprocessResono("resono_2020_2022.csv")
resono['Location'] = resono['Location'].str.replace('W.H. Vliegenbos', 'WH Vliegenbos')

In [24]:
prediction_date = dt.datetime(2022, 1, 5) ##Fixed variable
time = dt.datetime.now()
prediction_time = prediction_date + timedelta(hours=time.hour, minutes=time.minute) 


# current_lat = 52.357737
# current_lng = 4.890803

current_lat = 52.332204
current_lng = 4.968691

values = [128, 800, 155, 130, 100, 370, 1050, 600, 280, 590, 420, 610, 566, 300, 453, 38, 145, 106, 172, 146]

park_suggestion = getParkSuggestion(resono, prediction_date, current_lat, current_lng, values)
park_suggestion

Unnamed: 0,Time,Weekday,Location,Visits,Longitude,Latitude,Distance,Predictions,Crowdedness factor,Park suggestion
6,16:00:00,2,Nelson Mandelapark,426.9375,4.963691,52.312204,2.28,370,-0.133363,1.617431
5,16:00:00,2,Gaasperpark,141.4375,4.992192,52.31042,3.56,100,-0.292974,2.092087
4,16:00:00,2,Flevopark,165.625,4.947881,52.360087,3.86,130,-0.215094,2.783985
9,16:00:00,2,Park Frankendael,340.4375,4.929839,52.350703,4.78,280,-0.177529,3.893923
8,16:00:00,2,Oosterpark,476.6875,4.920558,52.360098,6.18,600,0.258686,7.473589
16,16:00:00,2,WH Vliegenbos,37.375,4.931495,52.388802,7.51,38,0.016722,7.594848
1,16:00:00,2,Amstelpark,136.875,4.894404,52.330409,8.26,128,-0.06484,7.93853
10,16:00:00,2,Sarphatipark,585.375,4.896375,52.354364,8.41,590,0.007901,8.447076
7,16:00:00,2,Noorderpark,1101.875,4.919606,52.392651,8.64,1050,-0.047079,8.403555
2,16:00:00,2,Beatrixpark,1287.25,4.881352,52.342471,9.78,800,-0.37852,7.885433


In [15]:
prediction_time

datetime.datetime(2022, 1, 5, 11, 49)

In [None]:
dt.datetime.now()

In [None]:
values = [128, 1000, 155, 130, 100, 370, 1050, 600, 280, 570, 420, 610, 566, 300, 453, 38, 145, 106, 172, 146]