In [None]:
# Installs the openml library, which is needed to load the Bike Sharing Demand dataset.
! pip install openml

In [2]:
# Imports the necessary libraries for machine learning, data preprocessing, and model evaluation.

import openml
from scipy import stats
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Loads the Bike Sharing Demand dataset from OpenML by its numeric dataset id.
# `datalist` holds the OpenML dataset listing as a DataFrame; it is not referenced by
# any other visible cell and is kept only for interactive browsing.
BIKE_SHARING_DATASET_ID = 1414
datalist = openml.datasets.list_datasets(output_format="dataframe")
dataset = openml.datasets.get_dataset(BIKE_SHARING_DATASET_ID)

In [101]:
# Pulls the data out of the OpenML dataset object, using the dataset's own default
# target attribute as the prediction target. The call returns the feature matrix (X),
# the target vector (y), a categorical indicator and the attribute names.
target_column = dataset.default_target_attribute
X, y, categorical_indicator, attribute_names = dataset.get_data(target=target_column)

In [102]:
# Drops every row of the feature matrix that contains a missing value, then keeps only
# the matching rows of the target vector. Without the second step y would still contain
# the dropped rows, and train_test_split would reject X and y for having different lengths.
# Relies on X and y sharing the same row index (both come from the same get_data call).
X = X.dropna()
y = y.loc[X.index]

In [103]:
# Displays the first five rows of the feature matrix (the default for head()) as a quick sanity check of columns and values.
X.head()

Unnamed: 0,time,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,dayOfWeek
0,00:00:00,1,0,0,1,9.84,14.395,81,0.0,zaterdag
1,01:00:00,1,0,0,1,9.02,13.635,80,0.0,zaterdag
2,02:00:00,1,0,0,1,9.02,13.635,80,0.0,zaterdag
3,03:00:00,1,0,0,1,9.84,14.395,75,0.0,zaterdag
4,04:00:00,1,0,0,1,9.84,14.395,75,0.0,zaterdag


In [104]:
# Displays the first five values of the target vector (the default for head()) as a quick sanity check.
y.head()

0    16.0
1    40.0
2    32.0
3    13.0
4     1.0
Name: count, dtype: float64

In [122]:
# Imports the necessary libraries for model selection, data preprocessing, and model training.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.neighbors import KNeighborsRegressor

In [123]:
# Holds out 20% of the rows as a test set and keeps the remaining 80% for training.
# random_state=101 fixes the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [124]:
# Encodes the string-valued columns ('dayOfWeek', 'time') as integer codes.
#
# Each column gets its own LabelEncoder that is fitted on the training split only; the
# test split is then converted with that same fitted mapping. Re-fitting on the test
# split can silently assign different codes to the same category whenever a category is
# missing from one of the splits. With a train-only fit, transform() raises a ValueError
# if the test split contains a category that was never seen in training, instead of
# mis-encoding it.
#
# The splits are copied first so the column assignments below do not write into frames
# that may be views of X (this also avoids pandas' SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

label_encoders = {}  # fitted encoder per column, kept so codes can be mapped back later
for column in ['dayOfWeek', 'time']:
    label_encoder = LabelEncoder()
    X_train[column] = label_encoder.fit_transform(X_train[column])
    X_test[column] = label_encoder.transform(X_test[column])
    label_encoders[column] = label_encoder

In [125]:
# Rescales the continuous weather columns with MinMaxScaler.
# The scaler is fitted on the training split only and then reused unchanged on the test
# split, so no information from the test rows is used when computing the scaling.
numeric_columns = ['temp', 'atemp', 'humidity', 'windspeed']
scaler = MinMaxScaler()
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])


In [150]:
# Fits a k-nearest-neighbours regressor on the training split, with k = 9.
N_NEIGHBORS = 9
knn = KNeighborsRegressor(n_neighbors=N_NEIGHBORS)
knn.fit(X=X_train, y=y_train)

In [151]:
# Predicts the target for every row of the test split with the fitted KNN regressor.
# y_pred is compared against y_test in the evaluation cell.
y_pred = knn.predict(X_test)

In [152]:
# Scores the test-set predictions against the true targets with three regression
# metrics: mean squared error, mean absolute error and the R2 score.
for label, metric in (
    ("MSE:", mean_squared_error),
    ("MAE:", mean_absolute_error),
    ("R2:", r2_score),
):
    print(label, metric(y_test, y_pred))

MSE: 7003.607659082407
MAE: 56.299153147638
R2: 0.7862833571216792
