In [52]:
import pandas as pd
from datetime import datetime
from sklearn import model_selection
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn import linear_model
from sklearn import neighbors

In [53]:
df_hour = pd.read_csv("hour.csv")
df_day = pd.read_csv("day.csv")

In [54]:
df_hour.drop(columns=['instant', 'dteday'], inplace = True)
df_hour
df_day.drop(columns=['instant', 'dteday'], inplace = True)
df_day

Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562
4,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,1,1,12,0,4,1,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
727,1,1,12,0,5,1,2,0.253333,0.255046,0.590000,0.155471,644,2451,3095
728,1,1,12,0,6,0,2,0.253333,0.242400,0.752917,0.124383,159,1182,1341
729,1,1,12,0,0,0,1,0.255833,0.231700,0.483333,0.350754,364,1432,1796


In [55]:
def evaluate(df, target, model):
    X = df.drop(columns=['casual', 'registered', 'cnt']).values
    y = df[target].values
    kf = model_selection.KFold(n_splits=5, shuffle=True, random_state=10)
        
    rmse = []
    mae = []
    r2 = []
    for train_index_array, train_index_array in kf.split(X): 
        X_train, X_test = X[train_index_array], X[train_index_array]
        y_train, y_test = y[train_index_array], y[train_index_array]
        
        model.fit(X_train, y_train)
        
        y_pred = model.predict(X_test)
        
        rmse.append(mean_squared_error(y_test, y_pred, squared=False))
        mae.append(mean_absolute_error(y_test, y_pred))
        r2.append(r2_score(y_test, y_pred))
        
    print(f"    Avarage Root Mean Squared Error: {np.mean(rmse)}")
    print(f"    Avarage Mean Absolute Error: {np.mean(mae)}")
    print(f"    Avarage R-squared: {np.mean(r2)}")
    print()

In [56]:
print("-----Predict number of casual riders:-----\n")
print("By using Linear Regression:")
evaluate(df_hour, "casual", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_hour, "casual", neighbors.KNeighborsRegressor())
print("-----Predict number of registered riders:-----\n")
print("By using Linear Regression:")
evaluate(df_hour, "registered", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_hour, "registered", neighbors.KNeighborsRegressor())
print("-----Predict total count of riders:-----\n")
print("By using Linear Regression:")
evaluate(df_hour, "cnt", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_hour, "cnt", neighbors.KNeighborsRegressor())


-----Predict number of casual riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 36.32323824437775
    Avarage Mean Absolute Error: 24.52792258462519
    Avarage R-squared: 0.4570280352004037

By using K-Nearest Neighbors Regression:
    Avarage Root Mean Squared Error: 18.10954856826196
    Avarage Mean Absolute Error: 9.937376844301312
    Avarage R-squared: 0.864803411039801

-----Predict number of registered riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 123.24553889683321
    Avarage Mean Absolute Error: 89.03258202990052
    Avarage R-squared: 0.33678511621989093

By using K-Nearest Neighbors Regression:
    Avarage Root Mean Squared Error: 49.15221990730143
    Avarage Mean Absolute Error: 30.72426855312068
    Avarage R-squared: 0.894448478976671

-----Predict total count of riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 141.66568134525795
    Avarage Mean Absolute Error: 105.85354890021

In [57]:
print("-----Predict number of casual riders:-----\n")
print("By using Linear Regression:")
evaluate(df_day, "casual", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_day, "casual", neighbors.KNeighborsRegressor())
print("-----Predict number of registered riders:-----\n")
print("By using Linear Regression:")
evaluate(df_day, "registered", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_day, "registered", neighbors.KNeighborsRegressor())
print("-----Predict total count of riders:-----\n")
print("By using Linear Regression:")
evaluate(df_day, "cnt", linear_model.LinearRegression())
print("By using K-Nearest Neighbors Regression:")
evaluate(df_day, "cnt", neighbors.KNeighborsRegressor())

-----Predict number of casual riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 367.22724579840303
    Avarage Mean Absolute Error: 266.7475584948976
    Avarage R-squared: 0.7068212544325039

By using K-Nearest Neighbors Regression:
    Avarage Root Mean Squared Error: 334.1912686189106
    Avarage Mean Absolute Error: 227.11353089180875
    Avarage R-squared: 0.7536444070855053

-----Predict number of registered riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 641.111032545102
    Avarage Mean Absolute Error: 481.856906530943
    Avarage R-squared: 0.8298701547385121

By using K-Nearest Neighbors Regression:
    Avarage Root Mean Squared Error: 821.5197301694328
    Avarage Mean Absolute Error: 648.5350088528562
    Avarage R-squared: 0.7210912847123303

-----Predict total count of riders:-----

By using Linear Regression:
    Avarage Root Mean Squared Error: 833.5516497776713
    Avarage Mean Absolute Error: 627.973223070190