In [57]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import max_error
from sklearn.metrics import r2_score

In [36]:
# Fixed seed for the shuffle so that a fresh "Restart & Run All" reproduces
# the same row order (and therefore the same train/test split and scores).
SEED = 42

df = pd.read_csv("data/bike.csv", sep=',')
# Shuffle the rows once; the split further down is purely positional.
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)
# Convert datetime to day of month (vectorised: the column is parsed once
# instead of calling pd.to_datetime on every single element)
df["dteday"] = pd.to_datetime(df["dteday"]).dt.day
# Rename datetime column by label; writing into df.columns.values would
# mutate the Index's backing array in place and depend on column position.
df = df.rename(columns={"dteday": "day"})
# Extract ground truth
gt = df["cnt"].values
# Drop the target (cnt) together with instant, casual and registered,
# which are not used as features
df = df.drop(['instant', 'casual', 'registered', 'cnt'], axis=1)
# Show only a preview rather than the whole frame
df.head(10)

Unnamed: 0,day,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,13,4,1,10,5,0,6,0,1,0.30,0.2879,0.61,0.1940
1,14,2,0,6,14,0,2,1,2,0.64,0.6212,0.47,0.1940
2,26,3,1,8,6,0,0,0,1,0.62,0.5606,0.88,0.2985
3,14,1,1,2,1,0,2,1,2,0.26,0.3030,0.56,0.0000
4,8,3,1,9,20,0,6,0,2,0.58,0.5455,0.88,0.0896
5,23,3,1,6,14,0,6,0,1,0.78,0.6818,0.33,0.2537
6,3,3,1,9,3,1,1,0,1,0.66,0.5909,0.89,0.1343
7,21,1,0,2,14,1,1,0,2,0.32,0.3030,0.76,0.2537
8,28,3,1,8,21,0,2,1,1,0.72,0.6667,0.48,0.0896
9,9,3,1,9,23,0,0,0,1,0.56,0.5303,0.52,0.2836


In [54]:
# Positional split: the leading 85% of the (already shuffled) rows form the
# training set, the remaining rows form the test set.
values = df.values
trainCount = len(df) * 0.85

# Number of integer row positions that lie strictly below the (possibly
# fractional) threshold, i.e. trainCount rounded up.
splitAt = int(trainCount)
if splitAt < trainCount:
    splitAt += 1

# Keep plain Python lists of rows / targets, one entry per sample.
train = list(values[:splitAt])
trainGt = list(gt[:splitAt])
test = list(values[splitAt:])
testGt = list(gt[splitAt:])

assert len(train) == len(trainGt)
assert len(test) == len(testGt)
print(f"Train size: {len(train)} Test size: {len(test)}")

Train size: 14773 Test size: 2606


In [49]:
# Inspect the first training row to sanity-check the feature layout
# (evaluate trainGt[0] instead to see the matching target value).
train[0]

array([13.    ,  4.    ,  1.    , 10.    ,  5.    ,  0.    ,  6.    ,
        0.    ,  1.    ,  0.3   ,  0.2879,  0.61  ,  0.194 ])

In [58]:
def test_regressor(name,regressor):
    """Fit ``regressor`` on the training split and evaluate it on the test split.

    Reads the notebook-level ``train``/``trainGt`` (fit data) and
    ``test``/``testGt`` (evaluation data).

    Parameters
    ----------
    name : str
        Label copied unchanged into the result under the ``"name"`` key.
    regressor : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's object is trained after this call.

    Returns
    -------
    dict
        ``{"name", "r2", "mse", "max_error"}`` where the three metrics are
        computed between ``testGt`` and the predictions for ``test``.
    """
    regressor.fit(train, trainGt)
    predictedValues = regressor.predict(test)
    # Derive R^2 from the predictions we already have. regressor.score(test,
    # testGt) would call predict() a second time to compute the same value
    # for a single-target regressor.
    r2Score = r2_score(testGt, predictedValues)
    mse = mean_squared_error(testGt, predictedValues)
    maxE = max_error(testGt, predictedValues)
    return { "name": name, "r2": r2Score, "mse": mse, "max_error": maxE }

In [60]:
# Ridge regression (linear model, regularisation strength alpha=1.0)
ridgeRegressor = Ridge(alpha=1.0)
test_regressor(name="Ridge alpha=1.0", regressor=ridgeRegressor)

{'name': 'Ridge alpha=1.0',
 'r2': 0.38341906906916634,
 'mse': 20390.747081994323,
 'max_error': 582.2867301644875}

In [61]:
# Decision tree regressor with scikit-learn's default hyper-parameters
treeRegressor = DecisionTreeRegressor()
test_regressor(name="DecisionTreeRegressor default", regressor=treeRegressor)

{'name': 'DecisionTreeRegressor default',
 'r2': 0.8903072847058252,
 'mse': 3627.6120491174215,
 'max_error': 575.0}