In [9]:
from sklearn.linear_model import Lasso, ElasticNet, Ridge, SGDRegressor
from sklearn.svm import SVR, NuSVR
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
import pandas as pd
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [29]:
table = PrettyTable()
table.field_names = ["Model", "Mean Squared Error", "R² score","Mean Absolute Error"]
models = [LinearRegression(),
DecisionTreeRegressor(random_state = 0),
RandomForestRegressor( random_state=0, n_estimators=300),
SGDRegressor(max_iter=1000, tol=1e-3),
Lasso(alpha=0.1),
ElasticNet(random_state=0),
Ridge(alpha=.5),
BaggingRegressor(),
BaggingRegressor(KNeighborsClassifier(), max_samples=0.5, max_features=0.5),
]

In [10]:
# load prediction dataset
feature_data = pd.read_csv("prediction_data.csv",parse_dates=["start_time"], low_memory=False)

In [26]:
#Splitting the dataset
from sklearn.model_selection import train_test_split
x = feature_data.drop(["trips"],axis=1)
x = feature_data.drop(["start_time"],axis=1)
y = feature_data["trips"]
# Normalizing the data
from sklearn import preprocessing
# x = preprocessing.normalize(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 0)

In [27]:
x_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6132 entries, 5316 to 2732
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   month          6132 non-null   float64
 1   day            6132 non-null   float64
 2   hour           6132 non-null   float64
 3   mean_duration  6132 non-null   float64
 4   max_temp       6132 non-null   float64
 5   precip         6132 non-null   float64
 6   trips          6132 non-null   int64  
 7   isWeekday      6132 non-null   int64  
dtypes: float64(6), int64(2)
memory usage: 431.2 KB


In [31]:
for model in models:
    model.fit(x_train, y_train)
    y_res = model.predict(x_test)
    mse = mean_squared_error(y_test, y_res)
    score = model.score(x_test, y_test)
    mae = mean_absolute_error(y_test,y_res)
    table.add_row([type(model).__name__, format(mse, '.2f'), format(score, '.2f'),format(mae, '.2f')])
    
print(table)

+-----------------------+------------------------------+----------------------------+---------------------+
|         Model         |      Mean Squared Error      |          R² score          | Mean Absolute Error |
+-----------------------+------------------------------+----------------------------+---------------------+
|    LinearRegression   |             0.00             |            1.00            |         0.00        |
| DecisionTreeRegressor |             0.20             |            1.00            |         0.02        |
| RandomForestRegressor |             0.04             |            1.00            |         0.01        |
|      SGDRegressor     | 4119824090795685774884864.00 | -4568055084550811287552.00 |   792489075491.84   |
|         Lasso         |             0.00             |            1.00            |         0.00        |
|       ElasticNet      |             0.00             |            1.00            |         0.03        |
|         Ridge         |   