In [14]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge, Lasso, LinearRegression 
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

Here we use the same data that was used to train the LightGBM model.

In [10]:
# Load the processed dataset and separate the regression target
# (`pickup_count`) from the remaining columns, which are used as features.
df = pd.read_parquet('../data/processed_data/yellow_23-24_data.parquet')
X = df.drop(columns='pickup_count')
y = df['pickup_count']

# Hold out 20% of the rows for evaluation. The fixed seed makes the split
# reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Linear baselines: ordinary least squares, Lasso (L1) and Ridge (L2).
# Each model is fitted on the training split and scored on the held-out
# test split with R^2 and mean squared error (MSE). No hyper-parameters
# other than the seed are set here.
LR = LinearRegression()
lasso = Lasso(random_state=42)
ridge = Ridge(random_state=42)

LR.fit(X_train, y_train)
lasso.fit(X_train, y_train)
ridge.fit(X_train, y_train)

y_hat_LR = LR.predict(X_test)
y_hat_lasso = lasso.predict(X_test)
y_hat_ridge = ridge.predict(X_test)

r2_LR = r2_score(y_test, y_hat_LR)
r2_lasso = r2_score(y_test, y_hat_lasso)
r2_ridge = r2_score(y_test, y_hat_ridge)

# These are mean *squared* errors, so they are named `mse_*`.
mse_LR = mean_squared_error(y_test, y_hat_LR)
mse_lasso = mean_squared_error(y_test, y_hat_lasso)
mse_ridge = mean_squared_error(y_test, y_hat_ridge)

# Backward-compatible aliases: these values used to be exposed under a
# misleading `mae_` prefix. Kept so that any other cell still reading the
# old names keeps working; prefer the `mse_*` names in new code.
mae_LR, mae_lasso, mae_ridge = mse_LR, mse_lasso, mse_ridge

# NOTE(review): an R^2 of about 0 for Lasso means it does no better than
# predicting the mean of the target. Presumably the default regularisation
# strength is too strong for these (unscaled?) features -- TODO confirm and
# consider tuning `alpha` before treating this as a meaningful baseline.
print(f"LR    R^2 Score:           {r2_LR:.4f}")
print(f"LR    Mean Squared Error:  {mse_LR:.4f}\n")

print(f"Lasso R^2 Score:           {r2_lasso:.4f}")
print(f"Lasso Mean Squared Error:  {mse_lasso:.4f}\n")

print(f"Ridge R^2 Score:           {r2_ridge:.4f}")
print(f"Ridge Mean Squared Error:  {mse_ridge:.4f}\n")

LR    R^2 Score:           0.1711
LR    Mean Squared Error:  5.5787

Lasso R^2 Score:           -0.0000
Lasso Mean Squared Error:  6.7303

Ridge R^2 Score:           0.1711
Ridge Mean Squared Error:  5.5789



In [16]:
# Non-linear baseline: a single decision tree regressor fitted on the
# training split and scored on the held-out test split with R^2 and mean
# squared error (MSE). No hyper-parameters other than the seed are set here.
tree = DecisionTreeRegressor(random_state=42)
tree.fit(X_train, y_train)
y_hat_tree = tree.predict(X_test)

r2_tree = r2_score(y_test, y_hat_tree)
# This is a mean *squared* error, so it is named `mse_tree`.
mse_tree = mean_squared_error(y_test, y_hat_tree)

# Backward-compatible alias: this value used to be exposed under a
# misleading `mae_` prefix. Kept so that any other cell still reading the
# old name keeps working; prefer `mse_tree` in new code.
mae_tree = mse_tree

print(f"Tree R^2 Score:           {r2_tree:.4f}")
print(f"Tree Mean Squared Error:  {mse_tree:.4f}\n")

Tree R^2 Score:           0.9567
Tree Mean Squared Error:  0.2912

