In [1]:
# Import necessary libraries
import numpy as np  # Import NumPy for numerical operations
import pandas as pd  # Import Pandas for data manipulation
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting the dataset
from sklearn.linear_model import LinearRegression  # Import LinearRegression for creating a linear regression model
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # Import metrics for model evaluation
from sklearn.preprocessing import StandardScaler  # Import StandardScaler for feature scaling
import matplotlib.pyplot as plt  # Import Matplotlib for data visualization
import pickle  # Import Pickle for saving the trained model

In [2]:
data=pd.read_csv("processed data/data.csv")

In [3]:
X=data.drop('fare_amount',axis=1)
y=data['fare_amount']

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [6]:
# Create a Linear Regression model
model=Pipeline([
   ( 'scaler',StandardScaler()),
    ('regressor',
   LinearRegression(
))
])

In [7]:
# Fit the Linear Regression model using the training data
model.fit(X_train, y_train)

In [8]:
mean_squared_error(model.predict(X_test),y_test)
# Mean Absolute Error (MAE)
mae = mean_absolute_error(model.predict(X_test),y_test)
print("MAE:", mae)

# Mean Squared Error (MSE)
mse = mean_squared_error(model.predict(X_test),y_test)
print("MSE:", mse)

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print("RMSE:", rmse)

# R-squared
r2 = r2_score(model.predict(X_test),y_test)
print("R-squared:", r2)

MAE: 1.3640649139106058
MSE: 3.4459246463617283
RMSE: 1.8563201896121608
R-squared: 0.5286858037166102


In [10]:
model.coef_


array([[ 4.41289294e-03,  4.78148986e-02,  2.32266094e-01,
        -6.85824186e-02,  6.85407080e-01, -7.41378002e-01,
        -1.78523133e+00, -1.79504173e-01,  3.25309604e-01,
         1.37735611e+00,  4.60188314e+00]])

In [11]:
model.intercept_

array([10.11032399])