In [472]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score

In [473]:
import numpy as np
class Metrics:
    @staticmethod
    def  mean_absolute_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs(y_true-predictions)))
    @staticmethod
    def mean_squared_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean((y_true-predictions)**2))
    @staticmethod
    def root_mean_squared_error(y_test, y_pred):
        return float(np.sqrt(Metrics.mean_squared_error(y_test, y_pred)))
    @staticmethod
    def mean_absolute_percentage_error(y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        return float(np.mean(np.abs((y_true-predictions)/y_true)))
    @staticmethod
    def r_2_score( y_test, y_pred):
        y_true, predictions = np.array(y_test), np.array(y_pred)
        mean_value = np.mean(y_test)
        return float(1 - np.sum((y_true - predictions)**2)/ np.sum((y_true - mean_value)**2))
    

In [474]:
import numpy as np
class MyLinearRegression:
    def __init__( self, learning_rate = 0.0001, iterations =1000 ) :
        self.learning_rate = learning_rate        
        self.iterations = iterations        

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y, w):
        return sum((y - np.dot(x, w)) ** 2  )/x.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-20
        X = self.transform_(x)

        w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iterations:
            loss = self.loss_func(X, y,w)
            w = w - self.learning_rate * 2 * np.dot(X.T, np.dot(X, w) - y) /X.shape[0]
            dist = np.abs(loss - self.loss_func(X, y,w))
            iter += 1
            
            if(dist <= eps):
                break
        print(iter)

        self.w = w
        return self
        

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [475]:
data= pd.read_csv("../data/trip_duration_task_m.csv")
data.drop(["Unnamed: 0"], axis=1, inplace=True)


In [476]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199494 entries, 0 to 199493
Data columns (total 5 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   pickup_longitude   199494 non-null  float64
 1   pickup_latitude    199494 non-null  float64
 2   dropoff_longitude  199494 non-null  float64
 3   dropoff_latitude   199494 non-null  float64
 4   trip_duration      199494 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 7.6 MB


In [477]:
y = data['trip_duration']
X = data.drop(['trip_duration'], axis=1)

In [478]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [479]:
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_std = pd.DataFrame(standardScaler.transform(X_train), columns=X.columns)
X_test_std = pd.DataFrame(standardScaler.transform(X_test), columns=X.columns)
X_train_std.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,5.485419e-14,-9.784435e-14,9.291421e-14,-5.875157e-14
std,1.000003,1.000003,1.000003,1.000003
min,-11.06038,-11.96884,-18.37213,-14.10062
25%,-0.4796059,-0.4797874,-0.4881299,-0.4898499
50%,-0.2182762,0.112256,-0.174153,0.08246589
75%,0.1557606,0.61528,0.2780986,0.5507349
max,14.4775,20.09006,34.39554,17.40549


In [480]:
minMaxScaler = MinMaxScaler()
minMaxScaler.fit(X_train)
X_train_mms = pd.DataFrame(minMaxScaler.transform(X_train), columns=X.columns)
X_test_mms = pd.DataFrame(minMaxScaler.transform(X_test), columns=X.columns)
X_train_mms.describe()

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
count,159595.0,159595.0,159595.0,159595.0
mean,0.433097,0.373339,0.34817,0.447552
std,0.039158,0.031193,0.018951,0.03174
min,0.0,0.0,0.0,0.0
25%,0.414317,0.358373,0.33892,0.432004
50%,0.42455,0.376841,0.34487,0.450169
75%,0.439196,0.392531,0.35344,0.465032
max,1.0,1.0,1.0,1.0


In [481]:
np.set_printoptions(suppress=True)

In [482]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_std, y_train)
y_pred_pf = myLinearRegression.predict(X_test_std)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

1001
MAE: 807.8950778774531
MSE: 11990532.5824192
RMSE: 3462.7348414828416
MAPE: 0.8204543318050092
R^2: -0.0525336


In [483]:
myLinearRegression = MyLinearRegression()
model = myLinearRegression.fit(X_train_mms, y_train)
y_pred_pf = myLinearRegression.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred_pf)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred_pf)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred_pf)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred_pf)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred_pf):.7f}')

1001
MAE: 727.2149397905063
MSE: 11891540.299831105
RMSE: 3448.4112718513006
MAPE: 0.7007849214077622
R^2: -0.0438440


In [486]:
import numpy as np
class MyRidge:
    def __init__( self, learning_rate, iterations, alpha ) :
        self.learning_rate = learning_rate        
        self.iterations = iterations        
        self.alpha = alpha

    def transform_(self, x):
        return np.concatenate((np.ones((len(x), 1)), x), axis = 1)

    def loss_func(self, x, y, w):
        return sum(y - np.dot(x, w)) ** 2  + self.alpha * (np.matmul(w, w))/X.shape[0]

    def fit(self, x, y):
        dist = np.inf
        eps = 1e-20
        X = self.transform_(x)

        w = np.zeros(X.shape[1])
        iter = 0

        while iter <= self.iterations:
            loss = self.loss_func(X, y, w)
            w = w - ((self.learning_rate * 2 * np.dot(X.T, np.dot(X, w) - y)) + ( 2 * self.alpha * w ))/X.shape[0]
            dist = np.abs(loss - self.loss_func(X, y, w))
            iter += 1
            
            if(dist <= eps):
                break
        print(iter)
        self.w = w
        return self
        

    def predict(self, x):
        return np.dot(self.transform_(x), self.w)

In [487]:
ridge = MyRidge(0.01, 1000, 1)
model = ridge.fit(X_train_std, y_train)
y_pred = ridge.predict(X_test_std)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred):.7f}')

1001
MAE: 584.3242359126286
MSE: 11323170.7975201
RMSE: 3364.9919461300497
MAPE: 1.5874738268237476
R^2: 0.0060477


In [488]:
ridge = MyRidge(0.01, 1000, 0.5)
model = ridge.fit(X_train_mms, y_train)
y_pred = ridge.predict(X_test_mms)
print(f'MAE: {Metrics.mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {Metrics.mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {Metrics.root_mean_squared_error(y_test, y_pred)}')
print(f'MAPE: {Metrics.mean_absolute_percentage_error(y_test, y_pred)}')
print(f'R^2: {Metrics.r_2_score(y_test, y_pred):.7f}')

1001
MAE: 633.6350109343017
MSE: 11385787.184564859
RMSE: 3374.283210485578
MAPE: 1.5329645008093007
R^2: 0.0005512
