In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
import numpy as np


def MAE(predicted, real):
    return np.sum(np.abs(predicted - real))/real.shape[0]

def MSE(predicted, real):
    return np.sum((real-predicted)**2)/real.shape[0]

def RMSE(predicted, real):
    return np.sqrt(MSE(predicted, real))

def MAPE(predicted, real):
    return np.sum(np.abs((predicted - real) / real))/real.shape[0]

def R2(predicted, real):
    return 1 - ((MSE(predicted, real) * np.mean(real)) / np.sum((real - predicted)**2))

In [3]:
class RidgeReg():
  def __init__(self, alpha):
    self.thetas = None
    self.loss_history = []
    self.alpha = alpha
 
  def add_ones(self, x):
    return np.c_[np.ones((len(x), 1)), x]

  def objective(self, x, y, thetas, n):
    return (np.sum((y - self.h(x, thetas)) ** 2) + self.alpha * np.dot(thetas, thetas)) / (2 * n)
 
  def h(self, x, thetas):
    return np.dot(x, thetas)
 
  def gradient(self, x, y, thetas, n):
    return (np.dot(-x.T, (y - self.h(x, thetas))) + (self.alpha * thetas)) / n
 
  def fit(self, x, y, iter = 2000, learning_rate = 0.05):
    x, y = x.copy(), y.copy()
    x = self.add_ones(x)
 
    thetas, n = np.zeros(x.shape[1]), x.shape[0]
 
    loss_history = []
    for i in range(iter):
      loss_history.append(self.objective(x, y, thetas, n))
      grad = self.gradient(x, y, thetas, n)
      thetas -= learning_rate * grad
    self.thetas = thetas
    self.loss_history = loss_history
  def predict(self, x):
    x = x.copy()
    x = self.add_ones(x)
    return np.dot(x, self.thetas)

In [4]:
df = pd.read_csv( "../data/beautiful_df.csv" )
# df.drop(['Unnamed: 0'], axis=1, inplace=True)
Y = df["price_usd"]
X = df.drop(["price_usd"], axis=1)  

X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 1 / 3, random_state = 0 )

 
scaler = StandardScaler()
 
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

ridge_gd = RidgeReg(alpha = 0.9)
 
ridge_gd.fit(X_train, Y_train, iter = 500, learning_rate = 0.01)

y_pred = ridge_gd.predict(X_test)

print('MAE: ' + str(MAE(Y_test, y_pred)))
print('MAPE: ' + str(MAPE(Y_test, y_pred)))
print('MSE: ' + str(MSE(Y_test, y_pred)))
print('RMSE: ' + str(RMSE(Y_test, y_pred)))
print('R2: ' + str(R2(Y_test, y_pred)))

MAE: 2297.8955910354157
MAPE: 1.2705056401118358
MSE: 12706021.620589316
RMSE: 3564.55068986111
R2: 0.4863023394685092


In [5]:
y_pred

array([ 7079.16668359,  5866.6650147 ,  5853.82072122, ...,
        8577.55312022,  4776.30090346, 10781.20734707])