In [150]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.preprocessing import normalize

In [151]:
# Linear-regression hypothesis function in vectorized form
def h(X, w_0):
    """Return the linear-regression predictions for all rows of ``X``.

    Args:
        X: Feature matrix (in this notebook, shape ``(n_samples, n_features)``).
        w_0: Weight vector (in this notebook, shape ``(n_features, 1)``).

    Returns:
        The product ``np.dot(X, w_0)``.
    """
    predictions = np.dot(X, w_0)
    return predictions

In [152]:
# Loss function in vectorized form
def loss_func(X, y, w_0):
    """Compute the halved mean squared error of the hypothesis ``h``.

    Args:
        X: Feature matrix; ``X.shape[0]`` is used as the sample count.
        y: Targets, subtracted element-wise from ``h(X, w_0)`` (NumPy
            broadcasting applies, so ``y`` should have the same shape as
            the predictions).
        w_0: Weight vector passed through to ``h``.

    Returns:
        ``sum((h(X, w_0) - y) ** 2) / (2 * n)`` as a scalar.
    """
    residuals = h(X, w_0) - y
    sample_count = X.shape[0]
    return np.sum(residuals ** 2) / (2 * sample_count)

In [153]:
# One step of gradient descent
def grad_step(w_0, grad, learning_rate = 0.001):
    """Move the weights one step against the gradient.

    Args:
        w_0: Current weights.
        grad: Gradient of the loss with respect to the weights.
        learning_rate: Step size multiplier applied to ``grad``.

    Returns:
        The updated weights ``w_0 - learning_rate * grad``; the input
        ``w_0`` is not modified in place.
    """
    return w_0 - learning_rate * grad


def grad(X, y, w_0):
    """Compute the gradient of ``loss_func`` with respect to the weights.

    Args:
        X: Feature matrix; ``X.shape[0]`` is used as the sample count.
        y: Targets, subtracted element-wise from ``h(X, w_0)``.
        w_0: Current weights.

    Returns:
        ``X.T · (h(X, w_0) - y) / n``.
    """
    residuals = h(X, w_0) - y
    sample_count = X.shape[0]
    return np.dot(X.T, residuals) / sample_count


def grad_descent(X, y, w_0, num_iter=1000, learning_rate = 0.001, epsilon = 1e-10):
    """Run batch gradient descent for linear regression.

    Each iteration computes the gradient with ``grad``, updates the weights
    with ``grad_step`` and records the new value of ``loss_func``. The loop
    stops early once the absolute change in loss between two consecutive
    iterations drops below ``epsilon``.

    Args:
        X: Feature matrix.
        y: Targets.
        w_0: Initial weights.
        num_iter: Maximum number of update steps.
        learning_rate: Step size passed to ``grad_step``.
        epsilon: Early-stopping threshold on the absolute loss change.

    Returns:
        A tuple ``(w_0, d_w, loss_history)``:
        the final weights, the gradient used for the last update (i.e.
        evaluated at the weights *before* that update), and the list of loss
        values starting with the loss at the initial weights.
        If no update step is performed (``num_iter <= 0``), ``d_w`` is the
        gradient at the initial weights and ``loss_history`` has one entry.
    """
    loss_history = [loss_func(X, y, w_0)]
    d_w = None

    for _ in range(num_iter):
        d_w = grad(X, y, w_0)
        w_0 = grad_step(w_0, d_w, learning_rate=learning_rate)
        loss_history.append(loss_func(X, y, w_0))
        # Converged: the latest step barely changed the loss.
        if abs(loss_history[-1] - loss_history[-2]) < epsilon:
            break

    if d_w is None:
        # The loop body never ran; without this the return below would
        # reference an unassigned local and raise UnboundLocalError.
        d_w = grad(X, y, w_0)

    return w_0, d_w, loss_history

In [154]:
# Read the housing data from the CSV file next to the notebook and show it
df = pd.read_csv(filepath_or_buffer='Housing.csv')
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [160]:
# Dataset check: number of missing values per column
df.isna().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [155]:
def normalization(value):
    """Mean-normalize a sequence of numbers.

    Every element ``x`` is mapped to ``(x - mean) / (max - min)``, where the
    statistics are taken over the whole input.

    Args:
        value: Numeric sequence (list, NumPy array or pandas Series).

    Returns:
        A list with one normalized value per input element.
        An empty input yields an empty list. If all elements are equal
        (``max - min == 0``) every element equals the mean, so a list of
        zeros is returned instead of dividing by zero.
    """
    if len(value) == 0:
        return []

    mean = np.mean(value)
    value_range = np.max(value) - np.min(value)

    # Vectorized centering replaces the per-element Python loop.
    centered = np.asarray(value, dtype=float) - mean

    if value_range == 0:
        # Constant input: avoid 0/0 -> NaN, the centered values are all zero.
        return list(np.zeros_like(centered))

    return list(centered / value_range)

In [156]:
# Build a frame holding the normalized target and numeric features
norm_df = pd.DataFrame(
    {
        column: normalization(df[column])
        for column in ('price', 'area', 'bedrooms', 'bathrooms')
    }
)
norm_df

Unnamed: 0,price,area,bedrooms,bathrooms
0,0.738811,0.155977,0.206972,0.237920
1,0.647902,0.261818,0.206972,0.904587
2,0.647902,0.330547,0.006972,0.237920
3,0.644872,0.161475,0.206972,0.237920
4,0.575175,0.155977,0.206972,-0.095413
...,...,...,...,...
540,-0.255128,-0.147804,-0.193028,-0.095413
541,-0.259704,-0.189041,0.006972,-0.095413
542,-0.261189,-0.105192,-0.193028,-0.095413
543,-0.261189,-0.153989,0.006972,-0.095413


In [157]:
# Target as an (n_samples, 1) column vector
y = norm_df["price"].values.reshape(-1, 1)
# Feature matrix with one column per selected feature
X = norm_df[['area', 'bathrooms', 'bedrooms']].values
N = X.shape[1]
# Initial weights: an (N, 1) column of zeros
w_0 = np.zeros((N, 1))

In [158]:
# Best parameters obtained with gradient descent
W, d_w, loss_history = grad_descent(X, y, w_0, num_iter=100_000)
loss = loss_history[-1]  # loss after the final update
print(f'Values: {W}')
print(f'Loss func: {loss}')

Values: [[0.43440053]
 [0.35390082]
 [0.18894335]]
Loss func: 0.0067347102657805795


In [159]:
# Best parameters obtained with the analytical solution (normal equation),
# using the pseudo-inverse of X^T X
values = (np.linalg.pinv(X.T @ X) @ X.T) @ y
result = loss_func(X, y, values)
print(f'Values: {values}')
print(f'Loss func: {result}')

Values: [[0.47714269]
 [0.36001286]
 [0.17611257]]
Loss func: 0.006713405108514904
