In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math, copy
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split # used to split data set into (x, y) train and (x, y) test

In [None]:
# Six training examples: x_train holds the house sizes (the feature) and
# y_train the matching house prices (the target), in the same order.
x_train = np.array((1.0, 1.7, 2.0, 2.5, 3.0, 3.2))
y_train = np.array((250, 300, 480, 430, 630, 730))

<div style="text-align:center; background-color:#dc3545; padding:20px;">
  <h1 style="font-size:36px; color:#ffeeba;"><b>Linear Regression From Scratch</b></h1>
</div>

## Computing Cost
The term 'cost' in this assignment might be a little confusing, since the data itself is about housing costs. Here, *cost* is a measure of how well our model predicts the target price of a house, while the term *price* is reserved for the housing data.

The equation for cost with one variable is:
  $$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2 \tag{1}$$ 
 
where 
  $$f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{2}$$
  
- $f_{w,b}(x^{(i)})$ is our prediction for example $i$ using parameters $w,b$.  
- $(f_{w,b}(x^{(i)}) -y^{(i)})^2$ is the squared difference between the prediction and the target value. Squaring makes every error non-negative, so positive and negative errors cannot cancel each other out.


In [None]:
def cost_function(x_tr, y_tr, w, b):
    """Return the squared-error cost J(w, b) of the line ``w * x + b``.

    Implements ``J(w, b) = 1 / (2m) * sum((w * x_i + b - y_i) ** 2)`` over the
    ``m`` training examples.

    Args:
        x_tr: Array-like of m feature values.
        y_tr: Array-like of m target values, same shape as ``x_tr``.
        w: Slope of the model (scalar).
        b: Intercept of the model (scalar).

    Returns:
        The cost as a NumPy float.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    x = np.asarray(x_tr, dtype=float)
    y = np.asarray(y_tr, dtype=float)

    if x.shape != y.shape:
        raise ValueError(
            f"x_tr and y_tr must have the same shape, got {x.shape} and {y.shape}"
        )
    if x.size == 0:
        # Fail with a clear message instead of dividing by zero below.
        raise ValueError("cost_function needs at least one training example")

    # Prediction error of every example, computed in one vectorised step.
    residuals = w * x + b - y
    return np.sum(residuals ** 2) / (2 * x.size)


## Gradient Descent
Gradient descent is an iterative algorithm for minimizing a function. In our problem we use it to find the values of $w$ and $b$ that minimize the cost function $J(w,b)$.

Gradient Descent:$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\;  w &= w -  \alpha \frac{\partial J(w,b)}{\partial w} \tag{3}  \; \newline 
 b &= b -  \alpha \frac{\partial J(w,b)}{\partial b}  \newline \rbrace
\end{align*}$$
where, parameters $w$, $b$ are updated simultaneously.  
The gradient is defined as:
$$
\begin{align}
\frac{\partial J(w,b)}{\partial w}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \tag{4}\\
  \frac{\partial J(w,b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \tag{5}\\
\end{align}
$$


In [None]:
def gradient_derivative(x_tr, y_tr, w, b):
    """Return the partial derivatives of the cost J(w, b) w.r.t. ``w`` and ``b``.

    Implements
    ``dJ/dw = 1/m * sum((w * x_i + b - y_i) * x_i)`` and
    ``dJ/db = 1/m * sum(w * x_i + b - y_i)`` over the ``m`` training examples.

    Args:
        x_tr: Array-like of m feature values.
        y_tr: Array-like of m target values, same shape as ``x_tr``.
        w: Current slope of the model (scalar).
        b: Current intercept of the model (scalar).

    Returns:
        Tuple ``(d_dw, d_db)`` with the two gradient components.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    x = np.asarray(x_tr, dtype=float)
    y = np.asarray(y_tr, dtype=float)

    if x.shape != y.shape:
        raise ValueError(
            f"x_tr and y_tr must have the same shape, got {x.shape} and {y.shape}"
        )
    if x.size == 0:
        # Fail with a clear message instead of dividing by zero below.
        raise ValueError("gradient_derivative needs at least one training example")

    # Prediction error of every example, computed in one vectorised step.
    residuals = w * x + b - y
    d_dw = np.mean(residuals * x)
    d_db = np.mean(residuals)

    return d_dw, d_db

In [None]:
def gradient_descent(x_tr, y_tr, w_input, b_input, alpha, iterats, cost_function, gradient_derivative):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        x_tr: Training features, passed through to the two callbacks.
        y_tr: Training targets, passed through to the two callbacks.
        w_input: Initial value of ``w``.
        b_input: Initial value of ``b``.
        alpha: Learning rate.
        iterats: Number of update steps to run.
        cost_function: Callable ``(x_tr, y_tr, w, b) -> cost``.
        gradient_derivative: Callable ``(x_tr, y_tr, w, b) -> (d_dw, d_db)``.

    Returns:
        Tuple ``(w, b, j_history, w_b_history)``: the final parameters, the
        cost after each recorded step, and the ``[w, b]`` pair after each
        recorded step. Only the first 10000 steps are recorded.
    """
    # Cap on recorded steps so very long runs do not grow the history lists
    # without bound.
    max_history = 10000
    # Report progress about ten times per run, whatever the iteration count.
    report_every = max(1, math.ceil(iterats / 10))

    j_history = []
    w_b_history = []
    w = w_input
    b = b_input

    for i in range(iterats):
        # Both gradients are evaluated at the current (w, b) before either
        # parameter changes, so the update is simultaneous.
        d_dw, d_db = gradient_derivative(x_tr, y_tr, w, b)

        w = w - alpha * d_dw
        b = b - alpha * d_db

        record = i < max_history
        report = i % report_every == 0

        if record or report:
            # Evaluate the cost at the updated parameters so the printed value
            # is never a stale history entry once the history cap is reached.
            cost = cost_function(x_tr, y_tr, w, b)

        if record:
            j_history.append(cost)
            w_b_history.append([w, b])

        if report:
            print(f"Iteration {i:4}: Cost {cost:0.2e} ",
                  f"d_dw: {d_dw: 0.3e}, d_db: {d_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, j_history, w_b_history

In [None]:
# Starting point and hyper-parameters for the optimiser.
w_init, b_init = 0, 0
iterations = 10_000
tmp_alpha = 0.01

# Fit w and b on the training data; J_hist and p_hist receive the cost and
# [w, b] histories returned by gradient_descent.
w_final, b_final, J_hist, p_hist = gradient_descent(
    x_tr=x_train,
    y_tr=y_train,
    w_input=w_init,
    b_input=b_init,
    alpha=tmp_alpha,
    iterats=iterations,
    cost_function=cost_function,
    gradient_derivative=gradient_derivative,
)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

### Predictions

In [None]:
def compute_model_fucntion(x_tr, w, b):
    """Evaluate the linear model ``w * x + b`` for every input value.

    Args:
        x_tr: Array-like of feature values.
        w: Slope of the model (scalar).
        b: Intercept of the model (scalar).

    Returns:
        A float NumPy array with one prediction per element of ``x_tr``.
    """
    # One vectorised expression replaces the element-by-element loop; casting
    # to float keeps the result dtype float even for integer inputs.
    return w * np.asarray(x_tr, dtype=float) + b

In [None]:
# Evaluate the fitted line at every training input. The bare name on the last
# line makes the notebook display the resulting array as the cell output.
prediction = compute_model_fucntion(x_train, w_final, b_final)
prediction

### Plotting our Predictions

In [None]:
# Training points as red dots.
plt.scatter(
    x = x_train,
    y = y_train,
    color='red',
    marker='.',
    label = 'Actual Values'
)

# Line fitted by the from-scratch gradient descent.
plt.plot(
    x_train,
    prediction,
    color = 'blue',
    label = 'Predict values'
)


plt.title('Houses prices')
plt.xlabel('House size')
plt.ylabel('House price')
# The label= arguments above only appear on the figure once a legend is drawn.
plt.legend()

plt.show()

In [None]:
# Slope (w) returned by the from-scratch gradient descent run.
print(f'The value of w is: {w_final}')

In [None]:
# Intercept (b) returned by the from-scratch gradient descent run.
print(f'The value of b is: {b_final}')

<div style="text-align:center; background-color:#ff99cc; padding:20px;">
  <h1 style="font-size:36px; color:#FFEBEB;"><b>Linear Regression Using Sklearn</b></h1>
</div>


### Convert numpy array to pandas Data Frame

In [None]:
# Wrap the feature and target arrays in DataFrames for scikit-learn.
# NOTE: despite the names, `train` holds the features (x) and `test` holds the
# targets (y); the actual train/test split happens in the next code cell.
train = pd.DataFrame(x_train)
test= pd.DataFrame(y_train)

### Split and Fit the model

In [None]:
# Hold out 20% of the rows (fixed random_state so the split is repeatable).
# NOTE: this rebinds x_train / y_train from the arrays defined at the top of
# the notebook to DataFrame subsets; re-run the data cell before re-running
# any earlier cell that expects the original arrays.
x_train, x_test, y_train, y_test = train_test_split(train, test, test_size=0.2,random_state=0)

# Create the estimator once and fit it on the training split.
lin = LinearRegression()
lin.fit(x_train, y_train)

In [None]:
# Predict on the training split with the fitted scikit-learn model.
# NOTE: this reuses (and overwrites) the name `prediction` from the
# from-scratch section; x_test / y_test are not used in the cells shown.
prediction = lin.predict(x_train)

### Plotting the Predictions

In [None]:
# Training-split points as red dots.
plt.scatter(
    x = x_train,
    y = y_train,
    color='red',
    marker='.',
    label = 'Actual Values'
)

# Line fitted by scikit-learn's LinearRegression.
plt.plot(
    x_train,
    prediction,
    color = 'blue',
    label = 'Predict values'
)


plt.title('Houses prices')
plt.xlabel('House size')
plt.ylabel('House price')
# The label= arguments above only appear on the figure once a legend is drawn.
plt.legend()

plt.show()

In [None]:
# The model was fitted on a DataFrame target, so coef_ is an array rather than
# a scalar (its dimensionality depends on the target's shape). Flatten it and
# take the single slope so w prints as a plain number, as in the from-scratch
# section.
w = float(np.ravel(lin.coef_)[0])
print(f'The value of w is: {w}')


In [None]:
# The model was fitted on a DataFrame target, so intercept_ is an array rather
# than a scalar. Flatten it and take the single value so b prints as a plain
# number, as in the from-scratch section.
b = float(np.ravel(lin.intercept_)[0])
print(f'The value of b is: {b}')
