# **Importing Data**

---




In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.datasets import make_regression
import plotly.express as px
import seaborn as sns

In [None]:
cal_housing = fetch_california_housing()
X = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names)
y = cal_housing.target
Data = pd.DataFrame(dict(MedInc=X['MedInc'], Price=cal_housing.target))

In [None]:
X=Data["MedInc"].to_numpy()
y=Data["Price"].to_numpy()



---



# **ScatterPlot of Data**

---



In [None]:
fig = px.scatter(Data, x="MedInc", y="Price")
fig.show()

---


# **Loss Function**

---



$$
 \hat{y} = \beta_0 + \beta_1 x = {\beta} ^T. X $$

 $$
 L(\beta_0,\beta_1) = \left\{ \begin{array}{cl}
(y_i - \hat{y}_i)^2 & : \ (y_i - \hat{y}_i)^2 \le  \theta \\
\theta & : \ otherwise
\end{array} \right .  $$

##  Loss function that described in the case is a function in piecewise type, when loss function is written, it can be seen that a function similar to the huber loss is obtained as above. Although this loss function returns an equal amount of errors after a certain value:
### *> No derivative in threshold value.*
### *> After threshold value, gradient is equal to 0. Causes **calculation burden** a lot as beta values are random and **wont update**.*

---









# **Defining New Loss Function and Convexity Check**

---


$$
\Large
L(\beta_0,\beta_1,{\theta}) =\ {\theta}\cdot\left(1-\mathrm{e}^{-\frac{\left(y-\beta_0 - \beta_1 x\right)^2}{{\theta}}}\right) 
$$

##  Found an exponentially decreasing function as the loss function and modified it with theta so that errors after theta converge to theta.


In [None]:
def loss_func(y,x,b_0,b_1,theta):
    y_pred=b_0+b_1*x
    err=theta*(1-np.power(np.e,(-(1/theta)*(np.power((y-y_pred).mean(),2)))))
    return err
    
custom_loss=[]
actual_loss=[]
theta=5
for i, K in enumerate(np.linspace(-7, 7, 1000)):
    err=theta*(1-np.power(np.e,(-(1/theta)*(np.power((K),2)))))
    actual_loss.append(K)
    custom_loss.append(err)
l = pd.DataFrame(dict(actual_loss=actual_loss, custom_loss=custom_loss))
fig = px.scatter(l, x="actual_loss", y="custom_loss",width=1000, height=600,color=custom_loss,)
fig.update_xaxes(showline=True, linewidth=3, linecolor='black', mirror=True,showgrid=True, gridwidth=1.1, gridcolor='gray',zeroline=True, zerolinewidth=1.4, zerolinecolor='black')
fig.update_yaxes(showline=True, linewidth=3, linecolor='black', mirror=True,showgrid=True, gridwidth=1.1, gridcolor='gray',zeroline=True, zerolinewidth=1.4, zerolinecolor='black')
fig.update_layout(plot_bgcolor="white",yaxis = dict(range=[-4,12],tickfont = dict(size=20)),xaxis = dict(range=[-10,10],tickfont = dict(size=20)))
config = dict({'scrollZoom': True,'displaylogo':False})
fig.show(config=config)

##  As can be seen from the output above, the function is in a convex structure.

---




# **Gradient Descent Update Calculation ($\nabla $)**

---



In [8]:
X,y

(array([8.3252, 8.3014, 7.2574, ..., 1.7   , 1.8672, 2.3886]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

$
\Large
L(\beta_0,\beta_1,{\theta}) =\ {\theta}\cdot\left(1-\mathrm{e}^{-\frac{\left(y-\beta_0 - \beta_1 x\right)^2}{{\theta}}}\right) 
$

$$ \Large \frac{\partial L}{\partial \beta_0} = -2\left(y-b_0-b_1x\right)\mathrm{e}^{-\frac{\left(y-b_0-b_1x\right)^2}{{\theta}}} \hspace{1cm} \text{and}
\hspace{1cm} \frac{\partial L}{\partial \beta_1} = -2x\cdot\left(y-b_0-b_1x\right)\mathrm{e}^{-\frac{\left(y-b_0-b_1x\right)^2}{{\theta}}} $$ 

## Model organized by update functions:

In [40]:
def custom_loss_model(x, y): # -> np.ndarray:
    theta=5
    beta = np.random.random(2)
    alpha=1.2*10**-2

    for i in range(1000000):
        y_pred= beta[0] + beta[1] * x    
       
        # Update Functions:
        g_b0 = -2 * ((y - y_pred).mean()) * np.power(np.e,(-1*np.power(((y - y_pred).mean()),2)/theta)) 
        g_b1 = -2 * ((x * (y - y_pred)).mean()) * np.power(np.e,(-1*np.power(((y - y_pred).mean()),2)/theta)) 

        beta_prev = np.copy(beta)


        beta[0] = beta[0] - alpha * g_b0
        beta[1] = beta[1] - alpha * g_b1
        
        if(i%1000 == 0):
            print(f"iteration: ({i}) beta: {beta}, gradient: {g_b0} {g_b1}")
            
        if np.linalg.norm(beta - beta_prev) < 0.00000001:
            print(f"I do early stoping at iteration {i}")
            break


    return beta

In [41]:
beta=custom_loss_model(X, y)
beta

iteration: (0) beta: [0.66134582 0.35947469], gradient: -0.12773200916690045 -1.0028700503588774
iteration: (1000) beta: [0.45328938 0.4174267 ], gradient: 0.0009093047740524393 -0.00019122907579263823
iteration: (2000) beta: [0.45088352 0.41793266], gradient: 1.0370570728911965e-05 -2.180957049603143e-06
I do early stoping at iteration 2569


array([0.45085794, 0.41793803])

# Minimized beta values:
# **$ \beta_0$: 0.45085577**
# **$ \beta_1$: 0.41793849**

---




# **L2 Regularized Version ($\nabla,\lambda$)**

---

In [42]:
X,y

(array([8.3252, 8.3014, 7.2574, ..., 1.7   , 1.8672, 2.3886]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

$
\Large
L(\beta_0,\beta_1,{\theta}) =\ {\theta}\cdot\left(1-\mathrm{e}^{-\frac{\left(y-\beta_0 - \beta_1 x\right)^2}{{\theta}}}\right) 
$

$ \Large \frac{\partial L}{\partial \beta_0} = -2\left(y-b_0-b_1x\right)\mathrm{e}^{-\frac{\left(y-b_0-b_1x\right)^2}{{\theta}}}+ 2 \lambda\beta_0$  $\Large \frac{\partial L}{\partial \beta_1} = -2x\cdot\left(y-b_0-b_1x\right)\mathrm{e}^{-\frac{\left(y-b_0-b_1x\right)^2}{{\theta}}}+ 2\lambda\beta_1 $ 

In [128]:
def l2r_custom_loss_model(x, y,lam=0.001,alpha=1.2*10**-2): # -> np.ndarray:
    theta=6
    
    beta = np.random.random(2)
    
    for i in range(10000):
        y_pred= beta[0] + beta[1] * x    
        
        g_b0 = -2 * ((y - y_pred).mean()) * np.power(np.e,(-1*np.power(((y - y_pred).mean()),2)/theta)) + 2 * lam * beta[0]
        g_b1 = -2 * ((x * (y - y_pred)).mean()) * np.power(np.e,(-1*np.power(((y - y_pred).mean()),2)/theta)) + 2 * lam * beta[1]
        beta_prev = np.copy(beta)


        beta[0] = beta[0] - alpha * g_b0
        beta[1] = beta[1] - alpha * g_b1
        
        if(i%1000 == 0):
            print(f"iteration: ({i}) beta: {beta}, gradient: {g_b0} {g_b1}")
            
        if np.linalg.norm(beta - beta_prev) < 0.000001:
            print(f"I do early stoping at iteration {i}")
            break


    return beta

In [129]:
beta_you_do=l2r_custom_loss_model(X, y, 0.001, 0.0017)
beta_you_do

iteration: (0) beta: [0.74480081 0.2361925 ], gradient: -0.8532327941941544 -4.632882095861681
iteration: (1000) beta: [0.61843588 0.38266955], gradient: 0.06339842925100142 -0.013332852054796193
iteration: (2000) beta: [0.53870483 0.39943719], gradient: 0.03357054576863375 -0.0070599709279215
iteration: (3000) beta: [0.49648881 0.40831533], gradient: 0.017773948407786296 -0.003737906201180117
iteration: (4000) beta: [0.47413793 0.41301577], gradient: 0.009410105494706217 -0.0019789688657707296
iteration: (5000) beta: [0.46230472 0.41550433], gradient: 0.004981969304589967 -0.0010477206634057436
iteration: (6000) beta: [0.45603989 0.41682184], gradient: 0.0026375855965007754 -0.0005546908735135122
iteration: (7000) beta: [0.45272313 0.41751936], gradient: 0.0013964064056142388 -0.0002936677718233153
iteration: (8000) beta: [0.45096715 0.41788865], gradient: 0.0007392937895085866 -0.00015547533901595677
I do early stoping at iteration 8394


array([0.45052928, 0.41798073])

# Minimized beta values for L2 regularized model:
# **$ \beta_0$: 0.45052928**
# **$ \beta_1$: 0.41798073**

---


In [132]:
def model2(x, y, lam, alpha=0.0001) -> np.ndarray:
    print("starting sgd")
    beta = np.random.random(2)

    for i in range(1000):
        y_pred: np.ndarray = beta[0] + beta[1] * x

        g_b0 = -2 * (y - y_pred).sum() + 2 * lam * beta[0]
        g_b1 = -2 * (x * (y - y_pred)).sum() + 2 * lam * beta[1]
        if(i%100 == 0):
            print(f"({i}) beta: {beta}, gradient: {g_b0} {g_b1}")
          

        beta_prev = np.copy(beta)

        beta[0] = beta[0] - alpha * g_b0
        beta[1] = beta[1] - alpha * g_b1

        if np.linalg.norm(beta - beta_prev) < 0.000001:
            print(f"I do early stoping at iteration {i}")
            break

    return beta

In [133]:
beta_we_do=model2(X, y, 0.001, 0.000001)
beta_we_do

starting sgd
(0) beta: [0.45086748 0.07672191], gradient: -54519.54381654423 -261863.58297147602
(100) beta: [0.48265595 0.41125083], gradient: 244.14955097309277 -51.345263237893406
(200) beta: [0.46556899 0.41484426], gradient: 112.96286311789211 -23.756373582420665
(300) beta: [0.45766322 0.41650687], gradient: 52.26554131651978 -10.99157449385167
(400) beta: [0.45400538 0.41727612], gradient: 24.18216689725036 -5.085570381158112
(500) beta: [0.45231298 0.41763204], gradient: 11.188580106831731 -2.352986473038034
(600) beta: [0.45152994 0.41779671], gradient: 5.176720735520349 -1.088677361142271
(700) beta: [0.45116764 0.4178729 ], gradient: 2.395159825257091 -0.5037081216901329
(800) beta: [0.45100002 0.41790815], gradient: 1.1081900843513153 -0.23305515561955556
I do early stoping at iteration 817


array([0.45098132, 0.41791209])

# Minimized beta values for L2 regularized model:
# **$ \beta_0$: 0.45098132**
# **$ \beta_1$: 0.41791209**

---


# **WeDo vs YouDo**

---

In [150]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.scatter(Data, x="MedInc", y="Price")

y_pred_we_do=beta_we_do[0]+beta_we_do[1]*X
y_pred_you_do=beta_you_do[0]+beta_you_do[1]*X

l =pd.DataFrame(dict(x=X,y_pred_we_do=y_pred_we_do,y_pred_you_do=y_pred_you_do))

fig1 = px.line(l, x="x", y="y_pred_we_do")
fig1.update_traces(line=dict(color = 'green'))

fig2 = px.line(l, x="x", y="y_pred_you_do")
fig2.update_traces(line=dict(color = 'red'))


fig3 = go.Figure(data=fig.data + fig1.data+ fig2.data)
fig3.show()

array([3.93030246, 3.92035452, 3.48398264, ..., 1.16109652, 1.2309829 ,
       1.44891805])