<div>
    <img src="https://storage.googleapis.com/kaggle-datasets-images/1312/2368/23808724f313005d570be372003594fa/dataset-cover.jpg" />
</div>

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

<h1 id="dataset" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Read the diamonds CSV and discard the 'Unnamed: 0' column in one expression,
# so re-running this cell always rebuilds `df` from the file.
path = '../input/diamonds/diamonds.csv'
df = pd.read_csv(path).drop(columns='Unnamed: 0')
df.head()

In [None]:
# Integer code for every distinct label of the three categorical columns.
# Codes are assigned in the order unique() returns the labels, not by any
# quality ranking of the categories.
cut_to_idx = {label: code for code, label in enumerate(df['cut'].unique())}
color_to_idx = {label: code for code, label in enumerate(df['color'].unique())}
clarity_to_idx = {label: code for code, label in enumerate(df['clarity'].unique())}

In [None]:
# Replace each categorical label by its integer code.
# The result is assigned back to the column instead of calling
# `df[col].replace(..., inplace=True)`: that form is chained in-place
# assignment, which warns on recent pandas and leaves `df` unchanged once
# Copy-on-Write is active. Every label is a key of its mapping (the mappings
# were built from the column's unique values), so `.map` leaves no gaps.
df['cut'] = df['cut'].map(cut_to_idx)
df['color'] = df['color'].map(color_to_idx)
df['clarity'] = df['clarity'].map(clarity_to_idx)

df.head()

In [None]:
# Divide every column by its own maximum; the per-column maxima are kept
# in `maxes`, keyed by column name.
maxes = {col: df[col].max() for col in df.columns}
for col, col_max in maxes.items():
    df[col] = df[col] / col_max

In [None]:
# Preview the first five rows after scaling.
df.head(5)

In [None]:
# Annotated heatmap of the pairwise column correlations, colour scale fixed to [-1, 1].
plt.figure(figsize=(16, 6))

corr_matrix = df.corr()
heatmap = sns.heatmap(corr_matrix, annot=True, vmin=-1, vmax=1)
heatmap.set_title('Correlation Heatmap', pad=12, fontdict={'fontsize':12});

In [None]:
# Model inputs are the three dimension columns; the target is the price column.
feature_cols = ['x', 'y', 'z']
X = df[feature_cols].values
y = df['price'].values

<h1 id="activation" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Activation Functions
        <a class="anchor-link" href="#activation" target="_self">¶</a>
    </center>
</h1>

In [None]:
def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def d_tanh(x):
    """Derivative of tanh evaluated at the pre-activation ``x``: 1 - tanh(x)^2."""
    squashed = np.tanh(x)
    return 1 - np.square(squashed)

def arctan(x):
    """Arctangent activation, applied element-wise via ``np.arctan``."""
    activated = np.arctan(x)
    return activated

def d_arctan(x):
    """Derivative of arctan evaluated at ``x``: 1 / (1 + x^2)."""
    denominator = 1 + np.square(x)
    return 1 / denominator

def iden(x):
    """Identity (linear) activation: hands back its argument unchanged."""
    # No copy is made; the very same object is returned.
    return x

def d_iden(x):
    """Derivative of the identity activation.

    Always the scalar 1, whatever ``x`` is; callers multiply it into arrays
    and rely on broadcasting.
    """
    return 1

def d_abs(x):
    """Element-wise (sub)derivative of ``|x|``.

    Gives +1.0 where ``x >= 0`` (so 0 maps to +1.0) and -1.0 where ``x < 0``.
    """
    non_negative = (x >= 0) * 1.0
    negative = (x < 0) * 1.0
    return non_negative - negative

<h1 id="parameters" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Parameters
        <a class="anchor-link" href="#parameters" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Smallest and largest value found anywhere in the feature matrix;
# these bound the evaluation grid built further down.
min_range = X.min()
max_range = X.max()

In [None]:
# Training hyper-parameters shared by every experiment below.
num_epoch = 200          # gradient-descent iterations per experiment
learning_rate = 0.0004   # step size
alpha = 3                # regularization strength

# Insert a constant-1 bias column at index 1 (columns become: x, 1, y, z).
X_with_bias = np.insert(X, 1, 1, axis=1)

# Targets as a column vector so they line up with the network output.
y_with_dim = y[:, np.newaxis]

In [None]:
# Evaluation grid: 1000 evenly spaced points between the smallest and largest
# feature value, repeated across the four input columns -> shape (1000, 4).
theta_grid = np.linspace(min_range, max_range, 1000)
theta_with_bias = np.array([theta_grid for _ in range(4)]).T

# Column 1 is the bias input (X_with_bias holds a constant 1 there), so it must
# stay 1 on the grid as well rather than sweeping along with the features.
theta_with_bias[:, 1] = 1.0

<h1 id="weights" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Weights
        <a class="anchor-link" href="#weights" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Fixed seed so the initial weights are the same on every run.
np.random.seed(456789)

# (inputs, outputs) of the four dense layers, drawn in order from a standard normal.
layer_shapes = [(4, 100), (100, 104), (104, 200), (200, 1)]
w1, w2, w3, w4 = [np.random.randn(n_in, n_out) for n_in, n_out in layer_shapes]

In [None]:
# Every experiment starts from the same initial weights. These are references,
# not copies: the training loops rebind these names to new arrays rather than
# modifying the arrays in place, so the experiments do not affect one another.
initial_weights = (w1, w2, w3, w4)

# Loss only, no regularization.
w1_l1, w2_l1, w3_l1, w4_l1 = initial_weights
w1_l2, w2_l2, w3_l2, w4_l2 = initial_weights

# Loss and regularizer of the same kind.
w1_l1_reg, w2_l1_reg, w3_l1_reg, w4_l1_reg = initial_weights
w1_l2_reg, w2_l2_reg, w3_l2_reg, w4_l2_reg = initial_weights

# Mixed loss / regularizer combinations.
w1_l1_l2_reg, w2_l1_l2_reg, w3_l1_l2_reg, w4_l1_l2_reg = initial_weights
w1_l2_l1_reg, w2_l2_l1_reg, w3_l2_l1_reg, w4_l2_l1_reg = initial_weights

<h1 id="l1_norm" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L1 Norm
        <a class="anchor-link" href="#l1_norm" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 1: mean-absolute-error loss, no regularization.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l1)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l1)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l1)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l1)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    cost = np.abs(residual).sum() / n_samples
    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass: delta_k is the cost gradient w.r.t. the pre-activation of
    # layer k; grad_k is the gradient w.r.t. that layer's weight matrix.
    delta_4 = (d_abs(residual) / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l1.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l1.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l1.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Plain gradient-descent step (names are rebound to new arrays).
    w4_l1 = w4_l1 - learning_rate * grad_4
    w3_l1 = w3_l1 - learning_rate * grad_3
    w2_l1 = w2_l1 - learning_rate * grad_2
    w1_l1 = w1_l1 - learning_rate * grad_1

# `cost` is the value measured at the start of the last iteration.
print("Case 1 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l1))
layer_2_act = iden(layer_1_act.dot(w2_l1))
layer_3_act = arctan(layer_2_act.dot(w3_l1))
layer_4_l1 = iden(layer_3_act.dot(w4_l1))

<h1 id="l2_norm" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L2 Norm
        <a class="anchor-link" href="#l2_norm" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 2: mean-squared-error loss, no regularization.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l2)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l2)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l2)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l2)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    cost = np.square(residual).sum() / n_samples
    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass: delta_k is the cost gradient w.r.t. the pre-activation of
    # layer k; grad_k is the gradient w.r.t. that layer's weight matrix.
    delta_4 = (2.0 * residual / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l2.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l2.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l2.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Plain gradient-descent step (names are rebound to new arrays).
    w4_l2 = w4_l2 - learning_rate * grad_4
    w3_l2 = w3_l2 - learning_rate * grad_3
    w2_l2 = w2_l2 - learning_rate * grad_2
    w1_l2 = w1_l2 - learning_rate * grad_1

# `cost` is the value measured at the start of the last iteration.
print("Case 2 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l2))
layer_2_act = iden(layer_1_act.dot(w2_l2))
layer_3_act = arctan(layer_2_act.dot(w3_l2))
layer_4_l2 = iden(layer_3_act.dot(w4_l2))

<h1 id="l1_l1reg" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L1 Norm + L1 Reg
        <a class="anchor-link" href="#l1_l1reg" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 3: mean-absolute-error loss plus an L1 penalty (alpha * sum|w|) on all weights.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l1_reg)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l1_reg)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l1_reg)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l1_reg)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    l1_penalty = alpha * (np.abs(w1_l1_reg).sum() +
                          np.abs(w2_l1_reg).sum() +
                          np.abs(w3_l1_reg).sum() +
                          np.abs(w4_l1_reg).sum())
    cost = np.abs(residual).sum() / n_samples + l1_penalty
    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass for the data term: delta_k is the gradient w.r.t. the
    # pre-activation of layer k; grad_k is the gradient w.r.t. its weights.
    delta_4 = (d_abs(residual) / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l1_reg.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l1_reg.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l1_reg.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Descent step; the penalty contributes alpha * d|w|/dw to each gradient.
    w4_l1_reg = w4_l1_reg - learning_rate * (grad_4 + alpha *  d_abs(w4_l1_reg))
    w3_l1_reg = w3_l1_reg - learning_rate * (grad_3 + alpha *  d_abs(w3_l1_reg))
    w2_l1_reg = w2_l1_reg - learning_rate * (grad_2 + alpha *  d_abs(w2_l1_reg))
    w1_l1_reg = w1_l1_reg - learning_rate * (grad_1 + alpha *  d_abs(w1_l1_reg))

# `cost` is the value measured at the start of the last iteration.
print("Case 3 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l1_reg))
layer_2_act = iden(layer_1_act.dot(w2_l1_reg))
layer_3_act = arctan(layer_2_act.dot(w3_l1_reg))
layer_4_l1_reg = iden(layer_3_act.dot(w4_l1_reg))

<h1 id="l2norm_l2reg" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L2 Norm + L2 Reg
        <a class="anchor-link" href="#l2norm_l2reg" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 4: mean-squared-error loss plus an L2 penalty (alpha * sum w^2) on all weights.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l2_reg)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l2_reg)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l2_reg)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l2_reg)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    l2_penalty = alpha * (np.sum(w4_l2_reg ** 2) +
                          np.sum(w3_l2_reg ** 2) +
                          np.sum(w2_l2_reg ** 2) +
                          np.sum(w1_l2_reg ** 2))
    cost = (np.square(residual).sum() / n_samples) + l2_penalty

    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass for the data term: delta_k is the gradient w.r.t. the
    # pre-activation of layer k; grad_k is the gradient w.r.t. its weights.
    delta_4 = (2 * residual / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l2_reg.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l2_reg.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l2_reg.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Descent step; the penalty contributes 2 * alpha * w to each gradient.
    w4_l2_reg = w4_l2_reg - learning_rate * (grad_4 + 2*alpha * w4_l2_reg)
    w3_l2_reg = w3_l2_reg - learning_rate * (grad_3 + 2*alpha * w3_l2_reg)
    w2_l2_reg = w2_l2_reg - learning_rate * (grad_2 + 2*alpha * w2_l2_reg)
    w1_l2_reg = w1_l2_reg - learning_rate * (grad_1 + 2*alpha * w1_l2_reg)

# `cost` is the value measured at the start of the last iteration.
print("Case 4 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l2_reg))
layer_2_act = iden(layer_1_act.dot(w2_l2_reg))
layer_3_act = arctan(layer_2_act.dot(w3_l2_reg))
layer_4_l2_reg = iden(layer_3_act.dot(w4_l2_reg))

<h1 id="l1norm_l2reg" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L1 Norm + L2 Reg
        <a class="anchor-link" href="#l1norm_l2reg" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 5: mean-absolute-error loss plus an L2 penalty (alpha * sum w^2) on all weights.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l1_l2_reg)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l1_l2_reg)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l1_l2_reg)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l1_l2_reg)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    l2_penalty = alpha * (np.sum(w4_l1_l2_reg ** 2) +
                          np.sum(w3_l1_l2_reg ** 2) +
                          np.sum(w2_l1_l2_reg ** 2) +
                          np.sum(w1_l1_l2_reg ** 2))
    cost = np.abs(residual).sum() / n_samples + l2_penalty
    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass for the data term: delta_k is the gradient w.r.t. the
    # pre-activation of layer k; grad_k is the gradient w.r.t. its weights.
    delta_4 = (d_abs(residual) / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l1_l2_reg.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l1_l2_reg.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l1_l2_reg.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Descent step; the penalty contributes 2 * alpha * w to each gradient.
    w4_l1_l2_reg = w4_l1_l2_reg - learning_rate * (grad_4 + 2*alpha * w4_l1_l2_reg)
    w3_l1_l2_reg = w3_l1_l2_reg - learning_rate * (grad_3 + 2*alpha * w3_l1_l2_reg)
    w2_l1_l2_reg = w2_l1_l2_reg - learning_rate * (grad_2 + 2*alpha * w2_l1_l2_reg)
    w1_l1_l2_reg = w1_l1_l2_reg - learning_rate * (grad_1 + 2*alpha * w1_l1_l2_reg)

# `cost` is the value measured at the start of the last iteration.
print("Case 5 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l1_l2_reg))
layer_2_act = iden(layer_1_act.dot(w2_l1_l2_reg))
layer_3_act = arctan(layer_2_act.dot(w3_l1_l2_reg))
layer_4_l1_l2_reg = iden(layer_3_act.dot(w4_l1_l2_reg))

<h1 id="l2norm_l1reg" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>L2 Norm + L1 Reg
        <a class="anchor-link" href="#l2norm_l1reg" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Case 6: mean-squared-error loss plus an L1 penalty (alpha * sum|w|) on all weights.
n_samples = len(X)

# The loop variable is `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the notebook's global namespace.
for epoch in range(num_epoch):

    # Forward pass: tanh -> identity -> arctan -> identity.
    layer_1 = X_with_bias.dot(w1_l2_l1_reg)
    layer_1_act = tanh(layer_1)

    layer_2 = layer_1_act.dot(w2_l2_l1_reg)
    layer_2_act = iden(layer_2)

    layer_3 = layer_2_act.dot(w3_l2_l1_reg)
    layer_3_act = arctan(layer_3)

    layer_4 = layer_3_act.dot(w4_l2_l1_reg)
    layer_4_act = iden(layer_4)

    residual = layer_4_act - y_with_dim
    l1_penalty = alpha * (np.abs(w1_l2_l1_reg).sum() +
                          np.abs(w2_l2_l1_reg).sum() +
                          np.abs(w3_l2_l1_reg).sum() +
                          np.abs(w4_l2_l1_reg).sum())
    cost = (np.square(residual).sum() / n_samples) + l1_penalty

    print("Current Iter: ",epoch, " current cost: ",cost,end="\r")

    # Backward pass for the data term: delta_k is the gradient w.r.t. the
    # pre-activation of layer k; grad_k is the gradient w.r.t. its weights.
    delta_4 = (2 * residual / n_samples) * d_iden(layer_4)
    grad_4 = layer_3_act.T.dot(delta_4)

    delta_3 = delta_4.dot(w4_l2_l1_reg.T) * d_arctan(layer_3)
    grad_3 = layer_2_act.T.dot(delta_3)

    delta_2 = delta_3.dot(w3_l2_l1_reg.T) * d_iden(layer_2)
    grad_2 = layer_1_act.T.dot(delta_2)

    delta_1 = delta_2.dot(w2_l2_l1_reg.T) * d_tanh(layer_1)
    grad_1 = X_with_bias.T.dot(delta_1)

    # Descent step; the penalty contributes alpha * d|w|/dw to each gradient.
    w4_l2_l1_reg = w4_l2_l1_reg - learning_rate * (grad_4  + alpha *  d_abs(w4_l2_l1_reg))
    w3_l2_l1_reg = w3_l2_l1_reg - learning_rate * (grad_3  + alpha *  d_abs(w3_l2_l1_reg))
    w2_l2_l1_reg = w2_l2_l1_reg - learning_rate * (grad_2  + alpha *  d_abs(w2_l2_l1_reg))
    w1_l2_l1_reg = w1_l2_l1_reg - learning_rate * (grad_1  + alpha *  d_abs(w1_l2_l1_reg))

# `cost` is the value measured at the start of the last iteration.
print("Case 6 final error :",cost)

# Run the trained network over the evaluation grid for the comparison plot.
layer_1_act = tanh(theta_with_bias.dot(w1_l2_l1_reg))
layer_2_act = iden(layer_1_act.dot(w2_l2_l1_reg))
layer_3_act = arctan(layer_2_act.dot(w3_l2_l1_reg))
layer_4_l2_l1_reg = iden(layer_3_act.dot(w4_l2_l1_reg))

<h1 id="analysis" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Analysis
        <a class="anchor-link" href="#analysis" target="_self">¶</a>
    </center>
</h1>

In [None]:
# For each trained variant, print the sum of absolute weights of its four
# layers (layer 1 first), one line per variant.
weight_report = [
    ("L1 weights absolute sum: ", (w1_l1, w2_l1, w3_l1, w4_l1)),
    ("L2 weights absolute sum: ", (w1_l2, w2_l2, w3_l2, w4_l2)),
    ("L1 with L1 Reg weights absolute sum:", (w1_l1_reg, w2_l1_reg, w3_l1_reg, w4_l1_reg)),
    ("L2 with L2 Reg weights absolute sum:", (w1_l2_reg, w2_l2_reg, w3_l2_reg, w4_l2_reg)),
    ("L1 with L2 Reg weights absolute sum:", (w1_l1_l2_reg, w2_l1_l2_reg, w3_l1_l2_reg, w4_l1_l2_reg)),
    ("L2 with L1 Reg weights absolute sum:", (w1_l2_l1_reg, w2_l2_l1_reg, w3_l2_l1_reg, w4_l2_l1_reg)),
]

for report_label, layer_weights in weight_report:
    print(report_label, *[np.abs(w).sum() for w in layer_weights])

In [None]:
# Overlay the six trained networks' outputs on the evaluation grid.
fig = plt.figure(figsize=(14, 7))

# The x-axis is the first column of the evaluation grid.
grid_x = theta_with_bias[:,0]

# (network output, line colour, legend label) for each variant.
curves = [
    (layer_4_l1,        'r', 'L1 Norm'),
    (layer_4_l2,        'g', 'L2 Norm'),
    (layer_4_l1_reg,    'b', 'L1 Norm with L1 Reg'),  # label typo 'L1 R eg' corrected
    (layer_4_l2_reg,    'y', 'L2 Norm with L2 Reg'),
    (layer_4_l1_l2_reg, 'k', 'L1 Norm with L2 Reg'),
    (layer_4_l2_l1_reg, 'c', 'L2 Norm with L1 Reg'),
]
for prediction, colour, legend_label in curves:
    plt.plot(grid_x, prediction, c=colour, linewidth=1, label=legend_label)

plt.legend()
plt.show()

<h1 id="reference" style="color:white; background:black; border:0.5px dotted white;"> 
    <center>Reference
        <a class="anchor-link" href="#reference" target="_self">¶</a>
    </center>
</h1>

To read more about the mathematics behind this implementation, see the [Towards Data Science article by Jae Duk Seo](https://towardsdatascience.com/only-numpy-implementing-different-combination-of-l1-norm-l2-norm-l1-regularization-and-14b01a9773b).