<h1><b>Title</b></h1>

<h3>Description of Notebook</h3>

Install / Import Needed Packages

In [None]:
pip install pandas

In [None]:
pip install numpy

In [None]:
pip install matplotlib

In [3]:
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

<h3>Load the Dataset</h3>

In [None]:
# Path of the CSV file to load -- left empty in this template; fill it in before running.
csv_path = ''
df = pd.read_csv(csv_path)

Show the first five rows

In [None]:
# head() returns the first five rows by default; as the last expression it is rendered as a table.
df.head()

<h3>Clean data</h3>

<h3>Find correlations</h3>

In [None]:
# Pairwise correlation between the columns of df.
# NOTE(review): df.corr() is called with its defaults; on recent pandas versions
# non-numeric columns may raise here -- confirm the cleaning step leaves only numeric data.
correlations = df.corr()

# Correlation of every column with 'diagnosis', largest value first.
diagnosis_corr = correlations['diagnosis']
important_features = diagnosis_corr.sort_values(ascending=False)

# Header, one blank line, then the sorted series.
print('Descending Correlation to Diagnosis:', end='\n\n')
print(important_features)

<h3>Scatter Plot of Correlation</h3>

In [None]:
# Template cell: plt.scatter() is called without data, so it cannot run as written.
# TODO: pass the x and y data to plot (e.g. a feature column and the target column of df)
# and fill in the empty title / axis-label strings below.
plt.scatter()
plt.title('')
plt.xlabel('')
plt.ylabel('')

<h3>Data formatting</h3>

In [None]:
# Top correlated features of dataset (TODO: fill in the column names).
# Selecting with a list keeps X two-dimensional: one row per example, one column per feature.
X = df[['']].to_numpy()

# Target feature (TODO: fill in the column name).
# Flattened to a 1-D vector so that element-wise operations against 1-D predictions
# (e.g. `pred == y_test`) compare row by row instead of broadcasting (m,) against (m, 1).
y = df[['']].to_numpy().ravel()

Split into train and test

In [None]:
# Fraction of the rows held out for testing.. between 0-1
test_size = 0.25

# Rows before split_index form the training set, the remaining rows the test set.
# NOTE(review): the split is positional (no shuffling) -- if the CSV rows are ordered,
# consider shuffling X and y together first.
split_index = int(len(X) * (1 - test_size))

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

<h3>Define Sigmoid</h3>

In [None]:
def sigmoid(z):
    """
    Logistic function 1 / (1 + e^(-z)).

    Args:
        z: scalar or numpy array.

    Returns:
        Value(s) of the logistic function at z; arrays are processed element-wise.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

<h3>Compute Cost</h3>

In [7]:
def compute_cost(X,y,w,b,lambda_=1):
    """
    Regularized logistic-regression cost.

    Computes
        J = (1/m) * sum_i [ -y_i*log(f_i) - (1-y_i)*log(1-f_i) ] + (lambda_/(2*m)) * sum_j w_j**2
    where f_i = 1 / (1 + exp(-(x_i . w + b))).

    Args:
        X: array of shape (m, n), one row per example.
        y: m target values; a (m,) vector or a (m, 1) column is accepted.
        w: n weights.
        b: scalar intercept.
        lambda_: regularization strength (default 1).

    Returns:
        The total cost as a Python float.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so a (m, 1) target column cannot broadcast against the (m,) vector z.
    y = np.asarray(y, dtype=float).reshape(-1)
    w = np.asarray(w, dtype=float).reshape(-1)
    m = X.shape[0]

    z = X @ w + b

    # -y*log(f) - (1-y)*log(1-f) simplifies to log(1 + e^z) - y*z.
    # np.logaddexp(0, z) evaluates log(1 + e^z) without overflowing, so the cost
    # stays finite even where the sigmoid would round to exactly 0 or 1.
    loss = np.logaddexp(0, z) - y * z
    cost = np.sum(loss) / m

    # The regularization factor is lambda_ / (2*m); the parentheses matter,
    # since lambda_/2*m would evaluate to (lambda_/2)*m.
    reg_cost = (lambda_ / (2 * m)) * np.sum(w ** 2)

    return float(cost + reg_cost)

<h3>Test Initial Model Parameters Set to Zero</h3>

In [None]:
# Number of examples (m) and number of features (n)
m,n = X.shape

# All-zero starting parameters: one weight per feature plus the intercept
w_initial, b_initial = np.zeros(n), 0.

# Evaluate the cost once at those parameters and report it with three decimals
cost = float(compute_cost(X,y,w_initial,b_initial))
print('Cost at initial w and b (zeros): {:.3f}'.format(cost))

<h3>Test Model With Non-Zeros</h3>

In [None]:
# Set the initial parameters.
# Arbitrary non-zero example values -- TODO: replace with the values you want to test.
# w_initial needs one entry per feature column of X.
w_initial = np.full(X.shape[1], 0.1)
b_initial = 0.5

cost = compute_cost(X,y,w_initial,b_initial)
cost = float(cost)
print('Cost at initial w and b (non-zeros): {:.3f}'.format(cost))

<h3>Compute Gradient</h3>

In [None]:
def compute_gradient(X,y,w,b,lambda_=1):
    """
    Gradient of the regularized logistic-regression cost.

    Args:
        X: array of shape (m, n), one row per example.
        y: m target values; a (m,) vector or a (m, 1) column is accepted.
        w: numpy array of n slope values.
        b: scalar intercept value.
        lambda_: regularization strength (default 1).

    Returns:
        (dj_db, dj_dw): dj_db is the derivative with respect to b as a float;
        dj_dw is an array with the same shape as w holding the derivatives
        with respect to each weight, including the (lambda_/m) * w term.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so a (m, 1) target column cannot broadcast against the (m,) predictions.
    y = np.asarray(y, dtype=float).reshape(-1)
    w = np.asarray(w, dtype=float)
    m = X.shape[0]

    # Prediction error of every example, computed in one shot.
    err = np.reshape(sigmoid(X @ w.reshape(-1) + b), -1) - y

    # X.T @ err sums err_i * X[i, j] over the examples for every feature j.
    dj_dw = (X.T @ err) / m + (lambda_ / m) * w.reshape(-1)
    dj_db = float(np.sum(err) / m)

    return dj_db, dj_dw.reshape(w.shape)

<h3>Run Gradient with Zeros</h3>

In [None]:
# Zero parameters: one weight per feature column of X and a zero intercept.
w_initial = np.zeros(X.shape[1])
b_initial = 0

dj_db, dj_dw = compute_gradient(X,y,w_initial,b_initial)
print('dj_db at initial w and b:', dj_db)
print('dj_dw at initial w and b:', dj_dw)

<h3>Run Gradient with Non-Zeros</h3>

In [None]:
# Arbitrary non-zero example values -- TODO: replace with the values you want to test.
# w_initial needs one entry per feature column of X.
w_initial = np.full(X.shape[1], 0.1)
b_initial = 0.5

dj_db, dj_dw = compute_gradient(X,y,w_initial,b_initial)
print('dj_db at initial w and b:', dj_db)
print('dj_dw at initial w and b:', dj_dw)

<h3>Define Gradient Descent</h3>

In [None]:
def gradient_descent(X,y,w_in,b_in,compute_cost,compute_gradient, alpha,num_iters):
    """
    Run batch gradient descent for a fixed number of iterations.

    Args:
        X: data passed through to compute_cost / compute_gradient.
        y: target values passed through to compute_cost / compute_gradient.
        w_in: initial weights (numpy array); not modified in place.
        b_in: initial intercept.
        compute_cost: callable (X, y, w, b) -> cost.
        compute_gradient: callable (X, y, w, b) -> (dj_db, dj_dw).
        alpha: learning rate.
        num_iters: number of update steps.

    Returns:
        (w, b, J_hist, w_hist): the final parameters, the cost recorded after each
        step, and the weights captured at every progress report.
    """
    J_hist = []
    w_hist = []

    # Progress is reported about ten times per run; the interval never changes,
    # so it is computed once instead of on every iteration.
    report_every = math.ceil(num_iters/10)

    for i in range(num_iters):
        dj_db, dj_dw = compute_gradient(X,y,w_in,b_in)

        # Both updates use gradients evaluated at the same (old) parameters.
        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db

        # Costs are only recorded for the first 100000 iterations
        # (presumably to bound the size of J_hist -- confirm).
        if i < 100000:
            J_hist.append(compute_cost(X,y,w_in,b_in))

        if i % report_every == 0 or i == (num_iters-1):
            w_hist.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_hist[-1]):8.2f}    ")
    return w_in, b_in, J_hist, w_hist

<h3>Run Gradient Descent</h3>

In [None]:
# Start from zero parameters: one weight per feature column of X_train and a zero intercept.
# TODO: change these if a different starting point is wanted.
w_init = np.zeros(X_train.shape[1])
b_init = 0.

iterations = 1000
alpha = 0.000001

w,b,J_hist,w_hist = gradient_descent(X_train,y_train,w_init,b_init,compute_cost,compute_gradient,alpha,iterations)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13,4))

# Left panel: cost recorded after every update step.
ax1.plot(J_hist)
ax1.set_title('Total Cost of Model')
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Cost')

# Right panel: weights captured at each progress report of gradient_descent.
ax2.plot(w_hist)
ax2.set_title('Weight Change Over Time')
ax2.set_xlabel('Iteration')
ax2.set_ylabel('Value')

plt.show()

<h3>Repeat as Needed</h3>

In [None]:
# Start from zero parameters: one weight per feature column of X_train and a zero intercept.
# TODO: change these (e.g. to the w and b of a previous run) when repeating.
w_init = np.zeros(X_train.shape[1])
b_init = 0.

iterations = 1000
alpha = 0.0000001

w,b,J_hist,w_hist = gradient_descent(X_train,y_train,w_init,b_init,compute_cost,compute_gradient,alpha,iterations)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13,4))

# Left panel: cost recorded after every update step.
ax1.plot(J_hist)
ax1.set_title('Total Cost of Model')
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Cost')

# Right panel: weights captured at each progress report of gradient_descent.
ax2.plot(w_hist)
ax2.set_title('Weight Change Over Time')
ax2.set_xlabel('Iteration')
ax2.set_ylabel('Value')

plt.show()

<h3>Compute Accuracy of Model on Test Data</h3>

In [None]:
def predict(X,w,b,threshold=0.5):
    """
    Classify every row of X with the logistic model.

    Args:
        X: array of shape (m, n), one row per example.
        w: n slope weights.
        b: intercept.
        threshold: a row is predicted as 1 when its sigmoid output is >= threshold
            (default 0.5, so callers may omit it).

    Returns:
        1-D float array of length m containing 1.0 or 0.0 for each row.
    """
    X = np.asarray(X)

    # Model output for all rows at once; flattened so the result is always 1-D.
    probs = np.reshape(sigmoid(X @ np.reshape(w, -1) + b), -1)

    return (probs >= threshold).astype(float)

In [None]:
# predict() needs a decision threshold; 0.5 labels a row 1 when the model output is at least 0.5.
pred = predict(X_test,w,b,0.5)

# Flatten y_test so the comparison is element-wise even if it is stored as a (m, 1) column;
# comparing a (m,) vector with a (m, 1) column would broadcast to an (m, m) matrix.
print('Test Accuracy: %f'%(np.mean(pred==np.ravel(y_test)) * 100))