In [None]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import RandomState

# Generate data
x = np.linspace(1, 20, 100)
np.random.seed(42)


def sample_label(value):
    """Return the 0/1 label for a single x value.

    The x axis is split into half-open bands so that every value, including
    the exact band edges, lands in exactly one band:

    * ``[7.5, 12.5)``              -> always 1
    * ``[5, 7.5)`` / ``[12.5, 15)`` -> random 0 or 1 (uses the global NumPy RNG)
    * everything else              -> always 0
    """
    if 7.5 <= value < 12.5:
        return 1
    if 5 <= value < 7.5 or 12.5 <= value < 15:
        return np.random.randint(2)
    return 0


# Labels are drawn one at a time, in x order, so the sequence of RNG calls
# (and therefore the seeded result) is the same as a plain element-wise loop.
y = np.array([sample_label(value) for value in x])

# Scatter plot: draw the raw 0/1 labels against x as small blue markers,
# then name both axes.
plt.scatter(x, y, s=2, c="blue")
plt.ylabel("y")
plt.xlabel("x")

# Gradient descent function
def gradient_descent(x, y, iterations=100000, learning_rate=0.0002):
    """Fit the line ``y = m * x + b`` by batch gradient descent on the MSE.

    Both parameters start at zero and are updated ``iterations`` times using
    the gradient of the mean squared error over the whole data set. A summary
    line with the final parameters and cost is printed before returning.

    Args:
        x: 1-D sequence or array of inputs.
        y: 1-D sequence or array of targets, same length as ``x``.
        iterations: Number of update steps to run (must be >= 1).
        learning_rate: Step size applied to each gradient update.

    Returns:
        Tuple ``(m, b, cost)`` where ``cost`` is the mean squared error
        evaluated at the returned ``m`` and ``b``.

    Raises:
        ValueError: If ``x`` is empty, ``x`` and ``y`` differ in shape, or
            ``iterations`` is less than 1.
    """
    # Convert up front so plain lists work and all arithmetic is vectorised.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError("x and y must have the same shape")
    n = len(x)
    if n == 0:
        raise ValueError("x and y must not be empty")
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    m_curr = b_curr = 0.0

    for _ in range(iterations):
        residual = y - (m_curr * x + b_curr)
        # Partial derivatives of the MSE with respect to m and b.
        md = -(2 / n) * np.sum(x * residual)
        bd = -(2 / n) * np.sum(residual)
        m_curr -= learning_rate * md
        b_curr -= learning_rate * bd

    # Evaluate the cost once, at the final parameters, so the reported value
    # matches the returned m and b instead of lagging one step behind.
    cost = (1 / n) * np.sum((y - (m_curr * x + b_curr)) ** 2)

    print(f"m {m_curr}, b {b_curr}, iteration {iterations - 1}, cost {cost}")
    return m_curr, b_curr, cost

# Least-squares line fitted by gradient descent, overlaid in green.
m, b, cost = gradient_descent(x, y)
fitted_line = m * x + b
plt.plot(x, fitted_line, color='green')

# Mean, standard deviation and variance of the x values.
mean, std = np.mean(x), np.std(x)
var = std**2

# Gaussian density (centre `mean`, variance `var`) evaluated at the fitted
# line values, shown as red markers.
centered = fitted_line - mean
gauss_norm = (2 * np.pi * var)**0.5
y_gaussian = np.exp((-1 * centered**2) / (2 * var)) / gauss_norm
plt.scatter(x, y_gaussian, color='red')

# Start the NLL fit from zero slope and zero intercept.
m = b = 0

# Negative log-likelihood function
def gaussian_nll(b, m):
    """Return the Gaussian negative log-likelihood loss for intercept b and slope m.

    Reads the module-level ``x``, ``y``, ``mean`` and ``var``. The prediction
    for each point is a Gaussian density (centre ``mean``, variance ``var``)
    evaluated at ``m * x + b``; the loss is
    ``0.5 * sum(log(2 * pi * var) + (y - prediction)**2 / var)``.
    """
    shifted = m * x + b - mean
    density = np.exp((-1 * shifted**2) / (2 * var)) / ((2 * np.pi * var)**0.5)
    per_point = np.log(2 * np.pi * var) + ((y - density)**2) / var
    return 0.5 * np.sum(per_point)

# Gradient descent for NLL
iterations = 5000
learning_rate = 0.001

# Normalising constant of the Gaussian; it depends only on var, so it is
# computed once instead of on every iteration.
norm_const = (2 * np.pi * var)**0.5

for i in range(iterations):
    resid = m * x + b - mean
    y_pred = np.exp((-1 * resid**2) / (2 * var)) / norm_const

    # Gradient of gaussian_nll with respect to m and b. By the chain rule,
    #   d(y_pred)/dm = -y_pred * resid * x / var
    #   d(y_pred)/db = -y_pred * resid / var
    # so the derivative of 0.5 * sum((y - y_pred)**2 / var) carries an extra
    # factor of y_pred / var on top of (y - y_pred) * resid / var.
    common = (y - y_pred) * y_pred * resid / var**2
    md = np.sum(common * x)
    bd = np.sum(common)

    b -= learning_rate * bd
    m -= learning_rate * md
    loss = gaussian_nll(b, m)

    if i % 100 == 0:  # Print loss every 100 iterations
        print(loss)

print(f'm {m}, b {b}, loss {loss}')

# Final plot
# NOTE(review): y_gaussian is not recomputed in this section, so this draws
# whatever curve it held before the loop rather than one built from the m and
# b fitted above -- confirm whether the NLL-fitted curve was intended here.
plt.scatter(x, y_gaussian, color='red')
plt.scatter(x, y, c="blue", s=2)
plt.show()
