# Initialization
# One-vs-All Logistic Regression for handwritten digit classification.

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy.optimize import minimize

# Load and prepare data
# Load data from ex3data1.mat

In [None]:
# Read the MATLAB file and pull out the feature matrix and label vector
data = loadmat('ex3data1.mat')
X, y = data['X'], data['y'].flatten()  # X: 400-dimensional vectorized images; y: labels 1-10 (10 stands for '0')
m = len(X)  # Number of examples (rows of X)

In [None]:
# Rewrite every label equal to 10 as 0 (in place)
zero_mask = y == 10
y[zero_mask] = 0

# Visualize data
# Function to display data points as a grid of images

In [None]:
def displayData(X, example_width=20):
    """Show the rows of ``X`` as a grid of grayscale image tiles.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        One flattened image per row.
    example_width : int, optional
        Width in pixels of each image; the height is ``n // example_width``.

    Each tile is scaled by its own largest absolute pixel value, tiles are
    separated by a one-pixel border of value -1, and the figure is shown
    with ``plt.show()``. Nothing is drawn when ``X`` has no rows.
    """
    m, n = X.shape
    if m == 0:
        # Nothing to draw; also avoids a zero division when sizing the grid.
        return

    example_height = n // example_width
    display_rows = int(np.sqrt(m))
    # Round up so every example gets a tile even when m is not a multiple
    # of display_rows (floor division would silently drop the remainder).
    display_cols = -(-m // display_rows)

    pad = 1
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    for idx in range(m):
        i, j = divmod(idx, display_cols)
        example = X[idx, :].reshape(example_height, example_width)

        scale = np.max(np.abs(example))
        if scale == 0:
            scale = 1  # all-zero image: keep it blank instead of dividing by zero

        top = pad + i * (example_height + pad)
        left = pad + j * (example_width + pad)
        display_array[top:top + example_height,
                      left:left + example_width] = example / scale

    plt.figure(figsize=(6, 6))
    # The assembled grid is drawn transposed, as in the original code.
    # NOTE(review): presumably because the images were flattened
    # column-major (MATLAB export) -- confirm against the data file.
    plt.imshow(display_array.T, cmap='gray')
    plt.axis('off')
    plt.show()

# Draw 100 distinct row indices at random and show those examples
rand_indices = np.random.choice(m, size=100, replace=False)
sel = X[rand_indices]

displayData(sel)


# Sigmoid Function
# Function to compute sigmoid

In [None]:
# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
    large-magnitude inputs saturate to 0 or 1 without an overflow in ``exp``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    Same shape as ``z``, values between 0 and 1.
    """
    return np.exp(-np.logaddexp(0, -z))

# Cost Function with Regularization
# Compute cost and gradient for logistic regression with regularization

In [None]:
def lrCostFunction(theta, X, y, lambda_):
    """Regularized logistic-regression cost and gradient.

    Parameters
    ----------
    theta : ndarray of shape (n,)
        Parameters; ``theta[0]`` is excluded from regularization.
    X : ndarray of shape (m, n)
        Design matrix.
    y : ndarray of shape (m,)
        Labels (0 or 1).
    lambda_ : float
        Regularization strength.

    Returns
    -------
    cost : float
        Mean cross-entropy plus ``lambda_ / (2 m) * sum(theta[1:] ** 2)``.
    grad : ndarray of shape (n,)
        Gradient of the cost with respect to ``theta``.
    """
    m = len(y)
    z = X @ theta

    # log(sigmoid(z)) and log(1 - sigmoid(z)) are taken straight from the
    # logits. Going through log(prediction) yields log(0) = -inf once a
    # prediction saturates, and 0 * -inf then turns the cost into nan.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)

    cost = -(y @ log_h + (1 - y) @ log_one_minus_h) / m
    cost = cost + (lambda_ / (2 * m)) * np.sum(theta[1:] ** 2)

    predictions = np.exp(log_h)  # sigmoid(z)
    grad = (X.T @ (predictions - y)) / m
    grad[1:] += (lambda_ / m) * theta[1:]
    return cost, grad


# One-vs-All Training
# Train logistic regression classifiers for each digit

In [None]:
def oneVsAll(X, y, num_labels, lambda_, maxiter=50):
    """Train one regularized logistic-regression classifier per label.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Features without an intercept column (one is prepended here).
    y : ndarray of shape (m,)
        Labels; classifier ``i`` is trained on the targets ``y == i``.
    num_labels : int
        Number of classifiers to train (labels ``0 .. num_labels - 1``).
    lambda_ : float
        Regularization strength passed to ``lrCostFunction``.
    maxiter : int, optional
        Iteration cap handed to ``scipy.optimize.minimize``.

    Returns
    -------
    ndarray of shape (num_labels, n + 1)
        Row ``i`` holds the fitted parameters of classifier ``i``.
    """
    m, n = X.shape
    all_theta = np.zeros((num_labels, n + 1))
    X = np.column_stack((np.ones(m), X))

    for i in range(num_labels):
        print(f"Training classifier for digit {i}...")
        label = (y == i).astype(int)
        # jac=True: lrCostFunction returns (cost, gradient), so each
        # evaluation computes both in a single pass rather than calling the
        # cost function once for the value and again for the gradient.
        result = minimize(lrCostFunction,
                          np.zeros(n + 1),
                          args=(X, label, lambda_),
                          jac=True,
                          options={'maxiter': maxiter})
        all_theta[i, :] = result.x
    return all_theta

# Ten classifiers (labels 0-9), regularization strength 0.1
num_labels = 10
lambda_ = 0.1

all_theta = oneVsAll(X, y, num_labels, lambda_)

# Predict with One-vs-All Classifier
# Predict the label for each example using trained classifiers

In [None]:
def predictOneVsAll(all_theta, X):
    """Return, for each row of ``X``, the index of the highest-scoring classifier.

    An intercept column of ones is prepended to ``X``; the score of
    classifier ``k`` for an example is the dot product of that augmented
    row with ``all_theta[k]``.
    """
    intercept = np.ones(X.shape[0])
    design = np.column_stack((intercept, X))
    scores = design @ all_theta.T
    return scores.argmax(axis=1)

# Percentage of training examples whose predicted label equals the true label
pred = predictOneVsAll(all_theta, X)
accuracy = 100 * np.mean(pred == y)

print(f"Training Set Accuracy: {accuracy:.2f}%")


In [None]:
def plotDataAndDecisionBoundary(X, y, theta):
    """Scatter the two classes and draw the line theta[0] + theta[1]*x1 + theta[2]*x2 = 0.

    Columns 0 and 1 of ``X`` are the plotted coordinates; rows with
    ``y == 1`` and ``y == 0`` get different markers.
    """
    # One scatter call per class
    groups = (
        (y == 1, 'k', '+', 'Admitted'),
        (y == 0, 'y', 'o', 'Not admitted'),
    )
    for mask, color, marker, label in groups:
        plt.scatter(X[mask, 0], X[mask, 1], c=color, marker=marker, label=label)

    # Two end points, 2 units beyond the range of the first feature,
    # are enough to draw a straight boundary
    first_feature = X[:, 0]
    x_values = [np.min(first_feature) - 2, np.max(first_feature) + 2]
    intercept_plus_slope = theta[0] + np.dot(theta[1], x_values)
    y_values = -intercept_plus_slope / theta[2]
    plt.plot(x_values, y_values, label='Decision Boundary', color='b')

    # Axis labels, legend, title
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.legend()
    plt.title('Data and Decision Boundary')
    plt.show()

# Call the function with the data and parameters
# NOTE(review): `optimal_theta` has no assignment above this line in the
# visible source (it is first assigned further down), so running the file
# top to bottom would fail here. This looks like a leftover cell from a
# different exercise -- confirm whether it still belongs.
plotDataAndDecisionBoundary(X[:, 1:], y, optimal_theta)


#  Predict and Compute Accuracy
# Predict admission probabilities and calculate accuracy

In [None]:
# Predict function
def predict(theta, X):
    """Return a boolean array that is True where ``sigmoid(X @ theta)`` is at least 0.5."""
    probabilities = sigmoid(X @ theta)
    return probabilities >= 0.5

# Probability for a single example: intercept term 1 plus the two scores 45 and 85.
# NOTE(review): this needs a 3-element `optimal_theta`; none is assigned above
# this line in the visible source -- confirm which exercise this cell belongs to.
prob = sigmoid(np.array([1, 45, 85]) @ optimal_theta)
print(f"For a student with scores 45 and 85, we predict an admission probability of {prob}")

In [None]:
# Share of rows, in percent, where the thresholded prediction equals the label.
# NOTE(review): `optimal_theta` is not assigned above this line in the visible
# source -- confirm this cell is still meant to run here.
p = predict(optimal_theta, X)
accuracy = 100 * np.mean(p == y)
print(f"Train Accuracy: {accuracy}")


# Initialization
# Logistic Regression with Regularization for classification problem.

# Load and prepare data
# Load data from ex2data2.txt

In [None]:
# Read the comma-separated file: first two columns are features, third is the 0/1 label
data = np.loadtxt('ex2data2.txt', delimiter=',')
X, Y = data[:, :2], data[:, 2]

# Visualize data
# Function to plot data points

In [None]:
def plotData(X, y):
    """Scatter columns 0 and 1 of ``X``, with separate markers for ``y == 1`` and ``y == 0``."""
    is_pos = y == 1
    is_neg = y == 0

    series = (
        (is_pos, 'k', '+', 'y = 1'),
        (is_neg, 'y', 'o', 'y = 0'),
    )
    for mask, color, marker, label in series:
        plt.scatter(X[mask, 0], X[mask, 1], c=color, marker=marker, label=label)

    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.show()

# Show the raw (unmapped) two-feature data set
print("Plotting data...")
plotData(X, Y)


# Map Feature Function
# Function to map features to polynomial terms

In [None]:
def mapFeature(x1, x2, degree=6):
    """Expand two features into all polynomial terms up to ``degree``.

    Parameters
    ----------
    x1, x2 : ndarray of shape (k,)
        The two input features.
    degree : int, optional
        Highest total degree of the generated terms (6 by default).

    Returns
    -------
    ndarray of shape (k, (degree + 1) * (degree + 2) / 2)
        Column 0 is all ones; the remaining columns are
        ``x1 ** (i - j) * x2 ** j`` for ``i = 1..degree`` and ``j = 0..i``,
        in that order.
    """
    # Collect the columns first and stack once; growing the matrix with a
    # fresh hstack per term re-copies everything built so far each time.
    columns = [np.ones(x1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            columns.append((x1 ** (i - j)) * (x2 ** j))
    return np.column_stack(columns)

# Map feature
# Replaces the two-column X with its polynomial expansion (ones column first).
# X is overwritten, so re-running this line would expand columns 0 and 1 of
# the already expanded matrix.
X = mapFeature(X[:, 0], X[:, 1])

# Sigmoid Function
# Function to compute sigmoid

In [None]:
# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
    large-magnitude inputs saturate to 0 or 1 without an overflow in ``exp``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    Same shape as ``z``, values between 0 and 1.
    """
    return np.exp(-np.logaddexp(0, -z))



# Cost Function with Regularization
# Compute cost and gradient for logistic regression with regularization

In [None]:
# Cost function with regularization
def costFunctionReg(theta, X, y, lambda_):
    """Regularized logistic-regression cost and gradient.

    Parameters
    ----------
    theta : ndarray of shape (n,)
        Parameters; ``theta[0]`` is excluded from regularization.
    X : ndarray of shape (m, n)
        Design matrix.
    y : ndarray of shape (m,)
        Labels (0 or 1).
    lambda_ : float
        Regularization strength.

    Returns
    -------
    cost : float
        Mean cross-entropy plus ``lambda_ / (2 m) * sum(theta[1:] ** 2)``.
    grad : ndarray of shape (n,)
        Gradient of the cost with respect to ``theta``.
    """
    m = len(y)
    z = X @ theta

    # Work with log(sigmoid(z)) and log(1 - sigmoid(z)) computed from the
    # logits: log(prediction) becomes -inf once a prediction saturates at
    # 0 or 1, and the resulting 0 * -inf makes the whole cost nan.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)

    cost = -(y @ log_h + (1 - y) @ log_one_minus_h) / m
    cost += (lambda_ / (2 * m)) * np.sum(theta[1:] ** 2)

    predictions = np.exp(log_h)  # sigmoid(z)
    grad = (X.T @ (predictions - y)) / m
    grad[1:] += (lambda_ / m) * theta[1:]
    return cost, grad

# Evaluate cost and gradient at the all-zero parameter vector with lambda = 1
lambda_ = 1
initial_theta = np.zeros(X.shape[1])

cost, grad = costFunctionReg(initial_theta, X, Y, lambda_)
print(f"Cost at initial theta (zeros): {cost}")
print(f"Gradient at initial theta (zeros): {grad[:5]}")

In [None]:
# Surface plot
# NOTE(review): J_vals, theta0_vals and theta1_vals are not assigned anywhere
# in the visible source; this cell looks like a leftover from a different
# exercise -- confirm before running.
J_vals = J_vals.T  # Transpose for correct orientation
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
T0, T1 = np.meshgrid(theta0_vals, theta1_vals)
ax.plot_surface(T0, T1, J_vals, cmap='viridis')
ax.set_xlabel('theta_0')
ax.set_ylabel('theta_1')
ax.set_zlabel('Cost J')
plt.show()

In [None]:
# Contour plot
# Contour levels are 20 values spaced logarithmically between 1e-2 and 1e3;
# the red cross marks the point (theta[0], theta[1]).
# NOTE(review): theta0_vals, theta1_vals, J_vals and theta are not assigned
# anywhere in the visible source -- confirm this cell still belongs here.
plt.contour(theta0_vals, theta1_vals, J_vals, levels=np.logspace(-2, 3, 20), cmap='viridis')
plt.xlabel('theta_0')
plt.ylabel('theta_1')
plt.plot(theta[0], theta[1], 'rx', markersize=10, linewidth=2)
plt.show()


# Optimizing Theta
# Use a built-in optimizer to find the optimal theta

In [None]:
# Optimize using scipy.optimize.minimize.
# jac=True tells minimize that costFunctionReg returns (cost, gradient), so
# every evaluation computes both in one pass instead of calling the cost
# function once for the value and a second time for the gradient.
result = minimize(costFunctionReg,
                  initial_theta,
                  args=(X, Y, lambda_),
                  jac=True,
                  options={'maxiter': 400})

optimal_theta = result.x
cost_at_optimal_theta = result.fun

print(f"Cost at optimal theta: {cost_at_optimal_theta}")
print(f"Optimal theta: {optimal_theta[:5]}")


# Plot Decision Boundary
# Plot decision boundary using the optimal theta

In [None]:
def plotDataAndDecisionBoundary(X, y, theta, title='Data and Decision Boundary'):
    """Scatter both classes and overlay the curve where ``mapFeature(u, v) @ theta`` is zero.

    Parameters
    ----------
    X : ndarray
        Columns 0 and 1 are used as horizontal and vertical coordinates.
    y : ndarray
        Labels; rows with ``y == 1`` and ``y == 0`` get different markers.
    theta : ndarray
        Weights applied to the ``mapFeature`` expansion of each grid point.
    title : str, optional
        Figure title.
    """
    # Plot data points
    pos = y == 1
    neg = y == 0

    plt.scatter(X[pos, 0], X[pos, 1], c='k', marker='+', label='y = 1')
    plt.scatter(X[neg, 0], X[neg, 1], c='y', marker='o', label='y = 0')

    # Evaluate the model on a 50 x 50 grid with a single mapFeature call
    # rather than one call per grid point.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    uu, vv = np.meshgrid(u, v)  # uu[r, c] == u[c], vv[r, c] == v[r]
    z = (mapFeature(uu.ravel(), vv.ravel()) @ theta).reshape(uu.shape)

    # Rows of z follow v and columns follow u, which is the orientation
    # plt.contour takes, so no transpose is needed.
    plt.contour(u, v, z, levels=[0], linewidths=2, colors='g', linestyles='-')

    # Empty line used only to give the boundary a legend entry
    plt.plot([], [], 'g-', label='Decision Boundary')

    # Add labels, legend, and title
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.title(title)
    plt.show()

# Call the combined function with data and parameters.
# Columns 1 and 2 of X are passed as the scatter coordinates (column 0 is skipped).
print("Plotting data with decision boundary...")
plotDataAndDecisionBoundary(X[:, 1:3], Y, optimal_theta)




# Predict and Compute Accuracy
# Predict probabilities and calculate accuracy

In [None]:
# Predict function
def predict(theta, X):
    """Return a boolean array that is True where ``sigmoid(X @ theta)`` is at least 0.5."""
    probabilities = sigmoid(X @ theta)
    return probabilities >= 0.5

# Percentage of training rows whose thresholded prediction equals the label
p = predict(optimal_theta, X)
accuracy = 100 * np.mean(p == Y)
print(f"Train Accuracy: {accuracy}")

### Training data with decision boundary $\lambda$ = 0 

In [None]:
# Initialize fitting parameters (no regularization)
initial_theta = np.zeros(X.shape[1])
lambda_ = 0

# Optimize using scipy.optimize.minimize.
# jac=True tells minimize that costFunctionReg returns (cost, gradient), so
# every evaluation computes both in one pass instead of calling the cost
# function once for the value and a second time for the gradient.
result = minimize(costFunctionReg,
                  initial_theta,
                  args=(X, Y, lambda_),
                  jac=True,
                  options={'maxiter': 400})

optimal_theta = result.x
cost_at_optimal_theta = result.fun

print(f"Cost at optimal theta: {cost_at_optimal_theta}")
print(f"Optimal theta: {optimal_theta[:5]}")

def plotDataAndDecisionBoundary(X, y, theta, title=r'No regularization (Overfitting) ($\lambda$ = 0)'):
    """Scatter both classes and overlay the curve where ``mapFeature(u, v) @ theta`` is zero.

    Parameters
    ----------
    X : ndarray
        Columns 0 and 1 are used as horizontal and vertical coordinates.
    y : ndarray
        Labels; rows with ``y == 1`` and ``y == 0`` get different markers.
    theta : ndarray
        Weights applied to the ``mapFeature`` expansion of each grid point.
    title : str, optional
        Figure title.
    """
    # Plot data points
    pos = y == 1
    neg = y == 0

    plt.scatter(X[pos, 0], X[pos, 1], c='k', marker='+', label='y = 1')
    plt.scatter(X[neg, 0], X[neg, 1], c='y', marker='o', label='y = 0')

    # Evaluate the model on a 50 x 50 grid with a single mapFeature call
    # rather than one call per grid point.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    uu, vv = np.meshgrid(u, v)  # uu[r, c] == u[c], vv[r, c] == v[r]
    z = (mapFeature(uu.ravel(), vv.ravel()) @ theta).reshape(uu.shape)

    # Rows of z follow v and columns follow u, which is the orientation
    # plt.contour takes, so no transpose is needed.
    plt.contour(u, v, z, levels=[0], linewidths=2, colors='g', linestyles='-')

    # Empty line used only to give the boundary a legend entry
    plt.plot([], [], 'g-', label='Decision Boundary')

    # Add labels, legend, and title
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.title(title)
    plt.show()

# Call the combined function with data and parameters.
# Columns 1 and 2 of X are passed as the scatter coordinates (column 0 is skipped).
print("Plotting data with decision boundary...")
plotDataAndDecisionBoundary(X[:, 1:3], Y, optimal_theta)

### Training data with decision boundary $\lambda$ = 100 

In [None]:
# Initialize fitting parameters (strong regularization)
initial_theta = np.zeros(X.shape[1])
lambda_ = 100

# Optimize using scipy.optimize.minimize.
# jac=True tells minimize that costFunctionReg returns (cost, gradient), so
# every evaluation computes both in one pass instead of calling the cost
# function once for the value and a second time for the gradient.
result = minimize(costFunctionReg,
                  initial_theta,
                  args=(X, Y, lambda_),
                  jac=True,
                  options={'maxiter': 400})

optimal_theta = result.x
cost_at_optimal_theta = result.fun

print(f"Cost at optimal theta: {cost_at_optimal_theta}")
print(f"Optimal theta: {optimal_theta[:5]}")

def plotDataAndDecisionBoundary(X, y, theta, title=r'Too much regularization (Underfitting) ($\lambda$ = 100)'):
    """Scatter both classes and overlay the curve where ``mapFeature(u, v) @ theta`` is zero.

    Parameters
    ----------
    X : ndarray
        Columns 0 and 1 are used as horizontal and vertical coordinates.
    y : ndarray
        Labels; rows with ``y == 1`` and ``y == 0`` get different markers.
    theta : ndarray
        Weights applied to the ``mapFeature`` expansion of each grid point.
    title : str, optional
        Figure title.
    """
    # Plot data points
    pos = y == 1
    neg = y == 0

    plt.scatter(X[pos, 0], X[pos, 1], c='k', marker='+', label='y = 1')
    plt.scatter(X[neg, 0], X[neg, 1], c='y', marker='o', label='y = 0')

    # Evaluate the model on a 50 x 50 grid with a single mapFeature call
    # rather than one call per grid point.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    uu, vv = np.meshgrid(u, v)  # uu[r, c] == u[c], vv[r, c] == v[r]
    z = (mapFeature(uu.ravel(), vv.ravel()) @ theta).reshape(uu.shape)

    # Rows of z follow v and columns follow u, which is the orientation
    # plt.contour takes, so no transpose is needed.
    plt.contour(u, v, z, levels=[0], linewidths=2, colors='g', linestyles='-')

    # Empty line used only to give the boundary a legend entry
    plt.plot([], [], 'g-', label='Decision Boundary')

    # Add labels, legend, and title
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.title(title)
    plt.show()

# Call the combined function with data and parameters.
# Columns 1 and 2 of X are passed as the scatter coordinates (column 0 is skipped).
print("Plotting data with decision boundary...")
plotDataAndDecisionBoundary(X[:, 1:3], Y, optimal_theta)

### Using gradient descent

In [None]:
# Gradient descent for logistic regression with regularization
def gradientDescentReg(X, y, theta, alpha, lambda_, num_iters, cost_function=None):
    """Run ``num_iters`` fixed-step gradient-descent updates.

    Parameters
    ----------
    X, y : ndarray
        Training data, passed through to the cost function.
    theta : array_like
        Starting parameters. The caller's array is left untouched.
    alpha : float
        Step size.
    lambda_ : float
        Regularization strength, passed through to the cost function.
    num_iters : int
        Number of update steps.
    cost_function : callable, optional
        ``cost_function(theta, X, y, lambda_) -> (cost, grad)``. Defaults to
        ``costFunctionReg``.

    Returns
    -------
    theta : ndarray
        Parameters after the last update.
    J_history : list
        Cost evaluated before each update, one entry per iteration.
    """
    if cost_function is None:
        cost_function = costFunctionReg

    # Work on a float copy: the in-place update below would otherwise
    # overwrite the array the caller passed in (e.g. initial_theta).
    theta = np.array(theta, dtype=float)
    J_history = []

    for _ in range(num_iters):
        cost, grad = cost_function(theta, X, y, lambda_)
        theta -= alpha * grad
        J_history.append(cost)

    return theta, J_history

# Hyperparameters for the gradient-descent run
alpha = 0.1
num_iters = 5000
lambda_ = 1
initial_theta = np.zeros(X.shape[1])

# Run the descent, then re-evaluate the cost at the final parameters
optimal_theta, J_history = gradientDescentReg(X, Y, initial_theta, alpha, lambda_, num_iters)
cost_at_optimal_theta = costFunctionReg(optimal_theta, X, Y, lambda_)[0]

print(f"Cost at optimal theta: {cost_at_optimal_theta}")
print(f"Optimal theta: {optimal_theta[:5]}")

# Cost recorded at each iteration
plt.plot(range(num_iters), J_history)
plt.title('Cost History')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()

# Cost at optimal theta: 0.5290027422869218
# Optimal theta: [ 1.27268739  0.62557016  1.1809665  -2.01919822 -0.91761468]

In [None]:

# Scatter plot plus decision boundary for the gradient-descent result
def plotDataAndDecisionBoundary(X, y, theta, title=r'Using Gradient Descent'):
    """Scatter both classes and overlay the curve where ``mapFeature(u, v) @ theta`` is zero.

    Parameters
    ----------
    X : ndarray
        Columns 0 and 1 are used as horizontal and vertical coordinates.
    y : ndarray
        Labels; rows with ``y == 1`` and ``y == 0`` get different markers.
    theta : ndarray
        Weights applied to the ``mapFeature`` expansion of each grid point.
    title : str, optional
        Figure title.
    """
    # Plot data points
    pos = y == 1
    neg = y == 0

    plt.scatter(X[pos, 0], X[pos, 1], c='k', marker='+', label='y = 1')
    plt.scatter(X[neg, 0], X[neg, 1], c='y', marker='o', label='y = 0')

    # Evaluate the model on a 50 x 50 grid with a single mapFeature call
    # rather than one call per grid point.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    uu, vv = np.meshgrid(u, v)  # uu[r, c] == u[c], vv[r, c] == v[r]
    z = (mapFeature(uu.ravel(), vv.ravel()) @ theta).reshape(uu.shape)

    # Rows of z follow v and columns follow u, which is the orientation
    # plt.contour takes, so no transpose is needed.
    plt.contour(u, v, z, levels=[0], linewidths=2, colors='g', linestyles='-')

    # Empty line used only to give the boundary a legend entry
    plt.plot([], [], 'g-', label='Decision Boundary')

    # Add labels, legend, and title
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.title(title)
    plt.show()
# Plot the data with the boundary given by the current optimal_theta;
# columns 1 and 2 of X are passed as the scatter coordinates.
print("Plotting data with decision boundary...")
plotDataAndDecisionBoundary(X[:, 1:3], Y, optimal_theta)

### A good exercise is to vary the hyperparameters lambda_, num_iters, and alpha