# Bài tập 2


Triệu Nhật Minh - 21127112

---

In [101]:
import numpy as np
%matplotlib inline

## 1 - 2: Hoeffding Inequality

In [102]:
# Experiment setup: n_exps independent experiments, each flipping n_coins coins
# n_flips times, where every flip lands heads with probability mu.
n_coins = 1000
n_flips = 10
mu = 0.5
n_exps = 100000

# result[e, c] is the fraction of heads obtained by coin c in experiment e.
# NOTE: this materialises an (n_exps, n_coins) array, i.e. 10**8 entries.
# NOTE(review): no random seed is set in this cell, so the numbers change between runs.
result = np.random.binomial(n_flips, mu, (n_exps, n_coins)) / n_flips

# Per-experiment head frequency of: the first coin, one coin picked uniformly at
# random in each experiment, and the coin with the smallest frequency.
v_1 = result[:, 0]
v_rand = result[np.arange(n_exps), np.random.choice(n_coins, size=n_exps)]
v_min = np.min(result, axis=1)

# Average of the minimum frequency over all experiments (displayed as the cell output).
np.mean(v_min)

0.037717

In [103]:
def is_hoeffding_coin(results, mu=0.5):
    """
    Checks whether the total of the observations stays close to its expected value.

    Every entry of `results` is treated as one observation whose expected value is
    `mu`. The total of the observations is compared with `len(results) * mu`, and the
    check passes when the absolute deviation is at most sqrt(n * ln(2) / 2).

    For n independent observations bounded in [0, 1], Hoeffding's inequality gives
    P(|S - E[S]| >= t) <= 2 * exp(-2 * t**2 / n). The threshold used here is the value
    of t at which that bound equals 1, so this is a coarse sanity check on the sample
    rather than a statistical test.

    Parameters
    ----------
    results : sequence or numpy array, shape (n,)
        The observed values (e.g. the fraction of heads seen in each experiment).
    mu : float, optional
        The expected value of a single observation. Defaults to 0.5 (a fair coin).

    Returns
    -------
    bool
        True if the deviation of the total from its expectation is within the threshold.
    """
    n_obs = len(results)

    # np.sum is vectorised; the built-in sum() iterates element by element in Python.
    # axis=0 keeps the result a scalar for the 1-D input this function is meant for.
    total = np.sum(results, axis=0)

    # Expected total if every observation has mean mu
    expected_total = n_obs * mu

    deviation = np.abs(total - expected_total)
    threshold = np.sqrt(n_obs * np.log(2) / 2)

    return deviation <= threshold

# Apply the check to the three samples of head frequencies computed in the previous cell.
print("First coin is Hoeffding?", is_hoeffding_coin(v_1))
print("Random coin is Hoeffding?", is_hoeffding_coin(v_rand))
print("Min coin is Hoeffding?", is_hoeffding_coin(v_min))

First coin is Hoeffding? True
Random coin is Hoeffding? True
Min coin is Hoeffding? False


## 3 - 4: Error and Noise

$\mathbb{P}(y|\text{x}) = \begin{cases} \lambda & y = f(x) \\ 1 - \lambda & y \neq f(x) \end{cases}$


In the context of binary classification, where the output `y` can take on two possible values (let's say -1 and 1), there are typically only two error cases to consider:

1. The hypothesis `h` agrees with the target, `h(x) = f(x)`, which happens with probability $1 - \mu$, but the noisy output differs from the target, `y ≠ f(x)`, which happens with probability $1 - \lambda$. In this case `h(x) ≠ y`, so `h` makes an error on `y` that is caused purely by the noise. Treating the two events as independent, the probability of this case is $(1 - \lambda) * (1 - \mu)$

2. The hypothesis `h` disagrees with the target, `h(x) ≠ f(x)`, which happens with probability $\mu$, while the noisy output equals the target, `y = f(x)`, which happens with probability $\lambda$. In this case `h(x) ≠ y` as well. The probability of this case is $\lambda * \mu$

These two cases cover all the possible error scenarios for a binary classifier. Therefore, the probability of error is the sum of the probabilities of these two cases:

$$\mathbb{P}(\text{error}) = (1 - \lambda) * (1 - \mu) + \lambda * \mu$$

## 3
The correct answer is [e] $(1 - \lambda) * (1 - \mu) + \lambda * \mu$

## 4

We have the probability of error that $h$ makes in approximating $y$:

$$\begin{align*}
\mathbb{P}(\text{error}) &= (1 - \lambda) * (1 - \mu) + \lambda * \mu \\
&= 1 - \mu - \lambda + 2\mu\lambda\\
&= \mu(2\lambda - 1) + 1 - \lambda
\end{align*}$$

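The performance of $h$ is independent of $\mu$ exactly when the coefficient of $\mu$ vanishes, i.e. $2\lambda - 1 = 0 \Leftrightarrow \lambda = 0.5$. In that case $\mathbb{P}(\text{error}) = 1 - \lambda = \frac{1}{2}$ for every $\mu$.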

The correct answer is [b] 0.5

## 5 - 7: Linear Regression

Hàm phát sinh ra `target_w`, véc-tơ tham số của $f$

In [104]:
def generate_target_w(): # Code from HW1
    """
    Draws a random target line and returns its parameter vector.

    Two points are sampled uniformly from [-1, 1] x [-1, 1]; the returned weights
    (w0, w1, w2) satisfy w0 + w1 * x + w2 * y = 0 at both sampled points.

    Returns
    -------
    target_w : numpy array, shape (3, 1)
        The vector of parameters of f.
    """
    # Sample the first point, then the second one
    x1, y1 = np.random.uniform(-1, 1, 2)
    x2, y2 = np.random.uniform(-1, 1, 2)

    # Coefficients of the line through (x1, y1) and (x2, y2)
    bias = y1 * x2 - x1 * y2
    weight_x = y2 - y1
    weight_y = x1 - x2

    # Arrange the coefficients as a column vector
    return np.array([[bias], [weight_x], [weight_y]])

Hàm phát sinh ra tập dữ liệu

In [105]:
def generate_data(N, target_w): # Code from HW1
    """
    Samples N random inputs and labels each of them with the sign of its
    inner product with target_w.

    Parameters
    ----------
    N : int
        The number of examples.
    target_w : numpy array, shape (3, 1)
        The vector of parameters of f.

    Returns
    -------
    X : numpy array, shape (N, 3)
        The matrix of input vectors, one per row; the first column is all ones and
        the other two are drawn uniformly from [-1, 1].
    Y : numpy array, shape (N, 1)
        The vector of outputs.
    """
    # Raw 2-D points, uniformly distributed over [-1, 1] x [-1, 1]
    points = np.random.uniform(-1, 1, (N, 2))

    # Prepend the constant coordinate x0 = 1
    X = np.column_stack((np.ones(N), points))

    # Label every point with the side of the target line it falls on
    Y = np.sign(X.dot(target_w))

    return X, Y

Hàm chạy Linear Regression

In [106]:
def run_LinearRegression(X, Y):
    """
    Runs the Linear Regression algorithm on X, Y.

    The weights are the least-squares solution w = pinv(X) @ Y. The Moore-Penrose
    pseudo-inverse is computed directly, so the function also works when X.T @ X is
    singular (for example when X has duplicated columns); in that case the
    minimum-norm least-squares solution is returned instead of raising an error.

    Parameters
    ----------
    X : numpy array, shape (N, d)
        The matrix of input vectors (each row corresponds to an input vector); the first column of
        this matrix is all ones.
    Y : numpy array, shape (N, 1) or (N,)
        The vector of outputs.

    Returns
    -------
    w : numpy array, shape (d, 1) or (d,)
        The vector of parameters of g; it is a column vector when Y is a column vector
        and 1-D when Y is 1-D.
    """
    # Calculate w using the pseudo-inverse of X
    w = np.dot(np.linalg.pinv(X), Y)

    return w

Hàm main

In [107]:
def main_LR(num_runs=1000, n_in=100, n_out=1000):
    """
    Estimates the average in-sample and out-of-sample classification error of
    Linear Regression over repeated random experiments and prints both averages.

    In every run a new target function is drawn, a training set and a separate
    out-of-sample set are generated from it, Linear Regression is fitted on the
    training set, and the fraction of misclassified points is measured on both sets.

    Parameters
    ----------
    num_runs : int, optional
        The number of experiments to run.
    n_in : int, optional
        The number of in-sample (training) points per run.
    n_out : int, optional
        The number of out-of-sample points per run.

    Returns
    -------
    None
        The two averages are printed, not returned.

    Raises
    ------
    ValueError
        If num_runs is not positive.
    """
    if num_runs <= 0:
        raise ValueError("num_runs must be a positive integer")

    total_Ein = 0.0 # Sum of the in-sample errors over all runs
    total_Eout = 0.0 # Sum of the out-of-sample errors over all runs

    for _ in range(num_runs):
        # Generate target_w
        target_w = generate_target_w()
        # Generate training set
        X_in, y_in = generate_data(n_in, target_w)
        # Generate out-of-sample dataset from the same target
        X_out, y_out = generate_data(n_out, target_w)

        # Run Linear Regression to pick g
        w = run_LinearRegression(X_in, y_in)

        # Binary error: fraction of points whose predicted sign differs from the label
        total_Ein += np.mean(np.sign(np.dot(X_in, w)) != y_in)
        total_Eout += np.mean(np.sign(np.dot(X_out, w)) != y_out)

    avg_Ein = total_Ein / num_runs
    avg_Eout = total_Eout / num_runs

    print("Average Ein: ", avg_Ein)
    print("Average Eout: ", avg_Eout)

## 5 - 6

In [108]:
# Questions 5 and 6: print the average in-sample and out-of-sample errors of Linear Regression.
main_LR()

Average Ein:  0.038049999999999834
Average Eout:  0.048591999999999996


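[5] The correct answer is [c] 0.01 (the option closest to the measured average $E_{in} \approx 0.038$)

[6] The correct answer is [c] 0.01 (the option closest to the measured average $E_{out} \approx 0.049$)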

## 7

Hàm chạy PLA

In [109]:
def run_PLA(X, Y, w=None): # Code from HW1, modified w to be the output of Linear Regression
    """
    Runs PLA until every training point is classified correctly.

    Parameters
    ----------
    X : numpy array, shape (N, 3)
        The matrix of input vectors (each row corresponds to an input vector); the first column of
        this matrix is all ones.
    Y : numpy array, shape (N, 1)
        The vector of outputs.
    w : numpy array, shape (3, 1), optional
        The initial weights. When omitted, the weights found by Linear Regression on
        (X, Y) are used as the starting point. The array passed in is copied and never
        modified.

    Returns
    -------
    num_iterations : int
        The number of iterations PLA takes to converge.

    Notes
    -----
    The loop only ends when no point is misclassified, so it does not terminate on
    data that is not linearly separable.
    """
    if w is None:
        w = run_LinearRegression(X, Y)
    else:
        # Work on a float column-vector copy so the caller's array stays untouched
        w = np.array(w, dtype=float).reshape(-1, 1)
    num_iterations = 0

    while True:
        misclassified_indices = np.where(np.sign(np.dot(X, w)) != Y)[0] # Find misclassified indices
        if misclassified_indices.size == 0:
            break
        i = np.random.choice(misclassified_indices) # Pick one misclassified point at random
        w += Y[i] * X[i].reshape(-1, 1) # Move w towards classifying that point correctly
        num_iterations += 1

    return num_iterations

In [110]:
def main_PLA(N):
    """
    Repeats the PLA experiment 1000 times and prints the average number of
    iterations PLA needs to converge.

    Parameters
    ----------
    N : int
        The number of training examples.
    """
    num_runs = 1000

    def iterations_for_one_run():
        # Fresh target and fresh training set for every experiment
        target_w = generate_target_w()
        X, Y = generate_data(N, target_w)
        # PLA runs until all in-sample points are separated
        return run_PLA(X, Y)

    # Running average of the number of iterations PLA takes to converge
    avg_num_iterations = 0.0
    for _ in range(num_runs):
        avg_num_iterations += iterations_for_one_run() / float(num_runs)

    # Print results
    print('avg_num_iterations = %f' % (avg_num_iterations))

In [111]:
# Question 7: average number of PLA iterations with N = 10 training points,
# starting from the weights found by Linear Regression.
main_PLA(N=10)

avg_num_iterations = 3.965000


[7] The correct answer is [a] 1 (the option closest to the measured average of about 4 iterations)

## 8 - 10: Nonlinear Transformation

$$f(x_1, x_2) = \text{sign}(x_1^2 + x_2^2 - 0.6)$$

In [112]:
def generate_data_with_noise(N, noise):
    """
    Generates N points labelled by f(x1, x2) = sign(x1^2 + x2^2 - 0.6) and then
    flips the label of a randomly chosen subset of them.

    Parameters
    ----------
    N : int
        The number of examples.
    noise : float
        The fraction of labels to flip; exactly int(noise * N) distinct points get
        their label negated.

    Returns
    -------
    X : numpy array, shape (N, 3)
        The matrix of input vectors (each row corresponds to an input vector); the first column of this matrix is all ones.
    Y : numpy array, shape (N,)
        The 1-D vector of (noisy) outputs.
    """
    # Uniform points in [-1, 1] x [-1, 1] with the constant coordinate prepended
    points = np.random.uniform(-1, 1, (N, 2))
    X = np.column_stack((np.ones(N), points))

    # Noise-free labels from the target function
    x1, x2 = X[:, 1], X[:, 2]
    Y = np.sign(np.square(x1) + np.square(x2) - 0.6)

    # Negate the labels of a random subset, chosen without replacement
    n_flipped = int(noise * N)
    flipped = np.random.choice(N, n_flipped, replace=False)
    Y[flipped] *= -1

    return X, Y

## 8

In [113]:
def main_NT_8(N, num_runs=1000, noise=0.1):
    """
    Estimates the average in-sample classification error of Linear Regression
    fitted directly on the untransformed inputs (1, x1, x2) of the noisy data,
    and prints it.

    Parameters
    ----------
    N : int
        The number of training examples.
    num_runs : int, optional
        The number of experiments to run.
    noise : float, optional
        The noise level passed to generate_data_with_noise.

    Returns
    -------
    None
        The average in-sample error is printed, not returned.

    Raises
    ------
    ValueError
        If num_runs is not positive.
    """
    if num_runs <= 0:
        raise ValueError("num_runs must be a positive integer")

    total_Ein = 0.0 # Sum of the in-sample errors over all runs

    for _ in range(num_runs):
        # Generate training set
        X, Y = generate_data_with_noise(N, noise=noise)

        # Run Linear Regression to pick g
        w = run_LinearRegression(X, Y)

        # Binary error: fraction of training points whose predicted sign differs from the label
        total_Ein += np.mean(np.sign(np.dot(X, w)) != Y)

    avg_Ein = total_Ein / num_runs

    print("Average Ein: ", avg_Ein)

In [114]:
# Question 8: average in-sample error of Linear Regression without any feature transform.
main_NT_8(N=1000)

Average Ein:  0.5074069999999996


[8] The correct answer is [d] 0.5

## 9

Hàm chuyển đổi vector đặc trưng sang vector đặc trưng phi tuyến

$$z = (1, x_1, x_2, x_1x_2, x_1^2, x_2^2)$$

In [115]:
def transform_non_linear(X, Y):
    """
    Maps every input (1, x1, x2) to the feature vector (1, x1, x2, x1*x2, x1^2, x2^2).

    Parameters
    ----------
    X : numpy array, shape (N, 3)
        The matrix of input vectors (each row corresponds to an input vector); the first column of this matrix is all ones.
    Y : numpy array
        The vector of outputs; it is passed through untouched.

    Returns
    -------
    Z : numpy array, shape (N, 6)
        The transformed matrix of input vectors.
    Y : numpy array
        The same object that was passed in.
    """
    x1 = X[:, 1]
    x2 = X[:, 2]

    # Keep the three original columns and append the second-order terms
    Z = np.column_stack((X, x1 * x2, x1**2, x2**2))

    return Z, Y

In [116]:
def main_NT_9(N, num_runs=1000, noise=0.1):
    """
    Averages the Linear Regression weights obtained in the transformed feature
    space over repeated experiments on freshly generated noisy data.

    Parameters
    ----------
    N : int
        The number of training examples.
    num_runs : int, optional
        The number of experiments to run.
    noise : float, optional
        The noise level passed to generate_data_with_noise.

    Returns
    -------
    avg_w : numpy array
        The average w found by Linear Regression. It has the shape returned by
        run_LinearRegression for the transformed data: 6 weights, 1-D when the
        outputs Y are 1-D.

    Raises
    ------
    ValueError
        If num_runs is not positive.
    """
    if num_runs <= 0:
        raise ValueError("num_runs must be a positive integer")

    w_sum = 0.0 # Sum of the weight vectors found in all runs

    for _ in range(num_runs):
        X, Y = generate_data_with_noise(N, noise=noise)

        # Transform X into Z
        Z, Y = transform_non_linear(X, Y)

        # Fit Linear Regression in the transformed space and accumulate its weights
        w_sum = w_sum + run_LinearRegression(Z, Y)

    return np.array(w_sum / num_runs)

In [117]:
# Average Linear Regression weights in the transformed space; reused by the cells for questions 9 and 10.
avg_w = main_NT_9(N=1000)

In [118]:
def find_closest_hypothesis(avg_w, hypotheses):
    """
    Finds the hypothesis whose weight vector is closest (in Euclidean distance)
    to the solution of Linear Regression.

    Both avg_w and every hypothesis are flattened before they are compared, so a
    column vector of shape (6, 1) and a 1-D vector of shape (6,) are handled the
    same way. Without flattening, subtracting a (6,) array from a (6, 1) array
    would broadcast to a (6, 6) matrix and produce a meaningless distance.

    Parameters
    ----------
    avg_w : numpy array, shape (6, 1) or (6,)
        The average w found by Linear Regression.
    hypotheses : list of numpy arrays
        The list of hypotheses to choose from.

    Returns
    -------
    closest_hypothesis : numpy array or None
        The element of `hypotheses` closest to avg_w (the first one in case of a
        tie), or None when `hypotheses` is empty.
    """
    target = np.ravel(avg_w)

    closest_hypothesis = None # Best candidate seen so far
    min_distance = 0.0 # Its distance to avg_w (only meaningful once a candidate is set)

    for hypothesis in hypotheses:
        # Calculate the distance between avg_w and the current hypothesis
        distance = np.linalg.norm(target - np.ravel(hypothesis))

        # Strict comparison keeps the earliest hypothesis when distances are equal
        if closest_hypothesis is None or distance < min_distance:
            closest_hypothesis = hypothesis
            min_distance = distance

    return closest_hypothesis

In [119]:
# Weight vectors (w0, w1, w2, w3, w4, w5) of the five candidate hypotheses,
# in the feature order (1, x1, x2, x1*x2, x1^2, x2^2)
hypotheses = [
    np.array([-1, -0.05, 0.08, 0.13, 1.5, 1.5]), # a
    np.array([-1, -0.05, 0.08, 0.13, 1.5, 15]), # b
    np.array([-1, -0.05, 0.08, 0.13, 15, 1.5]), # c
    np.array([-1, -1.5, 0.08, 0.13, 0.05, 0.05]), # d
    np.array([-1, -0.05, 0.08, 1.5, 0.15, 0.15]), # e
]

# Pick the candidate whose weights are nearest to the averaged Linear Regression weights
closest_hypothesis = find_closest_hypothesis(avg_w, hypotheses)

print("Closest hypothesis: ", closest_hypothesis)

Closest hypothesis:  [-1.   -0.05  0.08  0.13  1.5   1.5 ]


[9] The correct answer is [a] $g(x_1, x_2) = \text{sign}(-1 - 0.05x_1 + 0.08x_2 + 0.13 x_1x_2 + 1.5x_1^2 + 1.5x_2^2)$

## 10

In [120]:
def main_NT_10(avg_w, N, num_runs=1000, noise=0.1):
    """
    Estimates the out-of-sample classification error of the hypothesis with
    weights avg_w on freshly generated noisy data, and prints the average.

    Parameters
    ----------
    avg_w : numpy array, shape (6,) or (6, 1)
        The average w found by Linear Regression in the transformed space.
    N : int
        The number of fresh (out-of-sample) points generated in each run.
    num_runs : int, optional
        The number of experiments to run.
    noise : float, optional
        The noise level passed to generate_data_with_noise.

    Returns
    -------
    None
        The average out-of-sample error is printed, not returned.

    Raises
    ------
    ValueError
        If num_runs is not positive.
    """
    if num_runs <= 0:
        raise ValueError("num_runs must be a positive integer")

    # Flatten once so predictions are 1-D; comparing a column of predictions with
    # 1-D labels would otherwise broadcast to an (N, N) matrix.
    weights = np.ravel(avg_w)

    total_Eout = 0.0 # Sum of the out-of-sample errors over all runs

    for _ in range(num_runs):
        # Fresh data that was not used to compute avg_w
        X, Y = generate_data_with_noise(N, noise=noise)

        # Transform X into Z
        Z, Y = transform_non_linear(X, Y)

        # Predict on the fresh points with the given weights
        predictions = np.dot(Z, weights)

        # Binary error: fraction of points whose predicted sign differs from the label
        total_Eout += np.mean(np.sign(predictions) != np.ravel(Y))

    avg_Eout = total_Eout / num_runs

    print("Average Eout: ", avg_Eout)

In [124]:
# Question 10: error of the averaged weights from question 9 on newly generated noisy data.
main_NT_10(avg_w = avg_w, N=1000)

Average Eout:  0.12275299999999986


[10] The correct answer is [b] 0.1 (the option closest to the measured average $E_{out} \approx 0.12$)

## References
1. [hthoai - GitHub](https://github.com/hthoai/machine-learning/blob/master/homework/hw2/hw2-sol.ipynb)
2. [homefish - GitHub](https://github.com/homefish/edX_Learning_From_Data_2017/blob/master/homework_2/homework_2_problem_3_4_Error_and_Noise.ipynb)