# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# Fix the global NumPy seed so every run draws the same sample
np.random.seed(0)

# Draw the synthetic points: x uniform on [0, 10), y uniform on [0, 20)
num_samples = 1000
x = np.random.uniform(low=0, high=10, size=num_samples)
y = np.random.uniform(low=0, high=20, size=num_samples)

# Label a point 1 when it lies on or above the line y = 2x + 3, otherwise 0
labels = np.greater_equal(y, 2 * x + 3).astype(int)

# Scatter the samples coloured by class label and overlay the labelling line
plt.figure(figsize=(8, 6))
plt.scatter(x, y, c=labels, cmap=plt.cm.Paired)

# Endpoints of y = 2x + 3 evaluated at x = 0 and x = 10
true_line_x = [0, 10]
true_line_y = [3, 23]
plt.plot(
    true_line_x,
    true_line_y,
    color='k',
    linestyle='--',
    linewidth=2,
    label='Decision Boundary (y = 2x + 3)',
)

# Axis captions, title, legend and grid for the current axes
plt.title('Synthetic Data with Decision Boundary')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.legend()
plt.grid(True)

# Create a linear regression model.
# NOTE(review): this estimator is not used by the grid search in this
# section; it is kept so the name stays defined for any code that follows.
linear_reg = LinearRegression()

# Brute-force grid search over candidate slopes 'a' and intercepts 'b'.
# No cross-validation is performed here: every (a, b) pair is scored by the
# mean squared error between the line a * x + b and the sampled y values.
a_values = np.linspace(1, 4, 100)  # Range of 'a' values to try
b_values = np.linspace(1, 4, 100)  # Range of 'b' values to try

# MSE is an error measure, so lower is better. Start from +inf so the first
# candidate always becomes the incumbent and only strict improvements
# replace it afterwards.
best_score = np.inf
best_a = None
best_b = None

# NOTE(review): x and y are drawn independently of each other and `labels`
# is not used in this fit, so the line found here is the least-squares line
# within the grid, not necessarily the labelling boundary y = 2x + 3.
# Confirm whether scoring against `labels` was intended.
for a in a_values:
    for b in b_values:
        # Line values predicted by the current 'a' and 'b'
        synthetic_y = a * x + b
        # Mean squared error between the predicted line and the actual 'y'
        mse = np.mean((synthetic_y - y) ** 2)
        # Keep the candidate with the smallest error seen so far
        if mse < best_score:
            best_score = mse
            best_a = a
            best_b = b

print(f"Optimal 'a' (slope): {best_a:.2f}")
print(f"Optimal 'b' (intercept): {best_b:.2f}")
print(f"Best Mean Squared Error: {best_score:.2f}")

# Overlay the line selected by the search on the current axes and display it
search_line_x = [0, 10]
search_line_y = [best_b, best_b + best_a * 10]
plt.plot(
    search_line_x,
    search_line_y,
    color='r',
    linestyle='-',
    linewidth=2,
    label='Optimal Decision Boundary',
)
plt.legend()
plt.show()
