# In [None]:
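"""
Fit a one-variable linear regression with batch gradient descent and evaluate it with 5-fold cross-validation.

This version uses a larger learning rate and a higher cap on the number of iterations so that training can run longer.
These changes may help reduce the errors observed in folds 1, 4, and 5.
"""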
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

# Path of the CSV file that holds the training samples.
file_path = 'click.csv'

# Load the training data: one header row is skipped, every remaining row is
# parsed as integers, column 0 becomes the input x and column 1 the target y.
try:
    train = np.loadtxt(file_path, delimiter=',', dtype='int', skiprows=1)
    train_x = train[:, 0]
    train_y = train[:, 1]
except (OSError, ValueError, IndexError) as e:
    # OSError    -> the file is missing or unreadable
    # ValueError -> a cell cannot be parsed as an integer
    # IndexError -> the loaded array is not 2-D with at least two columns
    print(f"Error loading data from {file_path}: {e}")
    # `exit` is a convenience object added by the `site` module for interactive
    # use and may be absent or behave differently (e.g. under `python -S` or
    # IPython); raising SystemExit ends the program with status 1 everywhere.
    raise SystemExit(1)

# Start both model coefficients from independent uniform draws in [0, 1).
theta0 = np.random.random_sample()  # constant term of the model
theta1 = np.random.random_sample()  # coefficient multiplying x

# Define prediction function
def f(x):
    """Evaluate the linear model at ``x``.

    The coefficients are the module-level ``theta0`` (intercept) and
    ``theta1`` (slope), read at call time, so the result reflects whatever
    values training has assigned to them so far.

    Args:
        x: Value(s) at which to evaluate the model; this file passes
            NumPy arrays.

    Returns:
        ``theta0 + theta1 * x``.
    """
    slope_term = theta1 * x
    return theta0 + slope_term

# Define objective function (error calculation)
def E(x, y):
    """Return half the sum of squared residuals of the model on ``(x, y)``.

    Args:
        x: Inputs handed to ``f`` to obtain predictions.
        y: Observed targets that the predictions are compared against.

    Returns:
        ``0.5 * sum((y - f(x)) ** 2)``.
    """
    residuals = y - f(x)
    squared = residuals ** 2
    return 0.5 * np.sum(squared)

# Standardization: the mean and standard deviation are taken once, over all
# training inputs, and reused for every later call to standardize().
mu = np.mean(train_x)
sigma = np.std(train_x)


def standardize(x):
    """Return ``x`` shifted by ``mu`` and divided by ``sigma``."""
    centered = x - mu
    return centered / sigma


# Standardized copy of the training inputs used by the rest of the script.
train_z = standardize(train_x)

# Gradient-descent hyperparameters
ETA = 0.005  # learning rate: factor applied to the summed gradient
max_iterations = 10_000  # the training loop stops once `count` reaches this

# Training state read and updated by the training loop
count = 0  # number of parameter updates performed so far
diff = 1  # latest decrease of the error; starts above the 1e-2 stop threshold
error = E(train_z, train_y)  # error of the initial parameters on all samples

# k-fold cross-validation setup (5 shuffled folds, fixed seed for the split)
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validation loop: every fold trains its own model from scratch on the
# fold's training split and is then scored on the held-out split.
for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(train_z), start=1):
    print(f"Fold {fold_idx}:")

    # Prepare training and validation sets
    train_z_fold, val_z_fold = train_z[train_idx], train_z[val_idx]
    train_y_fold, val_y_fold = train_y[train_idx], train_y[val_idx]

    # Reset the model and the optimizer state for this fold. Without the reset
    # the stop condition left behind by the previous fold (diff <= 1e-2 or
    # count == max_iterations) makes the while-loop below a no-op, and the
    # fold would be scored with parameters that were fitted on data containing
    # its own validation samples.
    theta0 = np.random.rand()
    theta1 = np.random.rand()
    diff = 1
    count = 0
    # Baseline error on this fold's training split, so the first `diff`
    # compares like with like.
    error = E(train_z_fold, train_y_fold)

    # Training loop (until error difference is below 0.01 or max iterations reached)
    while diff > 1e-2 and count < max_iterations:
        # Residuals under the current parameters. Both updates are computed
        # from these same residuals, i.e. the parameters change simultaneously.
        residual = f(train_z_fold) - train_y_fold
        theta0, theta1 = (
            theta0 - ETA * np.sum(residual),
            theta1 - ETA * np.sum(residual * train_z_fold),
        )

        # Calculate new error
        current_error = E(train_z_fold, train_y_fold)

        # NumPy reports floating-point overflow as inf/nan (plus a
        # RuntimeWarning) instead of raising OverflowError, so divergence has
        # to be detected explicitly.
        if not np.isfinite(current_error):
            print("Overflow error: training error is no longer finite")
            break

        diff = error - current_error
        error = current_error

        # Print training log
        count += 1
        log = '{} iterations: theta0 = {:.3f}, theta1 = {:.3f}, difference = {:.4f}'
        print(log.format(count, theta0, theta1, diff))

    # Evaluate performance on validation set
    val_error = E(val_z_fold, val_y_fold)
    print(f"Validation error for fold {fold_idx}: {val_error}\n")

# Plot the standardized training samples together with the line given by the
# current theta0/theta1. Any failure while plotting is reported, not raised.
try:
    x = np.linspace(-3, 3, num=100)  # 100 evenly spaced points on [-3, 3]
    plt.plot(train_z, train_y, 'o')  # samples as round markers
    plt.plot(x, f(x))  # model prediction over the grid
    plt.title('Linear Regression Fit')
    plt.xlabel('Standardized x')
    plt.ylabel('y')
    plt.show()
except Exception as exc:
    print(f"Error during plotting: {exc}")
