In [10]:
import numpy as np
import tensorflow as tf
import math

# Load train and test data
X_train,y_train= np.loadtxt("train.dat", usecols=(0,1), unpack=True)
X_test,y_test = np.loadtxt("test.dat", usecols=(0,1), unpack=True)


#print(X_train,y_train)
# Define the degree range for the polynomial features
degree_range = range(1, 13)

# Define the range of lambda values to test
#lambda_range = [0, np.exp(-25), np.exp(-20), np.exp(-14), np.exp(-7), np.exp(-3), 1, np.exp(3), np.exp(7)]
lambda_range = [0,math.exp(-25),math.exp(-20),
             math.exp(-14),math.exp(-7),
             math.exp(-3),1,math.exp(7),
             math.exp(3)]
# Initialize the minimum RMSE value and corresponding degree and lambda
min_rmse = np.inf
best_d = -1
best_lambda = -1

# Initialize the RMSE values for each degree and lambda value
rmse_values = np.zeros((len(lambda_range), len(degree_range)))

# Compute the polynomial features for each degree
mu_X = np.zeros(6)
sigma_X = np.zeros(6)
mu_y = np.zeros(6)
sigma_y = np.zeros(6)
scores = []
for k in range(6):
    # Does the Cross Validation
    fold_idx = np.arange(k * (len(X_train) // 6), (k+1) * (len(X_train) // 6))
    X_train_fold = np.delete(X_train, fold_idx)
    y_train_fold = np.delete(y_train, fold_idx)

    # Normalize input feature X
    mu_X[k] = np.mean(X_train_fold)
    sigma_X[k] = np.std(X_train_fold)
    X_train_fold = (X_train_fold - mu_X[k]) / sigma_X[k]

    # Normalize output y
    mu_y[k] = np.mean(y_train_fold)
    sigma_y[k] = np.std(y_train_fold)
    y_train_fold = (y_train_fold - mu_y[k]) / sigma_y[k]

    X_val_fold = X_train[fold_idx]
    y_val_fold = y_train[fold_idx]
    #print(y_val_fold)
    # Normalize test input feature X_test using the same mean and standard deviation as X
    X_val_fold = (X_val_fold - mu_X[k]) / sigma_X[k]

    # Normalize test output y_test using the same mean and standard deviation as y
    #y_val_fold = (y_val_fold - mu_y[k]) / sigma_y[k]

    for i, d in enumerate(degree_range):
        # Polynomial Transformation
        X_train_poly = np.ones((len(X_train_fold), d+1))
        X_test_poly = np.ones((len(X_val_fold), d+1))

        for j in range(1, d+1):
            X_train_poly[:, j] = X_train_fold**j
            X_test_poly[:, j] = X_val_fold**j

        for l,lambda_val in enumerate(lambda_range):
            # Define the model
            identity_matrix = np.identity(d+1)
            w = (np.linalg.inv(X_train_poly.T @ X_train_poly + lambda_val * identity_matrix) @ X_train_poly.T @ y_train_fold)
            y_hat = X_test_poly @ w
            y_denormalize = (y_hat * sigma_y[k]) + mu_y[k]
            score = math.sqrt((np.square(y_denormalize-y_val_fold).mean()))
            #print(lambda_val)      
            scores.append([d,k,lambda_val,score])
        

In [11]:
scores

[[1, 0, 0, 0.9481489729807724],
 [1, 0, 1.3887943864964021e-11, 0.9481489729808613],
 [1, 0, 2.061153622438558e-09, 0.9481489729941684],
 [1, 0, 8.315287191035679e-07, 0.9481489783851168],
 [1, 0, 0.0009118819655545162, 0.9481548994468949],
 [1, 0, 0.049787068367863944, 0.9484722129254421],
 [1, 0, 1, 0.9545130386532876],
 [1, 0, 1096.6331584284585, 1.2128010651935077],
 [1, 0, 20.085536923187668, 1.0382949547351756],
 [2, 0, 0, 0.6762325505790561],
 [2, 0, 1.3887943864964021e-11, 0.6762325505793157],
 [2, 0, 2.061153622438558e-09, 0.676232550617818],
 [2, 0, 8.315287191035679e-07, 0.6762325662172274],
 [2, 0, 0.0009118819655545162, 0.6762497015136175],
 [2, 0, 0.049787068367863944, 0.6771735728677168],
 [2, 0, 1, 0.6964778350491222],
 [2, 0, 1096.6331584284585, 1.2160723787998906],
 [2, 0, 20.085536923187668, 0.9669727711008542],
 [3, 0, 0, 0.7911159479860422],
 [3, 0, 1.3887943864964021e-11, 0.7911159479863717],
 [3, 0, 2.061153622438558e-09, 0.7911159480346046],
 [3, 0, 8.3152871910

In [15]:
import pandas as pd
pd.DataFrame(scores).to_excel("ff.xlsx",index = False)

In [4]:
import numpy as np

# Load the training and test data
train= np.loadtxt("train.dat", usecols=(0,1), unpack=True)
test = np.loadtxt("test.dat", usecols=(0,1), unpack=True)

# Define the degree range for the polynomial features
degree_range = range(1, 13)

# Define the range of lambda values to test
lambda_range = [0, np.exp(-25), np.exp(-20), np.exp(-14), np.exp(-7), np.exp(-3), 1, np.exp(3), np.exp(7)]

# Initialize the minimum RMSE value and corresponding degree and lambda
min_rmse = float('inf')
best_d = -1
best_lambda = -1

# Initialize the RMSE values for each degree and lambda value
rmse_values = np.zeros((len(lambda_range), len(degree_range)))

# Extract the features and target variable from the training data
X_train = train[:, 0]
y_train = train[:, 1]
print()
# Extract the features and target variable from the test data
X_test = test[:, 0]
y_test = test[:, 1]

# Compute the polynomial features for each degree
for i, lambda_val in enumerate(lambda_range):
    for j, degree in enumerate(degree_range):
        # Polynomial transformation
        X_train_poly = np.column_stack([X_train ** n for n in range(1, degree + 1)])
        X_test_poly = np.column_stack([X_test ** n for n in range(1, degree + 1)])
        
        # Regularization
        I = np.identity(X_train_poly.shape[1])
        w = np.linalg.inv(X_train_poly.T @ X_train_poly + lambda_val * I) @ X_train_poly.T @ y_train
        
        # Compute the RMSE value
        rmse = np.sqrt(np.mean((X_test_poly @ w - y_test) ** 2))
        rmse_values[i, j] = rmse
        
        # Check if the current model is the best so far
        if rmse < min_rmse:
            min_rmse = rmse
            best_d = degree
            best_lambda = lambda_val

# Print the best model parameters
print('Best degree:', best_d)
print('Best lambda:', best_lambda)
print('Minimum RMSE:', min_rmse)

Best degree: 5
Best lambda: 1
Minimum RMSE: 12.872608291448003
