In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the workbook that holds the raw inputs.
file_path = 'Inputs.xlsx'
dataset = pd.read_excel(file_path)

# Rows from position 2 onward are used. Columns from position 2 up to (but not
# including) the last one become the feature matrix; the last column is the target.
x = dataset.iloc[2:, 2:-1].values
y = dataset.iloc[2:, -1].values

In [None]:
# Show the feature matrix sliced out of the spreadsheet.
print(x)

In [None]:
# Show the target values sliced out of the spreadsheet.
print(y)

In [None]:
from sklearn.model_selection import train_test_split

# 80 % of the rows go to training and 20 % to testing; random_state is fixed
# so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)


In [None]:
# Report the dimensions of the training split.
print('The shape of x_train is:', x_train.shape)
print('The shape of y_train is: ', y_train.shape)
print('Number of training examples (m):', len(x_train))

In [None]:
# Confirm both training arrays are plain NumPy arrays before doing array maths on them.
x_train_is_ndarray = isinstance(x_train, np.ndarray)
y_train_is_ndarray = isinstance(y_train, np.ndarray)
print(f'x_train is numpy array: {x_train_is_ndarray}')
print(f'y_train is numpy array: {y_train_is_ndarray}')

In [None]:
def compute_cost(x, y, w, b,theta,beta):
    """
    Halved mean squared error of the per-feature cubic regression model.

    For every example i the model predicts

        f(x_i) = b + sum_j ( w[j]*x_ij + theta[j]*x_ij**2 + beta[j]*x_ij**3 )

    and the cost is sum_i (f(x_i) - y_i)**2 / (2 * m).

    Args:
        x: 2-D array-like of shape (m, n) holding the features.
        y: array-like with m target values.
        w: length-n coefficients of the linear terms.
        b: scalar bias.
        theta: length-n coefficients of the squared terms.
        beta: length-n coefficients of the cubed terms.

    Returns:
        The scalar cost.

    Raises:
        ValueError: if x contains no rows (the average would be undefined).
    """
    # Work in float64: integer feature columns would otherwise overflow
    # silently when cubed, and object arrays would fall back to slow
    # element-wise Python arithmetic.
    x = np.asarray(x, dtype=float)
    # Flatten so a column-shaped y cannot broadcast against the predictions.
    y = np.asarray(y, dtype=float).ravel()
    w = np.asarray(w, dtype=float)
    theta = np.asarray(theta, dtype=float)
    beta = np.asarray(beta, dtype=float)

    # Number of training examples (x must be 2-D, as before)
    m, _ = x.shape
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Predictions for all examples at once
    predictions = x @ w + (x ** 2) @ theta + (x ** 3) @ beta + b
    residuals = predictions - y

    # Sum of squared errors, averaged over 2*m
    return np.sum(residuals ** 2) / (2 * m)

In [None]:
# Sanity check: evaluate the cost with every coefficient at zero and a bias of 0.5.
m, n = x_train.shape
initial_w = np.zeros(n)
initial_theta = np.zeros(n)
initial_beta = np.zeros(n)
initial_b = 0.5

cost = compute_cost(
    x_train, y_train, initial_w, initial_b, initial_theta, initial_beta
)
print(type(cost))
print(f'Cost at initial w: {cost:.3f}')


In [None]:
def compute_gradient(x, y, w, b,theta,beta):
    """
    Gradient of the halved mean squared error for the per-feature cubic model.

    The model is f(x_i) = b + sum_j (w[j]*x_ij + theta[j]*x_ij**2 + beta[j]*x_ij**3).
    With error_i = f(x_i) - y_i the returned averages are

        dj_dw[j]     = mean_i(error_i * x_ij)
        dj_dtheta[j] = mean_i(error_i * x_ij**2)
        dj_dbeta[j]  = mean_i(error_i * x_ij**3)
        dj_db        = mean_i(error_i)

    Args:
        x: 2-D array-like of shape (m, n) holding the features.
        y: array-like with m target values.
        w: length-n coefficients of the linear terms.
        b: scalar bias.
        theta: length-n coefficients of the squared terms.
        beta: length-n coefficients of the cubed terms.

    Returns:
        Tuple (dj_dw, dj_db, dj_dtheta, dj_dbeta) -- note that the bias
        gradient comes second.

    Raises:
        ValueError: if x contains no rows.
    """
    # Work in float64: integer feature columns would otherwise overflow
    # silently when cubed.
    x = np.asarray(x, dtype=float)
    # Flatten so a column-shaped y cannot broadcast against the predictions.
    y = np.asarray(y, dtype=float).ravel()
    w = np.asarray(w, dtype=float)
    theta = np.asarray(theta, dtype=float)
    beta = np.asarray(beta, dtype=float)

    # Number of training examples (x must be 2-D, as before)
    m, _ = x.shape
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Powers are computed once and reused for both prediction and gradient.
    x_squared = x ** 2
    x_cubed = x ** 3

    # Error term for every example
    errors = x @ w + x_squared @ theta + x_cubed @ beta + b - y

    # Averaged gradients
    dj_dw = x.T @ errors / m
    dj_dtheta = x_squared.T @ errors / m
    dj_dbeta = x_cubed.T @ errors / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db,dj_dtheta, dj_dbeta

In [None]:
# Sanity check: evaluate the gradient with every parameter (including the bias) at zero.
m, n = x_train.shape
initial_w = np.zeros(n)
initial_theta = np.zeros(n)
initial_beta = np.zeros(n)
initial_b = 0

tmp_dj_dw, tmp_dj_db, tmp_dj_dtheta, tmp_dj_dbeta = compute_gradient(
    x_train, y_train, initial_w, initial_b, initial_theta, initial_beta
)
print(f'dj_dw,dj_dtheta at initial w, b,theta (zeros): {tmp_dj_dw}')
print('Gradient at initial w, b (zeros):', tmp_dj_dw, tmp_dj_db, tmp_dj_dtheta, tmp_dj_dbeta)

In [None]:
def gradient_descent(x, y, w_in, b_in,theta_in, beta_in, cost_function, gradient_function, alpha, num_iters):
    """
    Batch gradient descent for the cubic regression parameters.

    Args:
        x: feature matrix passed through to the cost and gradient functions.
        y: target values passed through to the cost and gradient functions.
        w_in, theta_in, beta_in: initial coefficient vectors (not modified).
        b_in: initial scalar bias.
        cost_function: callable (x, y, w, b, theta, beta) -> scalar cost.
        gradient_function: callable (x, y, w, b, theta, beta) ->
            (dj_dw, dj_db, dj_dtheta, dj_dbeta).
        alpha: learning rate.
        num_iters: number of update steps to perform.

    Returns:
        Tuple (w, b, theta, beta, J_history, w_history) where J_history holds
        the cost after each step and w_history holds a snapshot of w at every
        progress report.
    """
    # Imported here so the definition does not depend on a later cell having run.
    import math

    # Copy the starting point as float arrays: this leaves the caller's arrays
    # untouched and prevents integer inputs from truncating every update.
    w = np.array(w_in, dtype=float)
    theta = np.array(theta_in, dtype=float)
    beta = np.array(beta_in, dtype=float)
    b = b_in

    # History of cost and weights
    J_history = []
    w_history = []

    # Stop recording costs after this many steps to prevent resource exhaustion.
    max_recorded_costs = 10000000
    # Progress is reported roughly 100 times over the run (every step when
    # num_iters <= 100).
    report_every = max(1, math.ceil(num_iters / 100))

    for i in range(num_iters):
        # Calculate the gradient at the current parameters
        dj_dw, dj_db,dj_dtheta,dj_dbeta = gradient_function(x, y, w, b,theta,beta)

        # Simultaneous update of all parameters
        w = w - alpha * np.asarray(dj_dw, dtype=float)
        theta = theta - alpha * np.asarray(dj_dtheta, dtype=float)
        beta = beta - alpha * np.asarray(dj_dbeta, dtype=float)
        b = b - alpha * dj_db

        # Save cost J after this step
        if i < max_recorded_costs:
            J_history.append(cost_function(x, y, w, b,theta,beta))

        # Periodic progress report and weight snapshot
        if i % report_every == 0:
            w_history.append(w.copy())
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):15.2f}")

    return w, b,theta,beta, J_history, w_history  # Return w and J, w history for graphing

In [None]:
import copy
import math

m, n = x_train.shape

# Starting point for gradient descent: one coefficient per feature for the
# linear (w), squared (theta) and cubed (beta) terms, plus the bias.
initial_w = np.array([-5.8, 2.7, 0.741, -10.8, 10.5, -12.54,8.98, -1.04, -0.75, 0.41, -0.7])
initial_theta=np.array([0.000052, 0.000002, 0.00000004, 0.000025, 0.00008, 0.00009, 0.000041, 0.0002, 0.00012, 0.00034, 0.0000045])
initial_beta=np.array([0.0001, 0.0000002, 0.0000001, 0.00000005, 0.0003, 0.003, 0.0000002, 0.0000001, 0.0000004, 0.0035, 0.0015])
initial_b=-40

# The hand-written vectors must have exactly one entry per feature column.
assert initial_w.shape == initial_theta.shape == initial_beta.shape == (n,), (
    f"initial coefficient vectors must have length {n}"
)

# learning rate and number of iterations
alpha = 0.000000000000000000000047
iterations = 150000
# Perform gradient descent (the cost and weight histories are discarded)
w, b, theta,beta,_,_ = gradient_descent(x_train, y_train, initial_w, initial_b,initial_theta, initial_beta,compute_cost, compute_gradient, alpha, iterations)

print("w, b found by gradient descent:", w, b,theta,beta)

In [None]:
# Report the dimensions of the held-out test features.
print('The shape of x_test is:', x_test.shape)

In [None]:
from IPython.display import FileLink

# Evaluate the fitted cubic model on the test split. The number of features is
# taken from x_test itself rather than being hard-coded.
x_eval = np.asarray(x_test, dtype=float)
predictions = x_eval @ w + (x_eval ** 2) @ theta + (x_eval ** 3) @ beta + b

# Print each prediction next to its actual value and collect the pairs as rows.
data = []
for predicted, actual in zip(predictions, y_test):
    print(predicted, actual)
    data.append([predicted, actual])

# Write the comparison table to an Excel file and expose a download link.
df = pd.DataFrame(data, columns=['Predicted', 'Actual'])
file_name = 'Result_Cubic_Regression2.xlsx'
df.to_excel(file_name, index=False)
FileLink(file_name)
