In [None]:
import os
import pandas as pd

import copy, math
import numpy as np
import matplotlib.pyplot as plt

# Row boundaries of the train / cross-validation split (0-based, end-exclusive).
TRAIN_END = 44000
CV_END = 47000

df = pd.read_excel('polynomial_regression_train.xlsx')
df1 = pd.read_csv('polynomial_regression_test.csv')

# `iloc` is 0-based and the header row is not part of the frame, so the slices
# start at 0 and at TRAIN_END. The previous `1:44000` / `44001:47000` bounds
# silently dropped data rows 0 and 44000.
# Columns 1-5 are used as features and column 6 as the target.
X_train = df.iloc[:TRAIN_END, 1:6]
y_train = df.iloc[:TRAIN_END, 6]

X_CV = df.iloc[TRAIN_END:CV_END, 1:6]
y_CV = df.iloc[TRAIN_END:CV_END, 6].values


class PolynomialRegression:
    """Least-squares polynomial regression without interaction terms.

    Every input column is raised element-wise to the powers 1..degree and an
    intercept column of ones is prepended, so an input with ``k`` columns
    produces ``1 + k * degree`` design-matrix columns.

    Instance fields:
        degree: highest power generated for each input column.
        weights: fitted coefficient vector; ``None`` until ``fit`` is called.
        num_features: design-matrix width seen by the last ``fit``; ``None``
            until then.
    """

    def __init__(self, degree):
        self.degree = degree
        self.weights = None
        self.num_features = None

    def generate_polynomial_features(self, X):
        """Generates polynomial features up to the specified degree.

        Args:
            X: 2-D array-like (ndarray, DataFrame, nested list) of shape
                (n_samples, n_features).

        Returns:
            ndarray whose columns are ``[1, X**1, X**2, ..., X**degree]``.
            After a ``fit``, the result is zero-padded or truncated to the
            fitted width when the incoming width differs; a warning is
            emitted in that case because the columns no longer line up with
            the fitted weights.
        """
        # Cast to float up front so integer inputs cannot overflow when raised
        # to a high power.
        X = np.asarray(X, dtype=float)
        n_samples, _ = X.shape

        blocks = [np.ones((n_samples, 1))]  # intercept column
        blocks.extend(X ** power for power in range(1, self.degree + 1))
        X_poly = np.concatenate(blocks, axis=1)

        expected = self.num_features
        if expected is not None and X_poly.shape[1] != expected:
            import warnings

            warnings.warn(
                f"Design matrix has {X_poly.shape[1]} columns but the model was "
                f"fitted with {expected}; padding/truncating to match. "
                "Predictions are unreliable - check the input's feature columns.",
                RuntimeWarning,
                stacklevel=2,
            )
            if X_poly.shape[1] < expected:
                padding = np.zeros((n_samples, expected - X_poly.shape[1]))
                X_poly = np.concatenate((X_poly, padding), axis=1)
            else:
                X_poly = X_poly[:, :expected]
        return X_poly

    def fit(self, X, y):
        """Fits the model using the provided data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: array-like of n_samples target values.
        """
        # Forget the width of any earlier fit, otherwise the training matrix
        # itself would be padded/truncated to a stale shape on a refit.
        self.num_features = None
        X_poly = self.generate_polynomial_features(X)
        targets = np.asarray(y, dtype=float)
        # Solve the least-squares problem directly instead of inverting
        # X^T X: the normal equations square the condition number and raise
        # LinAlgError on rank-deficient designs (e.g. a constant feature).
        self.weights = np.linalg.lstsq(X_poly, targets, rcond=None)[0]
        self.num_features = X_poly.shape[1]

    def predict(self, X):
        """Predicts target values for new data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            ndarray with one prediction per row of ``X``.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("PolynomialRegression.predict called before fit")
        X_poly = self.generate_polynomial_features(X)
        return X_poly @ self.weights




# Evaluation metric used to score predictions on the cross-validation split.

def r_squared(y, y_hat):
    """
    Calculates the R-squared (coefficient of determination) score.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    (y - y_hat) and SS_tot is the sum of squared deviations of y from its mean.
    The score is 1 for a perfect fit, 0 for a model no better than predicting
    the mean, and negative for a model that is worse than the mean.

    Args:
        y (ndarray): The actual target values.
        y_hat (ndarray): The predicted target values.

    Returns:
        float: The R-squared score. NaN for empty input. When y is constant
        (SS_tot == 0) the score is 1.0 for an exact fit and 0.0 otherwise.
    """
    # Flatten so an (n, 1) prediction cannot broadcast against an (n,) target.
    actual = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(y_hat, dtype=float).ravel()
    if actual.size == 0:
        return float("nan")

    ss_tot = ((actual - actual.mean()) ** 2).sum()  # Total sum of squares
    # Residuals are measured against the actual values, not against the mean.
    ss_res = ((actual - predicted) ** 2).sum()  # Residual sum of squares
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1.0 - ss_res / ss_tot

# `model` used to be referenced here before any cell had created it, so this
# failed with NameError on a fresh kernel. Fit it explicitly; degree 10 is the
# model whose score the previously recorded output of this cell matched.
model = PolynomialRegression(degree=10)
model.fit(X_train, y_train)

y_pred = model.predict(X_CV)
r2 = r_squared(y_CV, y_pred)
print(f"R-squared: {r2}")

# Experiment with different degrees
degrees_to_try = [2, 3, 4, 5, 6, 7, 8, 9, 10]  # Try degrees from 2 to 10
# R-squared has no lower bound, so start below every possible score; a fixed
# -1 could leave best_degree unset when all candidates score worse than that.
best_r2 = float("-inf")
best_degree = None

for degree in degrees_to_try:
    model = PolynomialRegression(degree=degree)
    model.fit(X_train, y_train)
    predictions = model.predict(X_CV)
    r2_score = r_squared(y_CV, predictions)

    print(f"Degree: {degree}, R-squared: {r2_score}")

    if r2_score > best_r2:
        best_r2 = r2_score
        best_degree = degree

if best_degree is None:
    # Only reachable when every score is NaN (nothing compares greater).
    raise RuntimeError("No degree produced a comparable R-squared score")

print(f"\nBest degree: {best_degree}, Best R-squared: {best_r2}")

# Use the best degree for your final model
final_model = PolynomialRegression(degree=best_degree)
final_model.fit(X_train, y_train)

# Show the first 50 targets next to the matching predictions of the final
# model (previously: 50 targets vs. the full output of the last loop model).
predictions = final_model.predict(X_CV)
print(f"target label:{y_CV[:50]}, predictions: {predictions[:50]}")


R-squared: 0.30889534294031173
Degree: 2, R-squared: 0.2631555367646886
Degree: 3, R-squared: 0.2647703667639638
Degree: 4, R-squared: 0.30715450908399583
Degree: 5, R-squared: 0.3073102126494792
Degree: 6, R-squared: 0.3082438301499719
Degree: 7, R-squared: 0.30826364061334766
Degree: 8, R-squared: 0.30875144422365214
Degree: 9, R-squared: 0.30882876575252854
Degree: 10, R-squared: 0.30889534294031173

Best degree: 10, Best R-squared: 0.30889534294031173
target label:[-9.72209577e-11 -4.38856090e-11 -9.61107756e-09 -1.14823627e-08
 -6.69917737e-09  5.64296549e-09  5.29108200e-09  3.54160718e-10
  3.21516219e-10  6.75560880e-10 -2.52357635e-09 -6.88362406e-09
  8.81116498e-09 -7.82275597e-09  4.43537560e-10  4.86895231e-08
 -1.08004617e-08 -1.00470792e-08  3.45631727e-09 -1.04631527e-11
  1.07529626e-08  3.07318930e-10  9.06802230e-10  5.68885603e-09
  2.01711282e-10 -1.11005793e-08  3.14067510e-09 -5.45724277e-09
  2.15448619e-09  2.22491784e-09 -4.87148229e-10  2.26989330e-09
  1.159

In [None]:
def r_squared(y, y_hat):
    """
    Calculates the R-squared (coefficient of determination) score.

    NOTE: this cell redefines the `r_squared` already defined in the first
    cell; whichever definition ran last is the one in effect.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    (y - y_hat) and SS_tot is the sum of squared deviations of y from its mean.
    The score is 1 for a perfect fit, 0 for a model no better than predicting
    the mean, and negative for a model that is worse than the mean.

    Args:
        y (ndarray): The actual target values.
        y_hat (ndarray): The predicted target values.

    Returns:
        float: The R-squared score. NaN for empty input. When y is constant
        (SS_tot == 0) the score is 1.0 for an exact fit and 0.0 otherwise.
    """
    # Flatten so an (n, 1) prediction cannot broadcast against an (n,) target.
    actual = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(y_hat, dtype=float).ravel()
    if actual.size == 0:
        return float("nan")

    ss_tot = ((actual - actual.mean()) ** 2).sum()  # Total sum of squares
    # Residuals are measured against the actual values, not against the mean.
    ss_res = ((actual - predicted) ** 2).sum()  # Residual sum of squares
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1.0 - ss_res / ss_tot

# Score the model refit with the selected degree. `model` is merely whichever
# degree the sweep happened to try last, not necessarily the best one.
y_pred = final_model.predict(X_CV)
r2 = r_squared(y_CV, y_pred)
print(f"R-squared: {r2}")

R-squared: 0.30889534294031173


In [None]:
# test set prediction

df1 = pd.read_csv('polynomial_regression_test.csv')
# NOTE(review): `1:-1` drops the first and the last column. Training used
# columns 1-5 as features; if the test file has no trailing target column this
# slice yields one feature too few and the model will pad the design matrix
# with zeros, producing meaningless predictions. Confirm against the CSV
# schema (expected: X_test.shape[1] == X_train.shape[1]).
X_test = df1.iloc[:,1:-1]
# Predict with the model refit at the selected degree rather than the loop
# variable `model`, which is just the last degree tried in the sweep.
y_pred_test = final_model.predict(X_test)
print(y_pred_test)

[-3.13749148e-07 -7.52508232e-08 -1.43409853e-09 ...  2.58027056e-08
  3.51988261e-09 -9.38809097e-10]
