In [1]:
#import lowess, logistic, gradientboost
from GradientBooster import GradientBooster
from Lowess import Lowess

In [2]:
import pandas as pd
import numpy as np

# !pip install xgboost
import xgboost

from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler,StandardScaler,QuantileTransformer
from scipy.spatial.distance import cdist
from sklearn import linear_model, datasets
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.base import clone

In [3]:
# Gaussian Kernel
def Gaussian(x):
    """Gaussian kernel weight, cut off to zero where ``|x| > 4``.

    Evaluates ``(1 / sqrt(2*pi)) * exp(-x**2 / 2)`` with NumPy and replaces
    every entry whose absolute value exceeds 4 with 0.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the kernel.

    Returns
    -------
    numpy.ndarray
        Kernel weights with the same shape as ``x`` (0-d for a scalar).
    """
    normaliser = 1/(np.sqrt(2*np.pi))
    density = normaliser*np.exp(-1/2*x**2)
    beyond_cutoff = np.abs(x) > 4
    return np.where(beyond_cutoff, 0, density)
    
# Tricubic Kernel
def Tricubic(x):
    """Tricubic kernel weight ``(1 - |x|**3)**3``, zero where ``|x| > 1``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the kernel.

    Returns
    -------
    numpy.ndarray
        Kernel weights with the same shape as ``x`` (0-d for a scalar).
    """
    magnitude = np.abs(x)
    weight = (1 - magnitude**3)**3
    return np.where(magnitude > 1, 0, weight)
    
# Epanechnikov Kernel
def Epanechnikov(x):
    """Epanechnikov kernel weight ``3/4 * (1 - |x|**2)``, zero where ``|x| > 1``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the kernel.

    Returns
    -------
    numpy.ndarray
        Kernel weights with the same shape as ``x`` (0-d for a scalar).
    """
    magnitude = np.abs(x)
    weight = 3/4*(1 - magnitude**2)
    return np.where(magnitude > 1, 0, weight)
    
# Quartic Kernel
def Quartic(x):
    """Quartic kernel weight ``15/16 * (1 - |x|**2)**2``, zero where ``|x| > 1``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the kernel.

    Returns
    -------
    numpy.ndarray
        Kernel weights with the same shape as ``x`` (0-d for a scalar).
    """
    magnitude = np.abs(x)
    weight = 15/16*(1 - magnitude**2)**2
    return np.where(magnitude > 1, 0, weight)

In [4]:
# Kernel function and tau value shared by both Lowess instances below.
kernel, tau = Gaussian, 0.5  # You may need to tune this

# The booster is given the named Lowess instance plus a second, separately
# constructed Lowess built from the same kernel and tau.
lowess_model = Lowess(kernel, tau)
gb_model = GradientBooster(
    lowess_model,
    Lowess(kernel, tau),
    n_boosting_steps=5,
)

In [5]:
# Read the concrete dataset from disk.
data = pd.read_csv('./data/concrete.csv')

# Target is the 'strength' column; every remaining column is a feature.
y = data['strength'].values
X = data.drop(columns=['strength']).values

In [6]:
def k_fold_cross_validation(model, n_splits=10, random_state=42):
    """Compare feature scalers for ``model`` using shuffled K-fold cross-validation.

    Reads the module-level ``X`` (feature matrix) and ``y`` (target vector).
    For every fold, each scaler is fitted on the training rows only and then
    applied to the held-out rows, so no information from the test fold reaches
    the preprocessing step. All scalers are evaluated on the same folds, which
    makes their scores directly comparable.

    Parameters
    ----------
    model : estimator
        Unfitted estimator that works with ``sklearn.base.clone`` and exposes
        ``fit(X, y)`` and ``predict(X)``. A fresh clone is fitted for every
        (fold, scaler) pair; ``model`` itself is never fitted.
    n_splits : int, default 10
        Number of folds passed to ``KFold``.
    random_state : int or None, default 42
        Seed for the fold shuffling (and for ``QuantileTransformer``).
        Pass ``None`` to get a different shuffle on every call.

    Returns
    -------
    dict
        Maps scaler name (``"StandardScaler"``, ``"MinMaxScaler"``,
        ``"QuantileScaler"``) to the mean test MSE across folds.
    """
    # Seeded shuffle: repeated runs give the same partition.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # scaler name -> list of per-fold test MSEs
    fold_mse = {}

    for train_index, test_index in kf.split(X):
        X_train_raw, X_test_raw = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fresh, unfitted scalers for this fold. n_quantiles is capped by the
        # number of training rows because that is what the scaler is fitted on.
        scalers = {
            "StandardScaler": StandardScaler(),
            "MinMaxScaler": MinMaxScaler(),
            "QuantileScaler": QuantileTransformer(
                n_quantiles=min(100, len(train_index)),
                output_distribution='normal',
                random_state=random_state,
            ),
        }

        for scaler_name, scaler in scalers.items():
            # Fit on the training fold only; the test fold is merely transformed.
            X_train = scaler.fit_transform(X_train_raw)
            X_test = scaler.transform(X_test_raw)

            # Clone so that no fitted state carries over between folds/scalers.
            model_clone = clone(model)
            model_clone.fit(X_train, y_train)
            y_pred = model_clone.predict(X_test)

            fold_mse.setdefault(scaler_name, []).append(
                mean_squared_error(y_test, y_pred)
            )

    # Average the per-fold errors for each scaler.
    return {name: np.mean(scores) for name, scores in fold_mse.items()}

In [7]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = tts(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Run the scaler comparison for the boosted model.
gb_results = k_fold_cross_validation(gb_model)

# Report one line per scaler with its mean cross-validated MSE.
print("GradientBooster results:")
for scaler in gb_results:
    mse = gb_results[scaler]
    print(f"{scaler}: MSE = {mse}")

ValueError: Model is not fitted. Please call 'fit' before predicting.