In [None]:
import pandas as pd
import lazypredict
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score


# Load train_clean.csv and hold out 30% of the rows for evaluation.
train = pd.read_csv("train_clean.csv")
X = train.drop(columns = ["SalePrice"])
y = train["SalePrice"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

# Baseline model. The seed is pinned (same value as the split above) so the
# reported score is identical on every Restart & Run All.
rf_baseline = RandomForestRegressor(random_state = 42)
rf_baseline.fit(X_train, y_train)
# A notebook only renders the LAST expression of a cell, so a bare
# `.score(...)` call in the middle of the cell is silently discarded.
# Print it explicitly instead.
rf_baseline_r2 = rf_baseline.score(X_test, y_test)
print(f"RandomForest baseline R2: {rf_baseline_r2}")

# Model sweep with LazyRegressor; `models` is left as the last expression so
# the result table is displayed as the cell output.
reg = LazyRegressor(verbose = 0, ignore_warnings=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
models

In [None]:
import autosklearn
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from autosklearn.regression import AutoSklearnRegressor
from autosklearn.metrics import balanced_accuracy, roc_auc
from sklearn.metrics import r2_score, mean_squared_error

from pprint import pprint

import numpy as np
# The optimisation metric must be an auto-sklearn scorer object. It is imported
# under an alias so it cannot be confused with (or overwritten by) the float
# `r2` score computed further down in this cell.
from autosklearn.metrics import r2 as r2_metric

train = pd.read_csv("train_clean.csv")
# NOTE(review): `test` is loaded but not used anywhere in the visible cells.
test = pd.read_csv("test.csv")

# Features / target, with 25% of the rows held out for the final evaluation.
X = train.drop(columns = ["SalePrice"])
y = train["SalePrice"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

# Search budget: 5 minutes overall, at most 30 seconds per candidate model.
reg = AutoSklearnRegressor(time_left_for_this_task=5*60, per_run_time_limit=30, metric = r2_metric)
reg.fit(X_train, y_train)

# Hold-out evaluation of the fitted model.
y_pred = reg.predict(X_test)
r2 = r2_score(y_test, y_pred)
# RMSE as the square root of the MSE; this avoids the `squared=` keyword, which
# is not accepted by every scikit-learn release.
rms = mean_squared_error(y_test, y_pred) ** 0.5
print(f"R2: {r2} \n RMS: {rms}")

print(reg.sprint_statistics())

pprint(reg.show_models())

# Hyperparameters of the configuration with the highest mean test score
# (last expression, so it is rendered as the cell output).
reg.cv_results_['params'][np.argmax(reg.cv_results_['mean_test_score'])]



In [None]:
import inspect

import numpy as np
import optuna
import pandas as pd
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
from xgboost import XGBRegressor

def load_data(path = "train_clean.csv", target = "SalePrice"):
    """Read a training table from CSV and split it into features and target.

    Parameters
    ----------
    path : str or path-like, default "train_clean.csv"
        CSV file to read; passed straight to ``pandas.read_csv``.
    target : str, default "SalePrice"
        Name of the column to predict.

    Returns
    -------
    X : pandas.DataFrame
        Every column of the file except ``target``.
    y : pandas.Series
        The ``target`` column.

    Raises
    ------
    KeyError
        If ``target`` is not a column of the file.
    """
    train = pd.read_csv(path)
    if target not in train.columns:
        # Fail with a message that names both the column and the file instead
        # of the bare pandas lookup error.
        raise KeyError(f"target column {target!r} not found in {path!r}")
    X = train.drop(columns = [target])
    y = train[target]
    return X, y


class Objective:
    """Optuna objective that picks a regressor family and tunes its hyperparameters.

    Each call lets the trial choose one of ``self.regressors`` ("XGB", "SVR" or
    "BAYES"), samples that family's hyperparameters, and returns the mean
    cross-validated R2 of the resulting model on ``(X, y)``. The value is meant
    to be maximised.

    Parameters
    ----------
    X : array-like
        Feature matrix (anything ``numpy.copy`` can turn into an array).
    y : array-like
        Target values aligned with ``X``.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.regressors = ["XGB", "SVR", "BAYES"]

    def __call__(self, trial):
        """Evaluate one trial.

        Parameters
        ----------
        trial : optuna.trial.Trial
            Source of the hyperparameter suggestions.

        Returns
        -------
        float
            Mean of the ``cross_val_score`` results with ``scoring="r2"``.
        """
        # Fresh NumPy copies: the estimators never see or mutate the stored inputs.
        X = np.copy(self.X)
        y = np.copy(self.y)

        reg_name = trial.suggest_categorical("reg_name", self.regressors)

        if reg_name == "XGB":
            reg = XGBRegressor(**self._suggest_xgb(trial), objective = "reg:squarederror")
        elif reg_name == "SVR":
            reg = SVR(**self._suggest_svr(trial))
        else:
            reg = BayesianRidge(**self._suggest_bayes(trial, BayesianRidge))

        scores = cross_val_score(reg, X, y, scoring = "r2")
        return scores.mean()

    @staticmethod
    def _suggest_xgb(trial):
        """Sample constructor kwargs for ``XGBRegressor`` (seed and n_jobs are fixed)."""
        params = {}
        params["n_estimators"] = trial.suggest_int("n_estimators", 50, 1000)
        params["max_depth"] = trial.suggest_int("max_depth", 2, 50)
        params["learning_rate"] = trial.suggest_float("learning_rate", 1e-7, 1, log = True)
        params["booster"] = trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"])
        # The range spans twelve orders of magnitude. Uniform sampling would put
        # ~99% of the draws above 1 and never explore the small values, so
        # sample on a log scale.
        params["reg_alpha"] = trial.suggest_float("reg_alpha", 1e-10, 1e2, log = True)
        params["reg_lambda"] = trial.suggest_float("reg_lambda", 1e-10, 1e2, log = True)
        params["random_state"] = 42
        params["n_jobs"] = -1
        return params

    @staticmethod
    def _suggest_svr(trial):
        """Sample constructor kwargs for ``SVR``."""
        params = {}
        params["kernel"] = trial.suggest_categorical("kernel", ["linear", "rbf", "sigmoid"])
        # C covers eight decades -> log scale; epsilon's range is narrow enough
        # for uniform sampling.
        params["C"] = trial.suggest_float("C", 1e-6, 1e2, log = True)
        params["epsilon"] = trial.suggest_float("epsilon", 1e-2, 0.5)
        params["shrinking"] = trial.suggest_categorical("shrinking", [True, False])
        return params

    @staticmethod
    def _suggest_bayes(trial, estimator_cls):
        """Sample constructor kwargs for a ``BayesianRidge``-like ``estimator_cls``.

        The constructor signature is inspected so that the same code runs on
        scikit-learn releases that still take ``n_iter`` / ``normalize`` and on
        releases where ``n_iter`` became ``max_iter`` and ``normalize`` is gone
        (passing an unknown keyword would raise ``TypeError``).
        """
        accepted = inspect.signature(estimator_cls).parameters
        params = {}

        # The trial parameter keeps its original name ("n_iter"); only the
        # constructor keyword adapts to the installed scikit-learn.
        iter_kwarg = "max_iter" if "max_iter" in accepted else "n_iter"
        params[iter_kwarg] = trial.suggest_int("n_iter", 100, 1500)

        # All of these ranges span several decades -> log scale.
        params["tol"] = trial.suggest_float("tol", 1e-5, 1e-2, log = True)
        params["alpha_1"] = trial.suggest_float("alpha1", 1e-10, 1e-2, log = True)
        params["alpha_2"] = trial.suggest_float("alpha2", 1e-10, 1e-2, log = True)
        params["lambda_1"] = trial.suggest_float("lambda1", 1e-10, 1e-2, log = True)
        params["lambda_2"] = trial.suggest_float("lambda2", 1e-10, 1e-2, log = True)
        params["fit_intercept"] = trial.suggest_categorical("fit_intercept", [True, False])

        # Only tune `normalize` where the estimator still supports it.
        if "normalize" in accepted:
            params["normalize"] = trial.suggest_categorical("normalize", [True, False])
        return params

X, y = load_data()
objective = Objective(X, y)

# Seed the sampler (TPE, Optuna's default for single-objective studies) with the
# same seed used elsewhere in this notebook, so that the sequence of suggested
# hyperparameters is repeatable across Restart & Run All.
study = optuna.create_study(direction = "maximize", sampler = optuna.samplers.TPESampler(seed = 42))
study.optimize(objective, n_trials = 100)

# Report the best mean cross-validated R2 and the parameters that produced it.
trial = study.best_trial
print(f"Best trial: {trial.value}")
print("Best trial params: ")
for key, value in trial.params.items():
    print(f"{key}: {value}")



