In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from hyperopt import fmin, tpe, hp
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [16]:
# 1. Reading data from CSV
def read_csv(file_path):
    return pd.read_csv(file_path)

# 2. Creating features
def create_features(data):
    # No feature creation for this example
    return data

# 3. Training a classifier model
def train_classifier(data):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return model, accuracy

# 4. Hyperparameter tuning with Hyperopt
def objective(params):
    model = RandomForestClassifier(**params)
    score = cross_val_score(model, X, y, cv=5).mean()
    return -score  # Minimize negative accuracy

# 5. Evaluating the model on the test set
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

In [17]:
file_path = "D:\\DSMP\\DSMP-2\\california_house.csv"
data = read_csv(file_path)

In [18]:
data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25,1665,374.0,845,330,1.5603,INLAND,78100
20636,-121.21,39.49,18,697,150.0,356,114,2.5568,INLAND,77100
20637,-121.22,39.43,17,2254,485.0,1007,433,1.7000,INLAND,92300
20638,-121.32,39.43,18,1860,409.0,741,349,1.8672,INLAND,84700


In [26]:
# Load data
file_path = "D:\\DSMP\\DSMP-2\\california_house.csv"
data = read_csv(file_path)

# Create features
data = create_features(data)

# Split data into features and target
X = data.drop(columns=['ocean_proximity', 'median_house_value'],axis=1)
y = data['median_house_value']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the pipeline
pipeline = Pipeline([
    ('preprocessor', ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), X.columns)
        ],
        remainder='passthrough'
    )),
    ('classifier', RandomForestRegressor())
])

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
print("R²:", r2)

#Brute force appraoch try with 2 for loops

# Hyperparameter tuning using Tree of Parzen Estimators (TPE)
space = {
    'n_estimators': hp.choice('n_estimators', range(10, 101)),
    'max_depth': hp.choice('max_depth', range(1, 21))
}

best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

# Update the pipeline with the best hyperparameters


# Train the model with the best hyperparameters


# Evaluate the updated model


MSE: 2457913452.7259464
R²: 0.8124315771992449
  0%|                                                                          | 0/100 [00:00<?, ?trial/s, best loss=?]


job exception: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File 

  0%|                                                                          | 0/100 [01:09<?, ?trial/s, best loss=?]


ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 82, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 147, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 188, in _parallel_build_trees
    tree._fit(
    ~~~~~~~~~^
        X,
        ^^
    ...<3 lines>...
        missing_values_in_feature_mask=missing_values_in_feature_mask,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\tree\_classes.py", line 472, in _fit
    builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "sklearn/tree/_tree.pyx", line 141, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 256, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 911, in sklearn.tree._tree.Tree._add_node
  File "sklearn/tree/_tree.pyx", line 880, in sklearn.tree._tree.Tree._resize_c
  File "sklearn/tree/_utils.pyx", line 29, in sklearn.tree._utils.safe_realloc
MemoryError: could not allocate 60506112 bytes

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 82, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 147, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 188, in _parallel_build_trees
    tree._fit(
    ~~~~~~~~~^
        X,
        ^^
    ...<3 lines>...
        missing_values_in_feature_mask=missing_values_in_feature_mask,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\tree\_classes.py", line 472, in _fit
    builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "sklearn/tree/_tree.pyx", line 141, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 256, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 911, in sklearn.tree._tree.Tree._add_node
  File "sklearn/tree/_tree.pyx", line 880, in sklearn.tree._tree.Tree._resize_c
  File "sklearn/tree/_utils.pyx", line 29, in sklearn.tree._utils.safe_realloc
MemoryError: could not allocate 60538880 bytes

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 82, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 147, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 188, in _parallel_build_trees
    tree._fit(
    ~~~~~~~~~^
        X,
        ^^
    ...<3 lines>...
        missing_values_in_feature_mask=missing_values_in_feature_mask,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\tree\_classes.py", line 472, in _fit
    builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "sklearn/tree/_tree.pyx", line 141, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 256, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 911, in sklearn.tree._tree.Tree._add_node
  File "sklearn/tree/_tree.pyx", line 880, in sklearn.tree._tree.Tree._resize_c
  File "sklearn/tree/_utils.pyx", line 29, in sklearn.tree._utils.safe_realloc
MemoryError: could not allocate 60817408 bytes

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 82, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 147, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 188, in _parallel_build_trees
    tree._fit(
    ~~~~~~~~~^
        X,
        ^^
    ...<3 lines>...
        missing_values_in_feature_mask=missing_values_in_feature_mask,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\tree\_classes.py", line 472, in _fit
    builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "sklearn/tree/_tree.pyx", line 141, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 256, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 911, in sklearn.tree._tree.Tree._add_node
  File "sklearn/tree/_tree.pyx", line 880, in sklearn.tree._tree.Tree._resize_c
  File "sklearn/tree/_utils.pyx", line 29, in sklearn.tree._utils.safe_realloc
MemoryError: could not allocate 30310400 bytes

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 486, in fit
    trees = Parallel(
    ...<2 lines>...
        prefer="threads",
    )(
        delayed(_parallel_build_trees)(
    ...<12 lines>...
        for i, t in enumerate(trees)
    )
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 82, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\utils\parallel.py", line 147, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\ensemble\_forest.py", line 188, in _parallel_build_trees
    tree._fit(
    ~~~~~~~~~^
        X,
        ^^
    ...<3 lines>...
        missing_values_in_feature_mask=missing_values_in_feature_mask,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\NAIM\miniconda3\Lib\site-packages\sklearn\tree\_classes.py", line 472, in _fit
    builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "sklearn/tree/_tree.pyx", line 141, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 256, in sklearn.tree._tree.DepthFirstTreeBuilder.build
  File "sklearn/tree/_tree.pyx", line 911, in sklearn.tree._tree.Tree._add_node
  File "sklearn/tree/_tree.pyx", line 880, in sklearn.tree._tree.Tree._resize_c
  File "sklearn/tree/_utils.pyx", line 29, in sklearn.tree._utils.safe_realloc
MemoryError: could not allocate 30179328 bytes


In [None]:
best_params