In [1]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from scipy.stats import randint

# Load the wine dataset once; the same bunch supplies both the value matrix
# and the column names, so the loader is not invoked a second time.
wine = load_wine()
data = wine.data
df = pd.DataFrame(data, columns=wine.feature_names)

# The regression target is the 'alcohol' column; every other column is a predictor
y = df['alcohol']
X = df.drop(columns='alcohol')

# Hold out 20% of the rows for evaluation (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only,
# then the same fitted transform is applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base estimator and the integer distributions each hyperparameter is drawn from
model = RandomForestRegressor(random_state=42)
param_dist = dict(
    n_estimators=randint(100, 1000),
    max_depth=randint(3, 30),
    min_samples_split=randint(2, 20),
    min_samples_leaf=randint(1, 10),
)

# Randomized search: 20 sampled configurations, each evaluated with cv=5,
# seeded for reproducibility and run with n_jobs=-1
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train_scaled, y_train)

# Use the best estimator found by the search to predict the held-out rows
best_model = random_search.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)

# Report R² of those predictions against the held-out targets
r2_best = r2_score(y_true=y_test, y_pred=y_pred_best)
print("Improved R² Score:", r2_best)


Improved R² Score: 0.7476545854256452
