Title: GridSearchCV & RandomizedSearchCV

Task 1: GridSearchCV for Decision Trees<br>
Use GridSearchCV to tune `max_depth` and `min_samples_split` of a Decision Tree classifier on the Iris dataset.

In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --- Data -------------------------------------------------------------------
# Iris: feature matrix and class labels taken straight from the bundled dataset.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for the final evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Model and search space -------------------------------------------------
# Seeded so that tie-breaking inside the tree is reproducible between runs.
dt_model = DecisionTreeClassifier(random_state=42)

# Every (max_depth, min_samples_split) combination listed here is evaluated.
param_grid = dict(
    max_depth=[3, 5, 7, 10],
    min_samples_split=[2, 5, 10, 20],
)

# --- Exhaustive search with 5-fold cross-validation on the training split ---
grid_search = GridSearchCV(
    estimator=dt_model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

# Winning configuration and the estimator fitted with it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# --- Evaluation on the held-out test split ----------------------------------
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Best Parameters: {best_params}")
print(f"Accuracy of best model: {accuracy:.2f}")

Best Parameters: {'max_depth': 5, 'min_samples_split': 10}
Accuracy of best model: 1.00


Task 2: RandomizedSearchCV for Random Forest<br>
Apply RandomizedSearchCV to optimize the hyperparameters of a Random Forest classifier for customer-churn prediction (using a synthetic dataset).

In [2]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

# Synthetic customer churn dataset.
# The churn label is drawn from a probability that depends on the features:
# short tenure and high monthly charges make churn more likely. A label
# sampled independently of the features would give the search nothing to
# learn, and every candidate would score at the majority-class baseline.
np.random.seed(42)
size = 1000
tenure = np.random.randint(1, 72, size=size)
charges = np.random.uniform(20, 120, size=size)
total = tenure * charges + np.random.normal(0, 50, size=size)

# Logistic model of churn; the intercept keeps the overall churn rate at
# roughly 30%, i.e. the classes stay imbalanced.
churn_logit = -1.0 - 0.04 * (tenure - 36) + 0.02 * (charges - 70)
churn_prob = 1.0 / (1.0 + np.exp(-churn_logit))
churn = np.random.binomial(1, churn_prob)

df = pd.DataFrame({'Tenure': tenure, 'Charges': charges, 'Total': total, 'Churn': churn})

X = df[['Tenure', 'Charges', 'Total']]
y = df['Churn']

# Split data into train and test sets.
# stratify=y keeps the churn / no-churn ratio the same in both splits, which
# matters because the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Initialize Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Define hyperparameter grid for RandomizedSearchCV
# (5 * 4 * 3 * 3 * 2 = 360 possible combinations; n_iter below samples 100 of them)
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False],
}

# Initialize RandomizedSearchCV
# scoring is stated explicitly so the selection metric matches the accuracy
# reported on the test split below.
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    scoring='accuracy',
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

# Fit the model
random_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = random_search.best_params_
best_model = random_search.best_estimator_

# Make predictions with the best model
y_pred = best_model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Accuracy of best model: {accuracy:.2f}")


Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END bootstrap=False, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=500; total time=   0.9s
[CV] END bootstrap=False, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=500; total time=   1.0s
[CV] END bootstrap=True, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=   0.6s
[CV] END bootstrap=False, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=500; total time=   1.0s
[CV] END bootstrap=True, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=   0.6s
[CV] END bootstrap=False, max_depth=30, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END bootstrap=True, max_depth=10, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=   0.7s
[CV] END bootstrap=False, max_depth=30, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total tim

Task 3: Fine-Tuning SVR with GridSearchCV<br>
Use GridSearchCV to find the best parameters for Support Vector Regression (SVR) on a synthetic housing dataset.

In [3]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

# --- Synthetic housing data -------------------------------------------------
# Price is a linear function of floor area and bedroom count plus Gaussian
# noise; the seed makes the sample reproducible.
np.random.seed(42)
size = 1000
sqft = np.random.randint(500, 4000, size=size)
beds = np.random.randint(1, 6, size=size)
noise = np.random.normal(0, 30000, size=size)
price = sqft * 200 + beds * 10000 + noise
df = pd.DataFrame(dict(Sqft=sqft, Beds=beds, Price=price))

# Features and regression target.
X = df[['Sqft', 'Beds']]
y = df['Price']

# Hold out 30% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Model and search space -------------------------------------------------
# NOTE(review): the features are passed to SVR unscaled; SVR is generally
# sensitive to feature scale, so a scaling step may be worth evaluating.
svr_model = SVR()

param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
)

# --- Exhaustive search, 5-fold CV, ranked by negated mean squared error -----
grid_search = GridSearchCV(
    estimator=svr_model,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train, y_train)

# Winning configuration and the estimator fitted with it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# --- Evaluation on the held-out rows (root mean squared error) --------------
y_pred = best_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Best Parameters: {best_params}")
print(f"RMSE for the best SVR model: {rmse:.2f}")


Best Parameters: {'C': 100, 'gamma': 'scale', 'kernel': 'linear'}
RMSE for the best SVR model: 30819.98
