In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from scipy.stats import randint


In [22]:
from sklearn.datasets import load_breast_cancer

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Define models
models = {
    'Logistic Regression': LogisticRegression(max_iter=500),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC()
}


In [23]:
from sklearn.model_selection import cross_validate

scoring = {
    'accuracy': 'accuracy',
    'precision_weighted': 'precision_weighted',
    'recall_weighted': 'recall_weighted',
    'f1_weighted': 'f1_weighted'
}

# Evaluate models
for name, model in models.items():
    scores = cross_validate(model, X, y, cv=5, scoring=scoring)
    print(f"\n {name} ")
    print(f"Mean Accuracy : {scores['test_accuracy'].mean():.4f}")
    print(f"Mean Precision: {scores['test_precision_weighted'].mean():.4f}")
    print(f"Mean Recall   : {scores['test_recall_weighted'].mean():.4f}")
    print(f"Mean F1 Score : {scores['test_f1_weighted'].mean():.4f}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt


 Logistic Regression 
Mean Accuracy : 0.9526
Mean Precision: 0.9540
Mean Recall   : 0.9526
Mean F1 Score : 0.9522

 Random Forest 
Mean Accuracy : 0.9631
Mean Precision: 0.9633
Mean Recall   : 0.9631
Mean F1 Score : 0.9630

 SVM 
Mean Accuracy : 0.9122
Mean Precision: 0.9181
Mean Recall   : 0.9122
Mean F1 Score : 0.9094


In [27]:
param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}


grid_search = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=param_grid,
    cv=5,
    scoring='f1_weighted',
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X, y)

print("\n Best Parameters from GridSearchCV:")
print(grid_search.best_params_)

print("\n Best F1 Weighted Score:")
print(f"{grid_search.best_score_:.4f}")


Fitting 5 folds for each of 144 candidates, totalling 720 fits

 Best Parameters from GridSearchCV:
{'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

 Best F1 Weighted Score:
0.9666


In [26]:
# Get best estimator
best_rf = grid_search.best_estimator_

# Re-evaluate using cross-validation
final_scores = cross_validate(best_rf, X, y, cv=5, scoring=scoring)

print("\n Final Evaluation of Tuned Random Forest:")
print(f"Mean Accuracy : {final_scores['test_accuracy'].mean():.4f}")
print(f"Mean Precision: {final_scores['test_precision_weighted'].mean():.4f}")
print(f"Mean Recall   : {final_scores['test_recall_weighted'].mean():.4f}")
print(f"Mean F1 Score : {final_scores['test_f1_weighted'].mean():.4f}")



 Final Evaluation of Tuned Random Forest:
Mean Accuracy : 0.9561
Mean Precision: 0.9568
Mean Recall   : 0.9561
Mean F1 Score : 0.9560


In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Define parameter distributions
param_dist = {
    'n_estimators': randint(50, 200),        # Number of trees
    'max_depth': [None, 10, 20, 30, 50],     # Tree depth
    'min_samples_split': randint(2, 10),     # Min samples to split
    'min_samples_leaf': randint(1, 5)        # Min samples per leaf
}

# Initialize the model
rf = RandomForestClassifier(random_state=42)

# Create RandomizedSearchCV object
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=30,             
    cv=5,                   
    scoring='f1_weighted',  
    verbose=1,
    random_state=42,
    n_jobs=-1               
)

# Fit to data
random_search.fit(X, y)

# Show best parameters and score
print("\n Best Parameters from RandomizedSearchCV:")
print(random_search.best_params_)

print("\n Best F1 Weighted Score:")
print(f"{random_search.best_score_:.4f}")


Fitting 5 folds for each of 30 candidates, totalling 150 fits

 Best Parameters from RandomizedSearchCV:
{'max_depth': 30, 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 121}

 Best F1 Weighted Score:
0.9647


In [29]:
# Final evaluation of the best tuned model
best_random_rf = random_search.best_estimator_

final_random_scores = cross_validate(best_random_rf, X, y, cv=5, scoring=scoring)

print("\n Final Evaluation of Tuned Random Forest (RandomizedSearchCV):")
print(f"Mean Accuracy : {final_random_scores['test_accuracy'].mean():.4f}")
print(f"Mean Precision: {final_random_scores['test_precision_weighted'].mean():.4f}")
print(f"Mean Recall   : {final_random_scores['test_recall_weighted'].mean():.4f}")
print(f"Mean F1 Score : {final_random_scores['test_f1_weighted'].mean():.4f}")



 Final Evaluation of Tuned Random Forest (RandomizedSearchCV):
Mean Accuracy : 0.9649
Mean Precision: 0.9653
Mean Recall   : 0.9649
Mean F1 Score : 0.9647


In [None]:
from sklearn.linear_model import LogisticRegression
from scipy.stats import uniform

# Define parameter distribution for Logistic Regression
param_dist_logreg = {
    'C': uniform(0.01, 10),             
    'penalty': ['l2'],                  
    'solver': ['lbfgs', 'saga'],        
    'max_iter': [200, 300, 500]         
}

# Create the RandomizedSearchCV object
random_search_logreg = RandomizedSearchCV(
    estimator=LogisticRegression(),
    param_distributions=param_dist_logreg,
    n_iter=20,               
    cv=5,
    scoring='f1_weighted',
    verbose=1,
    random_state=42,
    n_jobs=-1
)

# Fit to data
random_search_logreg.fit(X, y)

# Print best parameters and best score
print("\n✅ Best Parameters from RandomizedSearchCV (Logistic Regression):")
print(random_search_logreg.best_params_)

print("\n📈 Best F1 Weighted Score:")
print(f"{random_search_logreg.best_score_:.4f}")


Fitting 5 folds for each of 20 candidates, totalling 100 fits


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt


✅ Best Parameters from RandomizedSearchCV (Logistic Regression):
{'C': 7.3299394181140505, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}

📈 Best F1 Weighted Score:
0.9541


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [34]:
# Get best estimator
best_logreg = random_search_logreg.best_estimator_

# Evaluate using cross-validation
logreg_scores = cross_validate(best_logreg, X, y, cv=5, scoring=scoring)

print("\n Final Evaluation of Tuned Logistic Regression:")
print(f"Mean Accuracy : {logreg_scores['test_accuracy'].mean():.4f}")
print(f"Mean Precision: {logreg_scores['test_precision_weighted'].mean():.4f}")
print(f"Mean Recall   : {logreg_scores['test_recall_weighted'].mean():.4f}")
print(f"Mean F1 Score : {logreg_scores['test_f1_weighted'].mean():.4f}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt


 Final Evaluation of Tuned Logistic Regression:
Mean Accuracy : 0.9526
Mean Precision: 0.9538
Mean Recall   : 0.9526
Mean F1 Score : 0.9523


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
