In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

from sklearn.datasets import load_iris, load_digits, load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score

In [None]:
# Load the breast cancer dataset
X, y  = load_breast_cancer(return_X_y=True,as_frame=True)

numerical_feature = [feature for feature in X.columns if X[feature].dtypes != 'O']
discrete_feature=[feature for feature in numerical_feature if len(X[feature].unique())<25]
continuous_feature = [feature for feature in numerical_feature if feature not in discrete_feature]
categorical_feature = [feature for feature in X.columns if feature not in numerical_feature]
print("Numerical Features Count {}".format(len(numerical_feature)))
print("Discrete feature Count {}".format(len(discrete_feature)))
print("Continuous feature Count {}".format(len(continuous_feature)))
print("Categorical feature Count {}".format(len(categorical_feature)))

In [149]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X,y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)
# X.head()


In [169]:
from sklearn.datasets import fetch_openml
X, y = fetch_openml(data_id=24, as_frame=True, return_X_y=True)
# X, y = mushroom_data['data'], mushroom_data['target']
# X, y = mushroom_data(return_X_y = True)
X

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises%3F,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,k,s,n,f,n,a,c,b,y,e,...,s,o,o,p,o,o,p,b,c,l
8120,x,s,n,f,n,a,c,b,y,e,...,s,o,o,p,n,o,p,b,v,l
8121,f,s,n,f,n,a,c,b,n,e,...,s,o,o,p,o,o,p,b,c,l
8122,k,y,n,f,y,f,c,n,b,t,...,k,w,w,p,w,o,e,w,v,l


: 

In [4]:
from sklearn.datasets import load_iris

In [15]:
#Automatic Hyperparameter Optimization using Ray Tune or Optuna (use this for deep learning)

#OPTUNA

import optuna 
import sklearn 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def objective(trial):
    iris = load_iris()
    
    # Define the search space for the hyperparameters
    n_estimators = trial.suggest_int('n_estimators', 5, 50, 100)
    max_depth = int(trial.suggest_float('max_depth', 5, 32, log=True))
    max_features = trial.suggest_categorical('max_features', ['auto', 'sqrt', 'log2'])
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    #you can add other hyperparameters below
    
    
    # Create the Random Forest classifier model
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                  max_features=max_features, min_samples_split=min_samples_split,
                                  min_samples_leaf=min_samples_leaf)
    
    return cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))



[32m[I 2023-05-06 06:39:39,890][0m A new study created in memory with name: no-name-83278cd6-f961-44d0-a22e-d9f7a3048451[0m

The distribution is specified by [5, 50] and step=100, but the range is not divisible by `step`. It will be replaced by [5, 5].

[32m[I 2023-05-06 06:39:39,937][0m Trial 0 finished with value: 0.9466666666666667 and parameters: {'n_estimators': 5, 'max_depth': 18.895169943434595, 'max_features': 'auto', 'min_samples_split': 8, 'min_samples_leaf': 10}. Best is trial 0 with value: 0.9466666666666667.[0m

The distribution is specified by [5, 50] and step=100, but the range is not divisible by `step`. It will be replaced by [5, 5].

[32m[I 2023-05-06 06:39:39,980][0m Trial 1 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 5, 'max_depth': 31.056732223075787, 'max_features': 'log2', 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9533333333333333.[0m

The distribution is specified by [5, 50] and step=100, 

Accuracy: 0.9733333333333333
Best hyperparameters: {'n_estimators': 5, 'max_depth': 5.53491999822429, 'max_features': 'sqrt', 'min_samples_split': 10, 'min_samples_leaf': 1}


In [16]:
study.best_params 

{'n_estimators': 5,
 'max_depth': 5.53491999822429,
 'max_features': 'sqrt',
 'min_samples_split': 10,
 'min_samples_leaf': 1}

In [11]:
def objective(trial):
    iris = sklearn.datasets.load_iris()
    
    n_estimators = trial.suggest_int('n_estimators', 2, 20)
    max_depth = int(trial.suggest_float('max_depth', 1, 32, log=True))
    
    clf = sklearn.ensemble.RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth)
    
    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2023-05-06 06:23:18,695][0m A new study created in memory with name: no-name-90e14488-068b-4987-b1db-e33f38d70d9a[0m
[32m[I 2023-05-06 06:23:18,738][0m Trial 0 finished with value: 0.96 and parameters: {'n_estimators': 7, 'max_depth': 4.337308275206553}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-05-06 06:23:18,830][0m Trial 1 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 19, 'max_depth': 1.0500924068025614}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-05-06 06:23:18,889][0m Trial 2 finished with value: 0.9466666666666667 and parameters: {'n_estimators': 11, 'max_depth': 8.167600596271539}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-05-06 06:23:18,971][0m Trial 3 finished with value: 0.96 and parameters: {'n_estimators': 13, 'max_depth': 4.292154425714685}. Best is trial 0 with value: 0.96.[0m
[32m[I 2023-05-06 06:23:19,037][0m Trial 4 finished with value: 0.96 and parameters: {'n_estimators': 6, 'max_depth': 5.2731356

Accuracy: 0.9733333333333333
Best hyperparameters: {'n_estimators': 12, 'max_depth': 4.490459807827673}


In [7]:
optuna.visualization.plot_optimization_history(study)

In [8]:
optuna.visualization.plot_slice(study)

In [9]:
optuna.visualization.plot_contour(study, params=['n_estimators', 'max_depth'])

In [156]:
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn import set_config

set_config(display='diagram')

# Load the iris dataset
iris = load_iris()

# Create a pipeline
pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=2)),
        ('classifier', LogisticRegression())
    ])

# Visualize the pipeline
pipe


In [18]:
import flask