In [1]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier, RandomForestClassifier

voting_clf = VotingClassifier(
    estimators=[
        ('svc', SVC(probability=True)),
        ('lgc', LogisticRegression(n_jobs=-1)),
        ('forest', RandomForestClassifier(n_estimators=10, n_jobs=-1, max_depth=4, criterion='gini')),
    ],
    voting='soft'
)

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris(as_frame=False)
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

voting_clf.fit(X_train, y_train)

0,1,2
,estimators,"[('svc', ...), ('lgc', ...), ...]"
,voting,'soft'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100

0,1,2
,n_estimators,10
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [4]:
from sklearn.metrics import accuracy_score
y_pred = voting_clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(acc)

1.0


In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# 1) SVC
svc_params = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
svc_grid = GridSearchCV(SVC(probability=True), svc_params, cv=5, n_jobs=-1)
svc_grid.fit(X_train, y_train)
best_svc = svc_grid.best_estimator_
print('svc', svc_grid.best_estimator_)

# 2) LogisticRegression
lgc_params = {
    'C': [0.1, 1, 10],
    'solver': ['lbfgs', 'liblinear']
}
lgc_grid = GridSearchCV(LogisticRegression(n_jobs=-1, max_iter=1000), lgc_params, cv=5, n_jobs=-1)
lgc_grid.fit(X_train, y_train)
best_lgc = lgc_grid.best_estimator_
print('lgc', lgc_grid.best_estimator_)

# 3) RandomForest
forest_params = {
    'n_estimators': [10, 50, 100],
    'max_depth': [4, 6, None],
    'criterion': ['gini', 'entropy']
}
forest_grid = GridSearchCV(RandomForestClassifier(n_jobs=-1), forest_params, cv=5, n_jobs=-1)
forest_grid.fit(X_train, y_train)
best_forest = forest_grid.best_estimator_
print('forest', forest_grid.best_estimator_)


svc SVC(C=1, kernel='linear', probability=True)
lgc LogisticRegression(C=1, max_iter=1000, n_jobs=-1)
forest RandomForestClassifier(max_depth=4, n_estimators=50, n_jobs=-1)


In [6]:
from sklearn.ensemble import VotingClassifier

voting_clf2 = VotingClassifier(
    estimators=[
        ('svc', best_svc),
        ('lgc', best_lgc),
        ('forest', best_forest)
    ],
    voting='soft'
)


In [None]:
param_grid = {
    'voting': ['hard', 'soft'],
    'weights': [
        [1, 1, 1],
        [2, 1, 1],
        [1, 2, 1],
        [1, 1, 2]
    ]
}

final_grid = GridSearchCV(voting_clf2, param_grid, cv=5, n_jobs=-1)
final_grid.fit(X_train, y_train)

print("Best params:", final_grid.best_params_)
print("Best score:", final_grid.best_score_)
best_voting_model = final_grid.best_estimator_


Best params: {'voting': 'hard', 'weights': [1, 1, 1]}
Best score: 0.9666666666666666


In [None]:
voting_clf2 = VotingClassifier(
    estimators=[
        ('svc', best_svc),
        ('lgc', best_lgc),
        ('forest', best_forest),
    ],
    voting='hard',
    weights=[1, 1, 1]
)

In [10]:
from sklearn.metrics import accuracy_score

voting_clf2.fit(X_train, y_train)
y_pred = voting_clf2.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(acc)

1.0


In [11]:
import joblib

joblib.dump(voting_clf, 'source/iris_dataset_classifier.joblib')

['source/iris_dataset_classifier.joblib']