Tworzę Pipeline dla RandomForestClassifier



In [12]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV


# Load the Iris dataset; keep the returned object and expose features/labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for the final evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: standardise the features, then fit a random forest that uses
# its default hyperparameters.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
)
pipeline.fit(X_train, y_train)

predictions = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# Hyperparameter grid; the `classifier__` prefix addresses the pipeline step
# named 'classifier'.
param_grid = dict(
    classifier__n_estimators=[50, 100, 200],
    classifier__max_depth=[None, 10, 20],
    classifier__min_samples_split=[2, 5, 10],
)

# 5-fold cross-validated search over the grid, run on the training split only.
grid_search = GridSearchCV(pipeline, param_grid, cv=5)
grid_search.fit(X_train, y_train)
print("Najlepsze hiperparametry:", grid_search.best_params_)

# Score the best pipeline found by the search on the held-out test split.
# NOTE: `predictions` and `accuracy` are overwritten with the tuned model's values.
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Dokładność najlepszego modelu:", accuracy)

Accuracy: 1.0
Najlepsze hiperparametry: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50}
Dokładność najlepszego modelu: 1.0


Porównuję z modelem LinearRegression (to model regresyjny, więc zamiast accuracy oceniam go błędem średniokwadratowym – MSE)

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
# `mean_squared_error` is used below but is not imported by any visible cell,
# so import it here to keep the cell runnable on a fresh kernel.
from sklearn.metrics import mean_squared_error

# Load the data BEFORE splitting it, so the split does not depend on `X`/`y`
# left over in the kernel from another cell.
iris = load_iris()
X = iris.data
y = iris.target

# Same 80/20 split and seed as the other cells, so results are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features, then fit an ordinary least-squares regressor.
# NOTE: this rebinds the name `pipeline`.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression())
])

pipeline.fit(X_train, y_train)

# LinearRegression outputs continuous values, so the integer class labels are
# treated as numeric targets and the fit is scored with MSE, not accuracy.
predictions = pipeline.predict(X_test)

mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.03711379440797689


Obliczam accuracy dla LogisticRegression

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Reload the Iris data so this cell does not reuse `X`/`y` from another cell.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features, then fit a logistic-regression classifier with
# its default settings.
pipeline3 = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression()),
    ]
)
pipeline3.fit(X_train, y_train)

# Evaluate on the held-out split.
predictions3 = pipeline3.predict(X_test)
accuracy = accuracy_score(y_test, predictions3)
print("Accuracy:", accuracy)


Accuracy: 1.0
