In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Fetch the bundled Iris dataset and unpack the pieces used by later cells.
iris = load_iris()
data, target = iris.data, iris.target  # feature matrix and class labels
columns = iris.feature_names           # names of the feature columns

In [None]:
# Standardize every feature column.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed further down, so test rows contribute to the
# fitted statistics.
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)

In [None]:
# Cluster the standardized samples into three groups with K-Means.
# The seed is fixed so the cluster assignment is reproducible.
# NOTE(review): clustering is fitted on all rows, including those that later
# end up in the test set.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(data_scaled)
cluster_labels = kmeans.labels_  # cluster index assigned to each sample



In [None]:
# Append the cluster label as one extra feature column (last column).
cluster_column = cluster_labels[:, np.newaxis]  # 1-D labels -> column vector
data_with_clusters = np.hstack((data_scaled, cluster_column))

In [None]:
# Split the data into train and test sets.
# Both feature matrices and the labels go through ONE train_test_split call so
# that every output shares the same row permutation. Splitting the
# cluster-augmented matrix in a separate call with a different seed (43 vs 42)
# pairs its rows with y_train / y_test values that belong to other samples,
# which is consistent with a chance-level (~0.33 for three classes) accuracy.
(
    X_train, X_test,
    X_train_with_clusters, X_test_with_clusters,
    y_train, y_test,
) = train_test_split(
    data_scaled, data_with_clusters, target,
    test_size=0.2, random_state=42,
)

# Sanity check: apart from the appended cluster column, the augmented splits
# must contain exactly the same rows as the plain splits.
assert np.array_equal(X_train_with_clusters[:, :-1], X_train)
assert np.array_equal(X_test_with_clusters[:, :-1], X_test)

In [None]:
# Baseline: Random Forest trained on the standardized features only.
model_without_clusters = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the baseline on the held-out test rows.
y_pred_without_clusters = model_without_clusters.predict(X_test)
accuracy_without_clusters = accuracy_score(
    y_true=y_test,
    y_pred=y_pred_without_clusters,
)

In [None]:
# Variant: Random Forest trained on the features plus the cluster-label column.
# NOTE(review): assumes the rows of X_train_with_clusters / X_test_with_clusters
# are in the same order as y_train / y_test -- confirm against the split cell.
model_with_clusters = RandomForestClassifier(random_state=42).fit(
    X_train_with_clusters, y_train
)

# Score the variant on the held-out test rows.
y_pred_with_clusters = model_with_clusters.predict(X_test_with_clusters)
accuracy_with_clusters = accuracy_score(
    y_true=y_test,
    y_pred=y_pred_with_clusters,
)

In [None]:
# Report both test accuracies, one per line, for a side-by-side comparison.
print(
    f'Accuracy without cluster labels: {accuracy_without_clusters:.2f}',
    f'Accuracy with cluster labels: {accuracy_with_clusters:.2f}',
    sep='\n',
)

Accuracy without cluster labels: 1.00
Accuracy with cluster labels: 0.33
