# In [None]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

# --- Data ---------------------------------------------------------------
# Load the wine dataset and pull out the feature matrix and the class labels.
wine = load_wine()
X = wine.data
y = wine.target

# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Hyperparameter search ----------------------------------------------
# Candidate values sampled by the randomized search below.
param_dist = dict(
    criterion=['gini', 'entropy'],
    splitter=['best', 'random'],
    max_depth=[None, 10, 20, 30, 40, 50],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Try 100 sampled configurations, each scored with 5-fold cross-validation
# on the training portion only.
dt_classifier = DecisionTreeClassifier(random_state=42)
random_search = RandomizedSearchCV(
    estimator=dt_classifier,
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Winning configuration; kept in its own variable so it can be reused.
best_params = random_search.best_params_

# --- Final model --------------------------------------------------------
# Fit a fresh tree with the winning configuration on the full training set.
best_dt = DecisionTreeClassifier(random_state=42, **best_params)
best_dt.fit(X_train, y_train)

# Score the final tree on the held-out test samples.
y_pred = best_dt.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy * 100:.2f}%")


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import ShuffleSplit

# Step 1: Create 10 subsets using ShuffleSplit
# Only the "train" indices of each split are used; with test_size=0.2 every
# subset holds roughly 80% of X_train, and the split's own held-out part is
# ignored.
n_trees = 10
shuffle_split = ShuffleSplit(n_splits=n_trees, test_size=0.2, random_state=42)

# Step 2: Train 1 decision tree on each subset
# Every tree shares the tuned hyperparameters and the same seed, so the
# differences between trees come from the differing training subsets.
forest = []
for train_index, _ in shuffle_split.split(X_train):
    subset_X_train, subset_y_train = X_train[train_index], y_train[train_index]
    tree = DecisionTreeClassifier(**best_params, random_state=42)
    tree.fit(subset_X_train, subset_y_train)
    forest.append(tree)

# Step 3: Evaluate all trees on the test dataset
# One prediction array per tree, each with one entry per test sample.
forest_predictions = [tree.predict(X_test) for tree in forest]

# Combine the trees by majority vote. Class labels are nominal, so averaging
# them and rounding is not meaningful: votes of 0 and 2 would average to
# class 1, which no tree predicted. Transposing gives one row of votes per
# test sample; ties go to the smallest label because np.unique returns the
# labels sorted and argmax picks the first maximum.
votes_per_sample = np.asarray(forest_predictions).T
majority_vote = []
for sample_votes in votes_per_sample:
    labels, counts = np.unique(sample_votes, return_counts=True)
    majority_vote.append(labels[counts.argmax()])
majority_vote = np.asarray(majority_vote)

# Calculate overall accuracy
forest_accuracy = accuracy_score(y_test, majority_vote)
print(f"Random Forest Accuracy: {forest_accuracy * 100:.2f}%")