# **1. Decision Tree**

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:

# Generate a noisy two-moons dataset with 10,000 samples.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Grid search over max_leaf_nodes, scored by 5-fold cross-validated accuracy
# on the training set only.
param_grid = {'max_leaf_nodes': [None, 5, 10, 15, 20]}
tree_clf = DecisionTreeClassifier(random_state=42)
# refit=True (the default, spelled out here) retrains the winning configuration
# on the whole training set once the search is finished.
grid_search = GridSearchCV(tree_clf, param_grid, cv=5, scoring='accuracy', refit=True)
grid_search.fit(X_train, y_train)

# Best hyperparameters found by the search.
best_params = grid_search.best_params_

# Reuse the estimator that the search already refit on the full training set
# instead of building and fitting a second, identical tree by hand.
best_tree_clf = grid_search.best_estimator_

# Measure the tuned model's performance on the held-out test set.
y_pred = best_tree_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Decision Tree classifier: {accuracy:.2f}")


Accuracy of the Decision Tree classifier: 0.87


**2. Random Forest from Decision Trees**

In [None]:
import numpy as np
from scipy.stats import mode
from sklearn.model_selection import ShuffleSplit
from sklearn.base import clone


In [None]:

# Draw n_trees random subsets of the training set, each with n_instances samples.
n_trees = 1000
n_instances = 100
rs = ShuffleSplit(n_splits=n_trees, test_size=len(X_train) - n_instances, random_state=42)
mini_sets = []

# Only the "train" side of each split is kept; it holds exactly n_instances indices.
for mini_train_index, _ in rs.split(X_train):
    X_mini_train = X_train[mini_train_index]
    y_mini_train = y_train[mini_train_index]
    mini_sets.append((X_mini_train, y_mini_train))

# Train one clone of the tuned tree per subset. Each tree's test-set
# predictions are stored as one row of Y_pred so that every tree predicts
# only once; the same rows feed both the per-tree accuracies and the vote.
forest = [clone(best_tree_clf) for _ in range(n_trees)]
Y_pred = np.empty([n_trees, len(X_test)], dtype=np.uint8)

for tree_index, (tree, (X_mini_train, y_mini_train)) in enumerate(zip(forest, mini_sets)):
    tree.fit(X_mini_train, y_mini_train)
    Y_pred[tree_index] = tree.predict(X_test)

# Accuracy of each individual tree on the test set.
accuracy_scores = [accuracy_score(y_test, tree_pred) for tree_pred in Y_pred]

print(f"Mean accuracy of individual Decision Trees: {np.mean(accuracy_scores):.4f}")

# Majority vote: for each test instance, take the most frequent prediction
# across all trees (column-wise mode of Y_pred).
y_pred_majority_votes, _ = mode(Y_pred, axis=0)

# Evaluate the majority-vote predictions on the test set. The result is
# flattened to 1-D before scoring, whatever shape mode() returned.
accuracy_majority_votes = accuracy_score(y_test, y_pred_majority_votes.reshape([-1]))
print(f"Accuracy of Random Forest classifier: {accuracy_majority_votes:.4f}")


Mean accuracy of individual Decision Trees: 0.8012
Accuracy of Random Forest classifier: 0.8720


**3. MNIST**

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:

# Load the MNIST dataset from OpenML.
# as_frame=False pins the result to plain NumPy arrays, so the container type
# of X and y does not depend on the installed scikit-learn version's default.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist['data'], mnist['target']
# Cast the data and the target labels to uint8.
X = X.astype(np.uint8)
y = y.astype(np.uint8)


  warn(


In [3]:
# First carve out 10,000 instances as the test set, then take another 10,000
# from the remainder as the validation set; what is left is the training set.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=10000, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42
)

In [4]:

# Initialize the three base classifiers.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
et_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
# probability=True is intentionally not set: this notebook only calls
# predict() and uses hard voting, so probability estimates are never needed,
# and enabling them makes SVC training considerably slower.
# Re-enable it if predict_proba or soft voting is required.
svm_clf = SVC(kernel='rbf', random_state=42)

In [5]:

# Fit every base classifier on the same training split.
for estimator in (rf_clf, et_clf, svm_clf):
    estimator.fit(X_train, y_train)


In [6]:
# Report the validation-set accuracy of each base classifier in turn.
base_classifiers = (rf_clf, et_clf, svm_clf)
for clf in base_classifiers:
    accuracy = accuracy_score(y_val, clf.predict(X_val))
    print(f"{clf.__class__.__name__} accuracy on validation set: {accuracy:.4f}")


RandomForestClassifier accuracy on validation set: 0.9692
ExtraTreesClassifier accuracy on validation set: 0.9715
SVC accuracy on validation set: 0.9788


In [7]:
# Hard-voting ensemble over the three base classifiers.
named_estimators = [('rf', rf_clf), ('et', et_clf), ('svm', svm_clf)]
voting_clf = VotingClassifier(estimators=named_estimators, voting='hard')

# Fit the ensemble on the training set.
voting_clf.fit(X_train, y_train)

# Accuracy of the ensemble on the validation set.
y_pred_voting = voting_clf.predict(X_val)
accuracy_voting = accuracy_score(y_val, y_pred_voting)
print(f"Voting Classifier accuracy on validation set: {accuracy_voting:.4f}")

# Accuracy of the ensemble on the test set.
y_pred_test = voting_clf.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)
print(f"Voting Classifier accuracy on test set: {accuracy_test:.4f}")


Voting Classifier accuracy on validation set: 0.9744
Voting Classifier accuracy on test set: 0.9713


**4. Stacking Ensemble**

In [8]:
# The base classifiers are already trained; their predictions become the
# input features of a second-level model (the blender).
stacked_classifiers = (rf_clf, et_clf, svm_clf)

# Blender training set: one column of validation-set predictions per base classifier.
X_val_predictions = np.column_stack(
    [base_clf.predict(X_val) for base_clf in stacked_classifiers]
)

# Train the blender on those predictions against the validation labels.
blender = RandomForestClassifier(n_estimators=100, random_state=42)
blender.fit(X_val_predictions, y_val)

# Build the matching feature matrix for the test set, in the same column order.
X_test_predictions = np.column_stack(
    [base_clf.predict(X_test) for base_clf in stacked_classifiers]
)

# The blender turns the base predictions into the final ensemble prediction.
ensemble_pred_test = blender.predict(X_test_predictions)

# Evaluate the stacked ensemble on the test set.
accuracy_ensemble = accuracy_score(y_test, ensemble_pred_test)
print(f"Stacking Ensemble accuracy on test set: {accuracy_ensemble:.4f}")


Stacking Ensemble accuracy on test set: 0.9706
