In [111]:
#Importing the packages

from sklearn.datasets import make_moons
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score,recall_score,f1_score,accuracy_score
from scipy.stats import mode

In [3]:
#Importing the dataset Make_Moons

data=make_moons(n_samples=10000,noise=0.4)
X=np.array(data[0])
y=np.array(data[1])
y=y.reshape([10000,1])
y.shape

(10000, 1)

In [4]:
#Scalling the Data

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X.astype(np.float64))


In [5]:
#Dividing the data into train and test data

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=0)

In [7]:
#Defining the random state of the Decision Tree for reproducibilty

dec_clf=DecisionTreeClassifier(random_state=100)

In [30]:
#Doing a Grid Search for regularising the tree

param_grid = { 
    'min_samples_split': [3,4,5,6,7,8],
    'max_depth' : [3,4,5,6,7,8,9,10],
    'max_leaf_nodes' : [3,4,5,6,7,8]
}
CV_dec = GridSearchCV(estimator=dec_clf, param_grid=param_grid, cv= 5)
dec_cv_fit=CV_dec.fit(X_train,y_train)

In [32]:
#Finding the best parameter of the Grid Search

dec_cv_fit.best_params_

{'max_depth': 3, 'max_leaf_nodes': 4, 'min_samples_split': 3}

In [33]:
#Training with the best parameters

dec_clf=DecisionTreeClassifier(random_state=100,max_depth=3,max_leaf_nodes=4,min_samples_split=3)
dec_clf.fit(X_train,y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,
                       max_features=None, max_leaf_nodes=4,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=3,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=100, splitter='best')

In [34]:
#Predicting the values

y_test_fit=dec_clf.predict(X_test)

In [113]:
#Finding the values of the different metrics

print("Cross-Validated Accuracy on 3 cv sets:",cross_val_score(dec_clf,X_test,y_test,cv=3,scoring="accuracy"))
print("Precision Score:",precision_score(y_test,y_test_fit))
print("Recall Score:",recall_score(y_test,y_test_fit))
print("F1-score:",f1_score(y_test,y_test_fit))
accuracy_score(y_test,y_test_fit)

Cross-Validated Accuracy on 3 cv sets: [0.84915085 0.826      0.84484484]
Precision Score: 0.8404669260700389
Recall Score: 0.8577101257445401
F1-score: 0.8490009826400261


0.8463333333333334

Now, creating a Random Forest Classifier

In [126]:
#Creating a 1000 samples each having 100 instances

dec_predict=[]
for i in range(1,1000):
    shuffle_index = np.random.permutation(100)
    X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
    dec_clf=DecisionTreeClassifier(random_state=100,max_depth=4,max_leaf_nodes=6,min_samples_split=5)
    dec_clf.fit(X_train,y_train)
    dec_predict.append(dec_clf.predict(X_test))

In [127]:
#Calculating the final mode of the 1000 models

final=(mode(dec_predict).mode[0])

In [128]:
#Calculating the accuracy

accuracy_score(y_test,final)

0.8363333333333334