<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/DecisionTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset download link:
https://drive.google.com/file/d/115328Fks1LZS0JDxr4-d2hPOWyXwjwJS/view?usp=sharing

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset can be read from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

# Decision Tree Regressor

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Read the CSV from the mounted Drive folder and show the resulting frame.
DATASET_PATH = '/content/drive/MyDrive/datasets/Position_Salaries.csv'
df = pd.read_csv(DATASET_PATH)
df

In [None]:
# A notebook cell only renders its last expression, so a bare `df.shape`
# followed by `df.columns` would silently drop the shape. Returning both as a
# tuple shows the dimensions and the column labels together.
df.shape, df.columns

In [None]:
# Column 1 is taken with a slice (1:2) so the feature array stays 2-D,
# which is the layout scikit-learn estimators expect for X.
# Column 2 is taken by position and becomes the 1-D target array.
feature_frame = df.iloc[:, 1:2]
target_column = df.iloc[:, 2]

X = feature_frame.values
y = target_column.values
X, y

In [None]:
# Hold out 45% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.45,
    random_state=0,
)

In [None]:
# Fit the regressor on the training split only. Fitting on the full X, y would
# leak the held-out rows into the model and make the test-set metrics computed
# afterwards meaningless (an unrestricted tree reproduces rows it has seen).
# Later cells (single-value prediction, plot) therefore use a model trained on
# the training rows rather than on every row.
dtr_model = DecisionTreeRegressor(random_state=0)
dtr_model.fit(X_train, y_train)

In [None]:
# Predictions for the held-out rows; compared against y_test in the metrics cell.
y_pred = dtr_model.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Store the score under its own name: assigning it to `r2_score` would replace
# the imported function with a float and break every later call to r2_score().
r2 = r2_score(y_test, y_pred)
print("R2_score:", r2)

In [None]:
# Predict for a single input value of 3.7 (passed as a 1x1 2-D list).
# Kept under its own name so the test-set predictions in `y_pred` are not
# overwritten; otherwise re-running the metrics cell after this one would fail
# because y_test and y_pred would no longer have the same length.
single_pred = dtr_model.predict([[3.7]])
single_pred

In [None]:
# Dense grid of inputs (step 0.01) across the observed feature range, so the
# tree's prediction can be drawn as a curve rather than only at the data points.
# X is 2-D, so the builtin min()/max() would return 1-element arrays; handing
# those to np.arange relies on array-to-scalar conversion that NumPy deprecates.
# X.min()/X.max() return plain scalars instead.
X_grid = np.arange(X.min(), X.max(), 0.01)
X_grid2 = X_grid.reshape(-1, 1)  # predict() expects a 2-D (n_samples, 1) input
y_grid2_pred = dtr_model.predict(X_grid2)

# Red points: the observed data. Blue line: the model's prediction over the grid.
plt.scatter(X, y, color='red')
plt.plot(X_grid2, y_grid2_pred, color='blue')
plt.title('Decision Tree Regression')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Decision Tree Classifier

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score , recall_score , precision_score

# Load the iris data set: feature matrix and integer class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 35% of the samples for evaluation with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)

# Seed the tree as well: scikit-learn permutes features at each split, so an
# unseeded tree can differ between runs even on an identical train/test split.
dtc_model = DecisionTreeClassifier(random_state=42)
dtc_model.fit(X_train, y_train)

y_pred = dtc_model.predict(X_test)

# With average='micro' on single-label multiclass data, recall and precision
# are both equal to the accuracy, so the three numbers below coincide.
print("confusion_matrix:\n", confusion_matrix(y_test, y_pred))
print("accuracy_score:", accuracy_score(y_test, y_pred))
print("recall_score:", recall_score(y_test, y_pred, average='micro'))
print("precision_score:", precision_score(y_test, y_pred, average='micro'))

# Decision Tree Classifier - Criterion

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic classification data: 1000 samples with 5 features. Seeded so the
# generated data, and therefore the reported accuracies, are repeatable.
X, y = make_classification(n_samples=1000, n_features=5, random_state=42)

# Hold out 40% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Tree that splits on entropy (information gain).
tree_gain = DecisionTreeClassifier(criterion='entropy', random_state=42)
tree_gain.fit(X_train, y_train)

# Keep the score in its own variable: assigning it to `accuracy_score` would
# replace the imported function with a float for the rest of the session.
y_pred = tree_gain.predict(X_test)
accuracy_entropy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy_entropy)

# Tree that splits on Gini impurity, trained on the same split for comparison.
tree_gini = DecisionTreeClassifier(criterion='gini', random_state=42)
tree_gini.fit(X_train, y_train)

accuracy_gain = tree_gain.score(X_test, y_test)
print("Accuracy using Gain:", accuracy_gain)

# The Gini tree was previously trained but never evaluated; report it so the
# two criteria can actually be compared.
accuracy_gini = tree_gini.score(X_test, y_test)
print("Accuracy using Gini:", accuracy_gini)



# Pruning

In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris data set and split it 50/50 into training and held-out halves.
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Unpruned baseline tree. It uses the same seed as the pruned candidates below
# so that the pruning path and the candidate trees come from the same
# tree-building randomness.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy before pruning:", accuracy)

# Effective alphas (and the matching total leaf impurities) at which subtrees
# get pruned away under minimal cost-complexity pruning.
path = clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

# Train one tree per candidate alpha.
clfs = []
for ccp_alpha in ccp_alphas:
    clf_temp = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
    clf_temp.fit(X_train, y_train)
    clfs.append(clf_temp)

# Drop the tree for the largest alpha: per the scikit-learn pruning example,
# it is the trivial tree that has been pruned down to a single node.
clfs = clfs[:-1]

# NOTE(review): the best alpha is chosen by accuracy on the test half, so the
# reported "after pruning" accuracy is optimistically biased. A separate
# validation split or cross-validation would give an honest estimate.
max_accuracy = -1
best_clf = None
# A distinct loop name keeps the unpruned baseline in `clf` from being clobbered.
for candidate in clfs:
    y_pred = candidate.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("\t\t*accuracy ", accuracy)
    # Strict '>' keeps the first (smallest-alpha) tree among equally accurate ones.
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        best_clf = candidate

print("Accuracy after pruning:", max_accuracy)