<a href="https://colab.research.google.com/github/luisFelipeEvilla/arboles-de-desicion-tasas-de-interes-por-creditos/blob/master/Parcial2_IA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve, auc
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Download the source table as CSV from the datos.gov.co open-data endpoint.
data = pd.read_csv("https://www.datos.gov.co/api/views/u8ud-84pb/rows.csv?accessType=DOWNLOAD")

# Every column except the last is a feature; the last column is the target.
# NOTE(review): assumes all feature columns are numeric, since they are
# standardised below -- confirm against the dataset schema.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same transform to both partitions so the test rows never influence the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Two decision trees split on Shannon entropy that differ only in their
# maximum depth (3 vs. 5). Each one is fitted on the training partition as it
# is created; fit() returns the estimator itself, so the names are bound to
# the trained models.
tree1 = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

tree2 = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    random_state=42,
).fit(X_train, y_train)

# Evaluate both trained trees on the held-out test partition.

# Hard class predictions drive accuracy, precision and recall.
y_pred1 = tree1.predict(X_test)
y_pred2 = tree2.predict(X_test)

acc1 = accuracy_score(y_test, y_pred1)
acc2 = accuracy_score(y_test, y_pred2)

# 'weighted' averages the per-class scores weighted by class support.
prec1 = precision_score(y_test, y_pred1, average='weighted')
prec2 = precision_score(y_test, y_pred2, average='weighted')

rec1 = recall_score(y_test, y_pred1, average='weighted')
rec2 = recall_score(y_test, y_pred2, average='weighted')

# ROC analysis needs a continuous score per sample, not the hard label:
# feeding predict() output to roc_curve leaves a single operating point, so
# the "curve" is just two straight segments and the AUC is not a ranking AUC.
# Score each sample with the predicted probability of the positive class.
# NOTE: roc_curve handles binary targets only; column 1 of predict_proba
# corresponds to classes_[1], which is passed as pos_label so that label
# encodings other than {0, 1} / {-1, 1} are also handled.
y_score1 = tree1.predict_proba(X_test)[:, 1]
y_score2 = tree2.predict_proba(X_test)[:, 1]

fpr1, tpr1, _ = roc_curve(y_test, y_score1, pos_label=tree1.classes_[1])
fpr2, tpr2, _ = roc_curve(y_test, y_score2, pos_label=tree2.classes_[1])

# Area under each ROC curve (trapezoidal rule over the fpr/tpr points).
roc_auc1 = auc(fpr1, tpr1)
roc_auc2 = auc(fpr2, tpr2)

# Report the test-set metrics of each tree as a titled group of
# "<label> <value>" lines. The second title starts with a newline so the two
# groups are separated by a blank line.
for _title, _scores in (
    ("Decision Tree 1 (max_depth=3):", (acc1, prec1, rec1, roc_auc1)),
    ("\nDecision Tree 2 (max_depth=5):", (acc2, prec2, rec2, roc_auc2)),
):
    print(_title)
    for _label, _value in zip(
        ("Accuracy:", "Precision:", "Recall:", "ROC AUC:"), _scores
    ):
        print(_label, _value)

# Draw the ROC curve of each tree on the same axes, with its AUC (two
# decimals) in the legend entry.
for _fpr, _tpr, _legend in (
    (fpr1, tpr1, 'Decision Tree 1 (AUC = %0.2f)' % roc_auc1),
    (fpr2, tpr2, 'Decision Tree 2 (AUC = %0.2f)' % roc_auc2),
):
    plt.plot(_fpr, _tpr, label=_legend)

# Dashed red diagonal from (0, 0) to (1, 1) as the chance-level reference.
plt.plot([0, 1], [0, 1],'r--')

# Both rates are proportions, so clamp the axes to the unit square.
plt.xlim([0, 1])
plt.ylim([0, 1])

plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()