<a href="https://colab.research.google.com/github/senonaderian/abortion/blob/MLs/abortion_tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Load the dataset from 'minmax_normalized_data.csv' (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='minmax_normalized_data.csv')

In [None]:
# Separate the 'Abortion' target column from the remaining feature columns
y = data['Abortion']
X = data.drop(columns='Abortion')

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Keep the 10 features with the highest chi-squared score against the target.
# The selector is fitted on the training split only, then applied to both splits.
kbest_selector = SelectKBest(score_func=chi2, k=10)
kbest_selector.fit(X_train, y_train)
X_train_kbest = kbest_selector.transform(X_train)
X_test_kbest = kbest_selector.transform(X_test)


In [None]:
# Hyperparameter search space for the decision tree (2 * 4 * 3 * 3 = 72 combinations)
param_grid = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [None]:
# Exhaustively search param_grid with 5-fold cross-validation on the selected
# training features, then keep the best-scoring tree found by the search.
# NOTE(review): the feature selector was fitted on the whole training split before
# this cross-validation, so the CV folds share that selection step.
clf = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5).fit(
    X_train_kbest, y_train
)
clf_optimized = grid_search.best_estimator_


In [None]:
# Predict labels for the held-out test split with the tuned tree
y_pred = clf_optimized.predict(X=X_test_kbest)


In [None]:
# Score the test-set predictions: F1 (scikit-learn's default averaging) and overall accuracy
f1 = f1_score(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix of test-set predictions (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)


In [None]:
# Report the evaluation metrics, one item per line
print(
    f"Accuracy: {accuracy:.2f}",
    f"F1 Score: {f1:.2f}",
    "Confusion Matrix:",
    cm,
    sep="\n",
)

In [None]:
# The tree was trained on the columns kept by SelectKBest, not on every column of X.
# plot_tree looks up feature_names by the tree's own feature index, so the list must
# contain exactly the selected columns (in selector order); passing all of X's columns
# would label the nodes with the wrong feature names.
feature_names = X.columns[kbest_selector.get_support()].tolist()

# Plot the decision tree with the selected feature names and stringified class labels
plt.figure(figsize=(15, 10))
plot_tree(
    clf_optimized,
    filled=True,
    feature_names=feature_names,
    class_names=[str(x) for x in clf_optimized.classes_],
)
plt.show()


In [None]:
# Rank the features kept by the selector by the tuned tree's feature importances
feature_names = X.columns[kbest_selector.get_support()]
importances = clf_optimized.feature_importances_
feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the importances, one bar per selected feature
plt.figure(figsize=(10, 6))
plt.barh(feature_importance_df['Feature'], feature_importance_df['Importance'])
plt.title('Feature Importances')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()