***This notebook is a part of chapter 4.4***

# 1. Prepare classification dataset

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

# Step 1: Generate a synthetic classification dataset:
# 200 samples with 10 features, of which 2 are informative and 5 are redundant.
# The fixed random_state keeps the generated data identical across runs.
X, y = make_classification(
    n_samples=200,
    n_features=10,
    n_informative=2,
    n_redundant=5,
    random_state=42,
)

# The train/test split is left disabled here; the model cells below fit and
# score on the full (X, y).
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Wrap the feature matrix in a DataFrame (one 'col_<i>' column per feature)
# and attach the class labels as a 'label' column.
# The column count is read from X itself so this cell stays correct if the
# number of generated features is changed, and .assign builds the frame in a
# single expression so re-running the cell never mutates an existing frame.
feature_columns = [f'col_{i}' for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_columns).assign(label=y)

In [None]:
# Preview the first five rows of the feature/label table.
df.head(n=5)

In [None]:
# This visualization takes around 2-3 minutes to render.
# The pairwise plots, colored by label, hint at which features affect the label:
# focus on the distributions where the two classes are clearly separated.
sns.pairplot(data=df, hue='label')

# 2. Logistic regression

In [None]:
# Fit a logistic-regression classifier on the full dataset, then predict
# labels for those same samples.
logistic_model = LogisticRegression(max_iter=1000).fit(X, y)
y_pred_logistic = logistic_model.predict(X)

# Evaluate Logistic Regression: accuracy, per-class report, confusion matrix.
# NOTE(review): predictions are made on the same X the model was fitted on,
# so these are training-set metrics rather than held-out performance.
accuracy_logistic = accuracy_score(y_true=y, y_pred=y_pred_logistic)
print("Logistic Regression Accuracy:", accuracy_logistic)
print(
    "Classification Report for Logistic Regression:\n",
    classification_report(y_true=y, y_pred=y_pred_logistic),
)
print(
    "Confusion Matrix for Logistic Regression:\n",
    confusion_matrix(y_true=y, y_pred=y_pred_logistic),
)


# 3. Decision tree

In [None]:
# Fit a depth-limited decision tree (max_depth=3, fixed seed) on the full
# dataset, then predict labels for those same samples.
tree_model = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X, y)
y_pred_tree = tree_model.predict(X)

# Evaluate Decision Tree: accuracy, per-class report, confusion matrix.
# NOTE(review): predictions are made on the same X the model was fitted on,
# so these are training-set metrics rather than held-out performance.
accuracy_tree = accuracy_score(y_true=y, y_pred=y_pred_tree)
print("\nDecision Tree Accuracy:", accuracy_tree)
print(
    "Classification Report for Decision Tree:\n",
    classification_report(y_true=y, y_pred=y_pred_tree),
)
print(
    "Confusion Matrix for Decision Tree:\n",
    confusion_matrix(y_true=y, y_pred=y_pred_tree),
)


# 4. Neural network

In [None]:
# Fit a multi-layer perceptron with a single hidden layer of 20 units
# (at most 500 iterations, fixed seed) on the full dataset, then predict
# labels for those same samples.
nn_model = MLPClassifier(
    hidden_layer_sizes=(20,),
    max_iter=500,
    random_state=42,
).fit(X, y)
y_pred_nn = nn_model.predict(X)

# Evaluate Neural Network: accuracy, per-class report, confusion matrix.
# NOTE(review): predictions are made on the same X the model was fitted on,
# so these are training-set metrics rather than held-out performance.
accuracy_nn = accuracy_score(y_true=y, y_pred=y_pred_nn)
print("\nNeural Network Accuracy:", accuracy_nn)
print(
    "Classification Report for Neural Network:\n",
    classification_report(y_true=y, y_pred=y_pred_nn),
)
print(
    "Confusion Matrix for Neural Network:\n",
    confusion_matrix(y_true=y, y_pred=y_pred_nn),
)

# 5. Compare model performances

In [None]:
# Plot confusion matrices side by side, one panel per model.
# Each panel compares the labels y with that model's predictions; the three
# panels share identical styling, so they are drawn in a single loop instead
# of three copy-pasted stanzas.
model_predictions = [
    ('Logistic Regression', y_pred_logistic),
    ('Decision Tree', y_pred_tree),
    ('Neural Network', y_pred_nn),
]

fig, axes = plt.subplots(1, len(model_predictions), figsize=(18, 5))

for panel_ax, (panel_name, panel_pred) in zip(axes, model_predictions):
    # Integer-annotated heatmap of the confusion matrix (rows: actual, columns: predicted).
    sns.heatmap(confusion_matrix(y, panel_pred), annot=True, fmt='d', ax=panel_ax, cmap='Blues')
    panel_ax.set_title(f'{panel_name} Confusion Matrix')
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()