Title: Classification Model Performance Metrics

Sub-Title: Confusion Matrix

Task 1: Construct and interpret a confusion matrix for tumor classification.

In [None]:

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Task 1 workflow: build a synthetic tumor dataset, train a random forest,
# then construct, plot and interpret the confusion matrix on the held-out set.

# Sample dataset (replace with your actual dataset).
# A seeded generator keeps the synthetic data reproducible, in line with the
# fixed random_state already used for the split and the classifier below.
rng = np.random.default_rng(42)
data = pd.DataFrame({
    'feature1': rng.random(100),
    'feature2': rng.random(100),
    'tumor_type': rng.choice(['Benign', 'Malignant'], size=100),
})

# Map tumor type to numerical values (Malignant is the positive class)
tumor_map = {'Benign': 0, 'Malignant': 1}
data['tumor_type'] = data['tumor_type'].map(tumor_map)

# .map() silently converts labels missing from tumor_map into NaN; fail loudly
# instead of training on a corrupted target column.
if data['tumor_type'].isna().any():
    raise ValueError("tumor_type contains labels other than 'Benign' or 'Malignant'")

# Class ids and display names, both in confusion-matrix row/column order
class_labels = [tumor_map['Benign'], tumor_map['Malignant']]
class_names = ['Benign', 'Malignant']

# Split the dataset into features (X) and target (y)
X = data[['feature1', 'feature2']]
y = data['tumor_type']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Construct the confusion matrix.
# Passing labels explicitly guarantees a 2x2 matrix even when one class is
# absent from both y_test and y_pred; without it the matrix can shrink to 1x1
# and the TP/TN/FP/FN extraction below would raise IndexError.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot the confusion matrix with class names on both axes
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Interpret the confusion matrix
print("Confusion Matrix:")
print(cm)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))

# Calculate metrics from the confusion matrix.
# Rows are true labels and columns are predicted labels, so the flattened
# 2x2 matrix reads: TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

# Guard every ratio against an empty denominator (e.g. no malignant samples
# in the test set) so the result is 0.0 rather than NaN plus a RuntimeWarning.
sensitivity = TP / (TP + FN) if (TP + FN) != 0 else 0.0
specificity = TN / (TN + FP) if (TN + FP) != 0 else 0.0
precision = TP / (TP + FP) if (TP + FP) != 0 else 0.0

print("Sensitivity (Recall):", sensitivity)
print("Specificity:", specificity)
print("Precision:", precision)



Task 2: Create a confusion matrix for fraud detection model evaluation.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Task 2 workflow: build a synthetic, imbalanced fraud dataset, train a random
# forest, then create and evaluate the confusion matrix on the held-out set.

# Sample dataset (replace with your actual dataset).
# A seeded generator keeps the synthetic data reproducible, in line with the
# fixed random_state already used for the split and the classifier below.
rng = np.random.default_rng(42)
data = pd.DataFrame({
    'feature1': rng.random(100),
    'feature2': rng.random(100),
    'is_fraud': rng.choice([0, 1], size=100, p=[0.9, 0.1]),  # 1: Fraud, 0: Not Fraud
})

# Class ids and display names, both in confusion-matrix row/column order
class_labels = [0, 1]
class_names = ['Not Fraud', 'Fraud']

# Split the dataset into features (X) and target (y)
X = data[['feature1', 'feature2']]
y = data['is_fraud']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Create the confusion matrix.
# With only ~10% fraud, a 20-row test set can easily contain no fraud at all.
# Passing labels explicitly keeps the matrix 2x2 in that case; without it the
# matrix can shrink to 1x1 and the cm[1, 1] lookups below raise IndexError.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot the confusion matrix with class names on both axes
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Calculate metrics from the confusion matrix
# (rows are true labels, columns are predicted labels)
TP = cm[1, 1]  # True positives (fraud correctly identified)
TN = cm[0, 0]  # True negatives (non-fraud correctly identified)
FP = cm[0, 1]  # False positives (non-fraud incorrectly identified as fraud)
FN = cm[1, 0]  # False negatives (fraud incorrectly identified as non-fraud)

# Each ratio falls back to 0.0 when its denominator is empty, which happens
# whenever no fraud is present or no fraud is predicted.
precision = TP / (TP + FP) if (TP + FP) != 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0.0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0.0

print("Confusion Matrix:")
print(cm)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))
print("Precision:", precision)
print("Recall (Sensitivity):", recall)
print("F1-score:", f1_score)




Task 3: Use a confusion matrix to evaluate a model predicting employee retention.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Task 3 workflow: build a synthetic employee-retention dataset, train a random
# forest, then create and evaluate the confusion matrix on the held-out set.

# Sample dataset (replace with your actual dataset).
# A seeded generator keeps the synthetic data reproducible, in line with the
# fixed random_state already used for the split and the classifier below.
rng = np.random.default_rng(42)
data = pd.DataFrame({
    'satisfaction_level': rng.random(100),
    'last_evaluation': rng.random(100),
    'retention': rng.choice([0, 1], size=100, p=[0.7, 0.3]),  # 1: Retained, 0: Not Retained
})

# Class ids and display names, both in confusion-matrix row/column order
class_labels = [0, 1]
class_names = ['Not Retained', 'Retained']

# Split the dataset into features (X) and target (y)
X = data[['satisfaction_level', 'last_evaluation']]
y = data['retention']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Create the confusion matrix.
# Passing labels explicitly guarantees a 2x2 matrix even when one class is
# absent from both y_test and y_pred; without it the matrix can shrink to 1x1
# and the cm[1, 1] lookups below raise IndexError.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot the confusion matrix with class names on both axes
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap='Blues', fmt='d',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Calculate metrics from the confusion matrix
# (rows are true labels, columns are predicted labels)
TP = cm[1, 1]  # True positives (retained employees correctly predicted)
TN = cm[0, 0]  # True negatives (not retained employees correctly predicted)
FP = cm[0, 1]  # False positives (not retained employees incorrectly predicted as retained)
FN = cm[1, 0]  # False negatives (retained employees incorrectly predicted as not retained)

# Each ratio falls back to 0.0 when its denominator is empty, which happens
# whenever no retained employee is present or none is predicted.
precision = TP / (TP + FP) if (TP + FP) != 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0.0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0.0

print("Confusion Matrix:")
print(cm)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))
print("Precision:", precision)
print("Recall (Sensitivity):", recall)
print("F1-score:", f1_score)
