In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
data = pd.read_csv('VOIS_Dataset.csv')


In [2]:

# Define features and target variable
X = data.drop('Attrition', axis=1)  # Features
y = data['Attrition']  # Target variable

In [3]:

# Perform one-hot encoding for categorical columns
X = pd.get_dummies(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:

# Initialize models
logistic_reg = LogisticRegression(max_iter=10000)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
decision_tree = DecisionTreeClassifier(random_state=42)


In [5]:
# Train and evaluate models
models = {
    'Logistic Regression': logistic_reg,
    'Random Forest': random_forest,
    'Decision Tree': decision_tree
}

accuracy_results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    accuracy_results[name] = accuracy
    
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy * 100:.2f}%")
    
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, predictions))
    
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    print("-------------------------------------------")

Model: Logistic Regression
Accuracy: 89.80%

Confusion Matrix:
[[250   5]
 [ 25  14]]

Classification Report:
              precision    recall  f1-score   support

          No       0.91      0.98      0.94       255
         Yes       0.74      0.36      0.48        39

    accuracy                           0.90       294
   macro avg       0.82      0.67      0.71       294
weighted avg       0.89      0.90      0.88       294

-------------------------------------------
Model: Random Forest
Accuracy: 87.76%

Confusion Matrix:
[[252   3]
 [ 33   6]]

Classification Report:
              precision    recall  f1-score   support

          No       0.88      0.99      0.93       255
         Yes       0.67      0.15      0.25        39

    accuracy                           0.88       294
   macro avg       0.78      0.57      0.59       294
weighted avg       0.86      0.88      0.84       294

-------------------------------------------
Model: Decision Tree
Accuracy: 77.89%

Confu

In [6]:
# Find the best model
best_model = max(accuracy_results, key=accuracy_results.get)
best_accuracy = accuracy_results[best_model]

print(f"The best model is: {best_model}")
print(f"Accuracy of the best model: {best_accuracy * 100:.2f}%")

The best model is: Logistic Regression
Accuracy of the best model: 89.80%
