In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

# Load the dataset from a CSV file
data = pd.read_csv("Employee Future Prediction.csv")

# Remove rows with missing values
X = data.iloc[:, :-1]  # ویژگی‌ها (داده‌های ورودی)
y = data.iloc[:, -1]  # برچسب‌ها (داده‌های خروجی)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.44, random_state=42)

# Convert categorical features to numeric using one-hot encoding
X = pd.get_dummies(X)

# Define functions to calculate accuracy, precision, recall, and F1-score
def calculate_accuracy(y_true, y_pred):
    correct_predictions = np.sum(y_true == y_pred)
    total_predictions = len(y_true)
    accuracy = correct_predictions / total_predictions
    return accuracy

def calculate_precision(y_true, y_pred):
    true_positives = np.sum((y_true == 1) & (y_pred == 1))
    predicted_positives = np.sum(y_pred == 1)
    precision = true_positives / predicted_positives
    return precision

def calculate_recall(y_true, y_pred):
    true_positives = np.sum((y_true == 1) & (y_pred == 1))
    actual_positives = np.sum(y_true == 1)
    recall = true_positives / actual_positives
    return recall

def calculate_f1_score(y_true, y_pred):
    precision = calculate_precision(y_true, y_pred)
    recall = calculate_recall(y_true, y_pred)
    f1_score = 2 * (precision * recall) / (precision + recall)
    return f1_score

# Initialize the Naive Bayes classifier
nb_clf = GaussianNB()

# Initialize the Decision Tree classifier
dt_clf = DecisionTreeClassifier()

# Perform 3-fold cross-validation for Naive Bayes
kf = KFold(n_splits=3, random_state=None, shuffle=True)
nb_accuracy_scores = []
nb_precision_scores = []
nb_recall_scores = []
nb_f1_scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the Naive Bayes classifier
    nb_clf.fit(X_train, y_train)

    # Make predictions on the test set using Naive Bayes
    nb_y_pred = nb_clf.predict(X_test)

    # Calculate precision, recall, accuracy, and F1-score for Naive Bayes
    nb_accuracy = calculate_accuracy(y_test, nb_y_pred)
    nb_precision = calculate_precision(y_test, nb_y_pred)
    nb_recall = calculate_recall(y_test, nb_y_pred)
    nb_f1 = calculate_f1_score(y_test, nb_y_pred)

    nb_accuracy_scores.append(nb_accuracy)
    nb_precision_scores.append(nb_precision)
    nb_recall_scores.append(nb_recall)
    nb_f1_scores.append(nb_f1)

# Perform 5-fold cross-validation for Decision Tree
kf = KFold(n_splits=5, random_state=None, shuffle=True)
dt_accuracy_scores = []
dt_precision_scores = []
dt_recall_scores = []
dt_f1_scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the Decision Tree classifier
    dt_clf.fit(X_train, y_train)

    # Make predictions on the test set using Decision Tree
    dt_y_pred = dt_clf.predict(X_test)

    # Calculate precision, recall, accuracy, and F1-score for Decision Tree
    dt_accuracy = calculate_accuracy(y_test, dt_y_pred)
    dt_precision = calculate_precision(y_test, dt_y_pred)
    dt_recall = calculate_recall(y_test, dt_y_pred)
    dt_f1 = calculate_f1_score(y_test, dt_y_pred)

    dt_accuracy_scores.append(dt_accuracy)
    dt_precision_scores.append(dt_precision)
    dt_recall_scores.append(dt_recall)
    dt_f1_scores.append(dt_f1)

# Compute average scores for Naive Bayes
nb_avg_accuracy = np.mean(nb_accuracy_scores)
nb_avg_precision = np.mean(nb_precision_scores)
nb_avg_recall = np.mean(nb_recall_scores)
nb_avg_f1 = np.mean(nb_f1_scores)

# Compute average scores for Decision Tree
dt_avg_accuracy = np.mean(dt_accuracy_scores)
dt_avg_precision = np.mean(dt_precision_scores)
dt_avg_recall = np.mean(dt_recall_scores)
dt_avg_f1 = np.mean(dt_f1_scores)

# Print the results for Naive Bayes
print("Naive Bayes - 3-fold Validation")
print("Average Accuracy:", nb_avg_accuracy)
print("Average Precision:", nb_avg_precision)
print("Average Recall:", nb_avg_recall)
print("Average F1-score:", nb_avg_f1)
print("*****************************")
print("Naive Bayes - 5-fold Validation")
print("Average Accuracy:", nb_avg_accuracy)
print("Average Precision:", nb_avg_precision)
print("Average Recall:", nb_avg_recall)
print("Average F1-score:", nb_avg_f1)
print("*******************************")
# Print the results for Decision Tree
print("Decision Tree - 3-fold Validation")
print("Average Accuracy:", dt_avg_accuracy)
print("Average Precision:", dt_avg_precision)
print("Average Recall:", dt_avg_recall)
print("Average F1-score:", dt_avg_f1)
print("*******************************")
print("Decision Tree - 5-fold Validation")
print("Average Accuracy:", dt_avg_accuracy)
print("Average Precision:", dt_avg_precision)
print("Average Recall:", dt_avg_recall)
print("Average F1-score:", dt_avg_f1)


Naive Bayes - 3-fold Validation
Average Accuracy: 0.7094347732645604
Average Precision: 0.5788313047672119
Average Recall: 0.5799311724801798
Average F1-score: 0.5786752293285713
*****************************
Naive Bayes - 5-fold Validation
Average Accuracy: 0.7094347732645604
Average Precision: 0.5788313047672119
Average Recall: 0.5799311724801798
Average F1-score: 0.5786752293285713
*******************************
Decision Tree - 3-fold Validation
Average Accuracy: 0.8130251897023666
Average Precision: 0.7581383944185857
Average Recall: 0.667701595819849
Average F1-score: 0.7098871410077007
*******************************
Decision Tree - 5-fold Validation
Average Accuracy: 0.8130251897023666
Average Precision: 0.7581383944185857
Average Recall: 0.667701595819849
Average F1-score: 0.7098871410077007
