<a href="https://colab.research.google.com/github/prabhatksr22/HR-Analytics/blob/main/HR_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Generating synthetic data
# The global seed is fixed and the order of the random draws below is kept
# stable, so every run produces the same dataset.
np.random.seed(42)
sample_size = 1000

# Base features (randint's upper bound is exclusive)
age = np.random.randint(22, 60, size=sample_size)
monthly_income = np.random.randint(3000, 15000, size=sample_size)
years_of_experience = np.random.randint(0, 30, size=sample_size)
satisfaction_score = np.random.uniform(1.0, 5.0, size=sample_size)

# Per-row noise for each feature
age_noise = np.random.randint(-5, 6, size=sample_size)
monthly_income_noise = np.random.randint(-1000, 1001, size=sample_size)
years_of_experience_noise = np.random.randint(-3, 4, size=sample_size)
satisfaction_score_noise = np.random.uniform(-0.5, 0.5, size=sample_size)

# Apply the noise. Age and income stay in a plausible range after noise
# (17-64 and 2000-15999), so they are used as-is.
age = age + age_noise
monthly_income = monthly_income + monthly_income_noise

# Experience starts at 0 and the noise reaches -3, so the raw sum can be
# negative; clamp at zero to keep the feature physically meaningful.
years_of_experience = np.clip(years_of_experience + years_of_experience_noise, 0, None)

# Satisfaction is generated on a 1-5 scale; keep the noisy score on that scale.
satisfaction_score = np.clip(satisfaction_score + satisfaction_score_noise, 1.0, 5.0)

# Target variable (0: Stay, 1: Leave)
# Drawn independently of the features above.
left_company = np.random.randint(0, 2, size=sample_size)

# Create DataFrame
data = pd.DataFrame({
    'Age': age,
    'Monthly_Income': monthly_income,
    'Years_of_Experience': years_of_experience,
    'Satisfaction_Score': satisfaction_score,
    'Left_Company': left_company
})


In [2]:
from sklearn.model_selection import train_test_split

# Separate the attrition label from the predictor columns.
y = data['Left_Company']
X = data.drop(columns='Left_Company')

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
# Show the synthetic DataFrame as this cell's output.
data

Unnamed: 0,Age,Monthly_Income,Years_of_Experience,Satisfaction_Score,Left_Company
0,49,6185,9,1.677308,0
1,41,13178,22,2.683564,1
2,31,15413,12,4.508963,1
3,42,8056,8,3.414631,0
4,38,13510,12,2.830074,1
...,...,...,...,...,...
995,34,8412,26,4.693267,0
996,46,13619,24,1.850209,0
997,44,4161,23,1.829736,0
998,37,14015,14,4.697869,0


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Baseline model: a 100-tree random forest with a fixed seed. fit() returns
# the estimator itself, so construction and training are chained.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and compute every metric up front.
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report: overall accuracy, then the confusion matrix, then per-class metrics.
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(confusion_mat)
print("Classification Report:")
print(class_report)


Accuracy: 0.52
Confusion Matrix:
[[56 36]
 [60 48]]
Classification Report:
              precision    recall  f1-score   support

           0       0.48      0.61      0.54        92
           1       0.57      0.44      0.50       108

    accuracy                           0.52       200
   macro avg       0.53      0.53      0.52       200
weighted avg       0.53      0.52      0.52       200



In [5]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
# Define the parameter grid for Grid Search
# Every combination is evaluated: 3 * 3 * 3 * 3 = 81 candidates, each scored
# with 5-fold cross-validation.
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize the Random Forest classifier
# Unfitted template estimator with a fixed seed; this rebinds rf_classifier.
rf_classifier = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV
# refit=True (the library default, spelled out here) retrains the winning
# configuration on all of the data passed to fit().
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, scoring='accuracy', refit=True)

# Perform Grid Search on the training data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and model
# best_estimator_ is already trained on the full training set thanks to
# refit=True, so no additional fit() call is needed before predicting.
best_params_grid = grid_search.best_params_
best_rf_model_grid = grid_search.best_estimator_

# Make predictions on the test set using the best model
y_pred_grid = best_rf_model_grid.predict(X_test)

# Evaluate the best model
accuracy_grid = accuracy_score(y_test, y_pred_grid)
print("Grid Search Results:")
print(f"Best Hyperparameters: {best_params_grid}")
print(f"Accuracy: {accuracy_grid:.2f}")


Grid Search Results:
Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.48


In [7]:
# Define the parameter distribution for Random Search
# Candidate values are sampled from these lists rather than enumerated.
param_dist = {
    'n_estimators': [50, 100, 150, 200, 250],
    'max_depth': [None] + list(range(5, 20)),
    'min_samples_split': list(range(2, 20)),
    'min_samples_leaf': list(range(1, 10))
}

# Initialize RandomizedSearchCV
# NOTE: rf_classifier must already exist in the kernel; this cell expects the
# RandomForestClassifier(random_state=42) created in the grid-search cell.
# 20 sampled candidates, each scored with 5-fold cross-validation. refit=True
# (the library default, spelled out here) retrains the winner on all of the
# data passed to fit().
random_search = RandomizedSearchCV(
    rf_classifier,
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    random_state=42,
    refit=True,
)

# Perform Random Search on the training data
random_search.fit(X_train, y_train)

# Get the best hyperparameters and model
# best_estimator_ is already trained on the full training set thanks to
# refit=True, so no additional fit() call is needed before predicting.
best_params_random = random_search.best_params_
best_rf_model_random = random_search.best_estimator_

# Make predictions on the test set using the best model
y_pred_random = best_rf_model_random.predict(X_test)

# Evaluate the best model
accuracy_random = accuracy_score(y_test, y_pred_random)
print("\nRandom Search Results:")
print(f"Best Hyperparameters: {best_params_random}")
print(f"Accuracy: {accuracy_random:.2f}")



Random Search Results:
Best Hyperparameters: {'n_estimators': 100, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_depth': 18}
Accuracy: 0.47


In [11]:
# Requires best_rf_model_random, the estimator selected by RandomizedSearchCV,
# to be present in the kernel.

# Example feature values for one hypothetical employee. The keys use the same
# names and order as the columns of the training DataFrame.
# NOTE(review): a Satisfaction_Score of 6 is above the 1.0-5.0 base range
# (plus at most 0.5 of noise) used when the training data was generated --
# confirm this example value is intended.
new_employee_features = {
    'Age': 35,
    'Monthly_Income': 8000,
    'Years_of_Experience': 18,
    'Satisfaction_Score': 6
}

# Wrap the single record in a list so pandas builds a one-row DataFrame.
new_employee_data = pd.DataFrame([new_employee_features])

# predict_proba yields one column per class; column index 1 is taken as the
# probability of class 1 (Leave).
class_probabilities = best_rf_model_random.predict_proba(new_employee_data)
attrition_chance = class_probabilities[:, 1]

print(f"The attrition chance for the new employee is: {attrition_chance[0]:.2%}")


The attrition chance for the new employee is: 33.36%
