In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report

In [None]:
# Read the HR export into `df`, the working DataFrame used by the later cells.
df = pd.read_csv(filepath_or_buffer='HumanResources.csv')

# Show the first five rows as this cell's output.
df.head(n=5)

In [None]:
# Binary target: 1 when a termination date is recorded, 0 when it is missing.
# The column is spelled 'attrition' (it was previously created as 'attriction'),
# which is the name the plotting cells look up with hue='attrition' /
# x='attrition' / df.groupby(...)['attrition'].
df['attrition'] = df['termination_date'].notna().astype(int)

In [None]:
# Remove per-employee identifiers plus the raw termination date, which the
# target column was derived from; the frame is modified in place.
identifier_columns = ['employee_id', 'first_name', 'last_name']
df.drop(columns=identifier_columns + ['termination_date'], inplace=True)

In [None]:
# Feature engineering: turn the two date columns into whole-year durations.
df['hire_date'] = pd.to_datetime(df['hire_date'])
df['birth_date'] = pd.to_datetime(df['birth_date'])


def _completed_years(start, end):
    """Return the number of full calendar years elapsed from ``start`` to ``end``.

    Parameters:
        start: datetime Series (one start date per row).
        end: a single ``pd.Timestamp`` used as the reference date for every row.

    Returns:
        A Series of whole years. Rows whose ``start`` is missing (NaT) yield NaN.

    Comparing calendar components avoids the drift of ``days // 365``, which
    ignores leap days and can be off by one close to an anniversary.
    """
    years = end.year - start.dt.year
    # True where this year's anniversary has not been reached yet.
    anniversary_pending = (start.dt.month > end.month) | (
        (start.dt.month == end.month) & (start.dt.day > end.day)
    )
    return years - anniversary_pending.astype(int)


# Take "today" once so both features are measured against the same instant.
reference_date = pd.Timestamp.today()

df['age'] = _completed_years(df['birth_date'], reference_date)
# NOTE(review): tenure is measured up to today for every row, including
# employees who have already left, because no termination date is consulted
# here -- confirm this is the intended definition of experience.
df['experience_years'] = _completed_years(df['hire_date'], reference_date)

# The raw dates are no longer needed once the durations exist.
df.drop(columns=['hire_date', 'birth_date'], inplace=True)

In [None]:
# Ordinal scales: a larger code means a higher education level / better rating.
education_map = {'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}
performance_map = {
    'Needs Improvement': 1,
    'Satisfactory': 2,
    'Good': 3,
    'Excellent': 4,
}

# Column -> {label: numeric code}. Series.map turns any label that is absent
# from its mapping into NaN.
encodings = {
    'gender': {'Male': 1, 'Female': 0},
    'overtime': {'Yes': 1, 'No': 0},
    'education_level': education_map,
    'performance_rating': performance_map,
}

for column_name, codes in encodings.items():
    df[column_name] = df[column_name].map(codes)

In [None]:
pip install pandas

In [None]:
# One-hot encode the remaining categorical columns; drop_first=True omits the
# first level of each column so it acts as the baseline.
categorical_columns = ['state', 'city', 'department', 'job_title']
df = pd.get_dummies(data=df, columns=categorical_columns, drop_first=True)

In [None]:
# Separate the predictors from the target. The target column is the lowercase
# 'attrition' used by the plotting cells; looking up 'Attrition' raised a
# KeyError because no column carries that capitalised name.
x = df.drop(columns='attrition')
y = df['attrition']

# Hold out 20% of the rows for evaluation. stratify=y keeps the class ratio of
# the target the same in both splits; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Standardise the features. StandardScaler is already imported in the import
# cell at the top of the notebook, so it is not re-imported here.
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
# NOTE: x_train / x_test are rebound to the scaled output, so re-running this
# cell on its own would scale already-scaled data; re-run the split cell first.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Train a random forest on the training split; the fixed seed makes the fitted
# forest repeatable across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X=x_train, y=y_train)

In [None]:
# Predict on the held-out split, then report overall accuracy followed by the
# per-class report.
y_pred = model.predict(x_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# 1) Attrition distribution: number of rows per target value.
# The column is the lowercase 'attrition', matching the other plotting cells;
# x='Attrition' raised an error because no column has that capitalised name.
sns.countplot(x='attrition', data=df)
plt.title("Attrition distribution")
plt.show()

In [None]:
# Gender vs attrition: row counts per gender code, split by the attrition flag.
fig, ax = plt.subplots()
sns.countplot(data=df, x='gender', hue='attrition', ax=ax)
ax.set_title("Gender vs attrition")
plt.show()

In [None]:
# Department vs attrition: mean of the attrition flag for each value of the
# 'department_Sales' indicator column.
# NOTE(review): assumes the one-hot encoding produced a 'department_Sales'
# column -- confirm against the dataset's department values.
dept_attr = df.groupby(by='department_Sales')['attrition'].mean()

ax = dept_attr.plot.bar()
ax.set_title("Department-wise attrition Rate(Sample)")
ax.set_ylabel("attrition Rate")
plt.show()

In [None]:
sns.histplot(df['salary'], kde=True)
plt.title("Salary Distribution")
plt.show()

In [None]:
sns.boxplot(x='attrition', y='salary', data=df)
plt.title("salary vs attririon")
plt.show()

In [None]:
# Correlation heatmap across the numeric columns.
# numeric_only=True skips any column that is still non-numeric at this point;
# without it, pandas >= 2.0 raises when such a column is present.
plt.figure(figsize=(12, 8))
correlations = df.corr(numeric_only=True)
sns.heatmap(correlations, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()