## Standard Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Apply seaborn's "darkgrid" look to every figure drawn after this cell.
sns.set_style(style="darkgrid")

To ignore harmless warnings

In [None]:
import warnings
warnings.filterwarnings('ignore')

## Loading Data

In [None]:
# Relative location of the employee-attrition CSV.
ATTRITION_CSV = "../input/ibm-hr-analytics-attrition-dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv"

# Read the file into the DataFrame that every later cell works on.
ibm = pd.read_csv(ATTRITION_CSV)

## Exploratory Data Analysis

In [None]:
# Preview the first five rows (head()'s default row count, spelled out).
ibm.head(n=5)

In [None]:
# Print the frame's column names, non-null counts, dtypes and memory usage.
ibm.info(verbose=None)

There are 35 columns!<br>Most of them are categorical

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) with the
# default quartiles written out explicitly.
ibm.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Missing-value map: each cell of the mask is True where the entry is null,
# drawn on a fixed 0..1 colour scale.
fig, ax = plt.subplots(figsize=(16, 9))
null_mask = ibm.isna()
sns.heatmap(null_mask, vmin=0, vmax=1, cmap='coolwarm', ax=ax);

There are no null values in the dataset

In [None]:
# Histogram of employee ages (no density curve).
# `sns.distplot` is deprecated since seaborn 0.11 and scheduled for removal;
# `sns.histplot` is its replacement for the kde=False case.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=ibm, x='Age', kde=False, ax=ax);

It looks like the average age of an IBM employee is between 30 and 40

In [None]:
# Average performance rating (barplot's default estimator) per marital status.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='MaritalStatus', y='PerformanceRating', ax=ax);

On average, marital status does not have any effect on work performance

In [None]:
# Box plot of commuting distance, one box per attrition group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='Attrition', y='DistanceFromHome', ax=ax);

Employees who attrited had their **homes farther** away than the rest of the employees

In [None]:
# Histogram of the daily rate (no density curve).
# `sns.distplot` is deprecated since seaborn 0.11 and scheduled for removal;
# `sns.histplot` is its replacement for the kde=False case.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=ibm, x='DailyRate', kde=False, ax=ax);

The DailyRate for all the employees is almost **Uniform**

In [None]:
# Average environment-satisfaction score for each attrition group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Attrition', y='EnvironmentSatisfaction', ax=ax);

It's obvious that attrited employees were **dissatisfied with their work environments**

In [None]:
# Box plot of monthly income, one box per attrition group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='Attrition', y='MonthlyIncome', ax=ax);

Again it's obvious that the attrited employees had **lower Monthly Income** than others but there are **many outliers**

In [None]:
# Box plot of years at the company (x-axis), one box per attrition group (y-axis).
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='YearsAtCompany', y='Attrition', ax=ax);

The attrited employees had worked at the company for less time than the others, but again there are exceptions

In [None]:
# Heatmap of pairwise correlations on a fixed -1..1 colour scale.
# Numeric columns are selected explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# on string-valued ones (this notebook compares 'Over18' against a string,
# so at least one such column exists).
fig, ax = plt.subplots(figsize=(16, 9))
numeric_corr = ibm.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, vmin=-1, vmax=1, cmap='coolwarm', ax=ax);

Let's have a look at the Correlation Matrix<br>
Most of the conclusions from the **Correlation Matrix are obvious**

In [None]:
# Average monthly income for each education level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Education', y='MonthlyIncome', ax=ax);

**Higher levels of education** tend to lead to **Higher Income**<br>
Time to focus on my studies !!

In [None]:
# Average performance rating for each business-travel category.
# The trailing semicolon keeps the cell from echoing the Axes repr, matching
# the other plotting cells in this notebook.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='BusinessTravel', y='PerformanceRating', ax=ax);

Business travel has no effect on performance

In [None]:
# Average performance rating for each education level.
# The trailing semicolon keeps the cell from echoing the Axes repr, matching
# the other plotting cells in this notebook.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Education', y='PerformanceRating', ax=ax);

It looks like the level of education does not add much to work performance

In [None]:
# Average monthly income for each gender.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Gender', y='MonthlyIncome', ax=ax);

**The Monthly Income of Female Employees at IBM is more than male employees**

In [None]:
# Average job-satisfaction score for each gender.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Gender', y='JobSatisfaction', ax=ax);

Job Satisfaction of male employees is slightly better than female employees

In [None]:
# Count how many employees carry each raw 'Over18' value.
# The previous version mapped the column with `x == 'Yes'`, which turns every
# row into 0 whenever the flag is encoded differently (NOTE(review): the
# public IBM attrition CSV appears to use 'Y' -- confirm with
# ibm['Over18'].unique()), and then passed that constant series to the
# deprecated `sns.distplot`. Counting the raw categories needs no assumption
# about the encoding.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=ibm, x='Over18', ax=ax);

All the employees at IBM are **18+**

In [None]:
# Average performance rating for each relationship-satisfaction level,
# drawn at the default figure size.
sns.barplot(data=ibm, x='RelationshipSatisfaction', y='PerformanceRating');

Like Marital Status, **Relationship Satisfaction too has nothing to do with Job Performance**

In [None]:
# Average job satisfaction (x-axis) for each job role (y-axis).
# `g` keeps a handle on the Axes returned by seaborn.
fig, ax = plt.subplots(figsize=(10, 6))
g = sns.barplot(data=ibm, y='JobRole', x='JobSatisfaction', ax=ax)

On Average **Healthcare Representatives are the most** and **HR Employees are the least satisfied employees**

In [None]:
# Average job satisfaction for each value of years at the company, on an
# extra-wide canvas.
fig, ax = plt.subplots(figsize=(20, 6))
sns.barplot(data=ibm, x='YearsAtCompany', y='JobSatisfaction', ax=ax);

There isn't any trend in Working Years and Job Satisfaction

In [None]:
# Box plot of the percentage salary hike (x-axis), one box per overtime
# status (y-axis), drawn at the default figure size.
sns.boxplot(data=ibm, x='PercentSalaryHike', y='OverTime');

Overtime isn't likely to get you an increase in salary

In [None]:
# Median education level within each education field.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='EducationField', y='Education', estimator=np.median, ax=ax);

At IBM, people from different educational backgrounds are **equally and highly educated**!

In [None]:
# Median education level within each department.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Department', y='Education', estimator=np.median, ax=ax);

Similarly, **all departments are equally and highly educated**

In [None]:
# Median education level (x-axis) for each job role (y-axis).
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=ibm, x='Education', y='JobRole', estimator=np.median, ax=ax);

Apart from sales, all the job roles at IBM demand high educational qualifications

In [None]:
# Average education level for each marital status.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='MaritalStatus', y='Education', ax=ax);

In [None]:
# Histogram of job-involvement scores (no density curve).
# `sns.distplot` is deprecated since seaborn 0.11 and scheduled for removal;
# `sns.histplot` is its replacement for the kde=False case.
# `g` still holds the Figure, as in the original cell.
g = plt.figure(figsize=(10, 6))
sns.histplot(data=ibm, x='JobInvolvement', kde=False);

Most job roles at IBM require heavy involvement

In [None]:
# Box plot of job involvement, one box per department.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='Department', y='JobInvolvement', ax=ax);

All the Departments are equally involved at work!! 

In [None]:
# Median performance rating for each education level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='Education', y='PerformanceRating', estimator=np.median, ax=ax);

Education has no role in Performance Rating

In [None]:
# Average number of companies worked at, for each stock-option level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ibm, x='StockOptionLevel', y='NumCompaniesWorked', ax=ax);

The more companies an employee has worked with, the more stock options he/she has

In [None]:
# Box plot of commuting distance, one box per performance rating.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='PerformanceRating', y='DistanceFromHome', ax=ax);

The distance of an employee's home from the office has no effect on his/her performance

In [None]:
# Box plot of commuting distance, one box per job-satisfaction level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=ibm, x='JobSatisfaction', y='DistanceFromHome', ax=ax);

Similarly, distance from home does not affect job satisfaction

## <div style="color:cyan">PLEASE UPVOTE IF YOU LIKED MY WORK </div>