In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Input data files are available in the "../input/" directory.
# Listing them confirms the expected CSV is present before it is loaded.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep this cell's output stable across runs.
import os
print(sorted(os.listdir("../input")))

## About Features

 - satisfaction_level: Level of satisfaction {0–1}.
 - last_evaluation: Time since last performance evaluation (in years).
 - number_project: Number of projects completed while at work.
 - average_montly_hours: Average monthly hours at workplace.
 - time_spend_company: Number of years spent in the company.
 - Work_accident: Whether the employee had a workplace accident.
 - left: Whether the employee left the workplace or not {0, 1}.
 - promotion_last_5years: Whether the employee was promoted in the last five years.
 - sales: Department the employee works for.
 - salary: Relative level of salary {low, medium, high}.

## Importing Dataset

In [None]:
# Read the HR records from the input directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='../input/HR_comma_sep.csv')
df.head(n=5)

In [None]:
# Structural overview: dimensions, column dtypes and per-column null counts,
# each separated by a blank line.
print(df.shape, df.dtypes, df.isnull().sum(), sep="\n\n")

## Data Preprocessing

In [None]:
# The "sales" column actually holds the employee's department; give it a
# descriptive name.
df = df.rename(columns={"sales": "department"})

In [None]:
# Encode the ordinal salary levels as integers (low < medium < high).
# The dtype guard makes this cell safe to re-run: mapping an already-encoded
# column a second time would find no matching keys and turn every value
# into NaN.
salary_codes = {"low": 0, "medium": 1, "high": 2}
if not pd.api.types.is_numeric_dtype(df["salary"]):
    df["salary"] = df["salary"].map(salary_codes)

In [None]:
# Preview the first rows of the frame again at this point in the preprocessing.
df.head(n=5)

In [None]:
# Per-class feature averages (left = 0: didn't leave, left = 1: left).
# numeric_only=True skips the text-valued department column; without it,
# pandas >= 2.0 raises a TypeError when it tries to average strings.
df.groupby("left").mean(numeric_only=True)

## Exploratory Data Analysis

In [None]:
# Grouped bar chart: for every department, how many employees fall into each
# value of `left`.
department_vs_left = pd.crosstab(df.department, df.left)
ax = department_vs_left.plot.bar()
ax.set_title("Turnover Frequency for Department")
ax.set_xlabel("Department")
ax.set_ylabel("Frequency of Turnover")
plt.show()

In [None]:
# Class balance: number of employees who stayed vs. left.
# The column is passed via explicit x=/data= keywords; the positional-vector
# form was deprecated in seaborn 0.12 and newer releases interpret the first
# positional argument as `data` rather than `x`.
sns.countplot(x="left", data=df)
plt.xticks((0, 1), ["Didn't leave", "Left"])
plt.xlabel("Class counts")
plt.ylabel("Count")
plt.show()

In [None]:
# Share of each `left` class within every salary level (rows sum to 1).
salary_vs_left = pd.crosstab(df.salary, df.left)
salary_shares = salary_vs_left.div(salary_vs_left.sum(axis=1), axis=0)
salary_shares.plot(kind='bar', stacked=True)
# salary is integer-encoded as low=0, medium=1, high=2, so the bars are drawn
# in that order and the tick labels must follow it (not alphabetical order).
plt.xticks((0, 1, 2), ["Low", "Medium", "High"])
plt.title("Stacked Bar Chart of Salary Level vs Turnover")
plt.xlabel("Salary Level")
plt.ylabel("Proportion of Employees")
plt.show()

In [None]:
# Pie chart of headcount per department; the counts are computed once and
# reused for both the wedge sizes and their labels.
department_counts = df.department.value_counts()
plt.pie(department_counts.values, labels=department_counts.index)
plt.legend()
plt.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
plt.show()

In [None]:
# Salary mix inside each class: every row of the crosstab is divided by its
# own total so the stacked bars sum to 1.
left_vs_salary = pd.crosstab(df.left, df.salary)
row_totals = left_vs_salary.sum(1).astype(float)
salary_mix = left_vs_salary.div(row_totals, axis=0)
salary_mix.plot.bar(stacked=True)
plt.title("Stacked Chart of Salary vs Class")
plt.xlabel("Class")
plt.xticks([0, 1], ["Didn't leave", "Left"])
plt.ylabel("Proportion of Salary")
plt.show()

In [None]:
# Create dummy variables for department feature
# NOTE(review): the one-hot encoding below is commented out, so this cell is a
# no-op and `department` stays a single column here. Confirm whether the
# encoding is applied elsewhere or whether this cell should be removed.
#df = pd.get_dummies(df, columns=["department"], drop_first=True)
#df.head()