In [None]:
# All imports live together at the top of the cell (standard library first,
# then third-party) so the notebook's dependencies are visible at a glance.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global seaborn theme and the shared colour list passed as `palette` / `colors`
# by the plotting cells of this notebook.
sns.set_style('darkgrid')
color = ['#ED72A3','#8565F0','#22559C', '#F27370','#FA9856','#EDE862']

# Print the full path of every file found under the Kaggle input directory.
# The unused directory list is bound to `_dirnames` instead of `_` so the
# notebook-level `_` name (IPython's "last output" variable) is left untouched.
for dirname, _dirnames, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
print('ok')

In [None]:
# Read the HR employee attrition CSV from the Kaggle input directory into `df`.
csv_path = '/kaggle/input/hr-analytics-prediction/HR-Employee-Attrition.csv'
df = pd.read_csv(csv_path)
print('ok')

In [None]:
# Show the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Structural overview: (rows, columns) and the number of fully duplicated rows.
print(df.shape)
print(df.duplicated().sum())

# Per-column summary, one row per column of `df`:
#   Unique      - number of distinct non-null values
#   Null        - number of missing values
#   NullPercent - share of missing values on a 0-100 scale (the mean of the
#                 boolean null mask is the fraction of nulls; it is multiplied
#                 by 100 so the values agree with the column name)
#   Type        - pandas dtype of the column
null_mask = df.isna()
tabela = pd.DataFrame({
    'Unique': df.nunique(),
    'Null': null_mask.sum(),
    'NullPercent': null_mask.mean() * 100,
    'Type': df.dtypes.values
})
display(tabela)

# Transformação

In [None]:
# Remove columns that are not used in the analysis below.
# NOTE(review): presumably constant or pure-identifier columns - confirm against
# the `Unique` counts of the summary table.
# The result is re-assigned (no `inplace=True`) and missing labels are ignored,
# so re-running this cell does not raise KeyError once the columns are gone.
df = df.drop(
    columns=['EmployeeCount', 'EmployeeNumber', 'StandardHours', 'Over18'],
    errors='ignore',
)

In [None]:
# Sub-frame with every column whose dtype is not `object`; preview three rows.
num = df.select_dtypes(exclude=['O'])
num.iloc[:3]

In [None]:
# Sub-frame with only the `object`-dtype columns; preview three rows.
cat = df.select_dtypes(include=['O'])
cat.iloc[:3]

In [None]:
# Print the shape of the full frame, then of the non-object and object subsets.
for frame in (df, num, cat):
    print(frame.shape)

# Gráfico

In [None]:
# Correlation heatmap over the numeric columns only.
# `df` still contains string columns (e.g. 'Gender', 'Attrition'); calling
# `DataFrame.corr()` on such a frame raises on pandas >= 2.0, so the numeric
# columns are selected explicitly. This works on older pandas versions as well.
plt.figure(figsize=(18,9))
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, fmt='.1f')
plt.show()

In [None]:
# Strong correlation between the columns: heatmap restricted to this subset.
colunas = [
    'Education', 'Age', 'MonthlyIncome', 'JobLevel', 'NumCompaniesWorked',
    'TotalWorkingYears', 'YearsAtCompany', 'YearsInCurrentRole',
    'YearsSinceLastPromotion', 'YearsWithCurrManager',
]
corr_subset = df[colunas].corr()
sns.heatmap(corr_subset, annot=True, fmt='.1f')

In [None]:
# Count plot of each column in `colunas`, split by gender, on a two-column grid.
colunas = ['WorkLifeBalance','TrainingTimesLastYear','StockOptionLevel',
    'RelationshipSatisfaction','PerformanceRating','NumCompaniesWorked',
   'JobInvolvement', 'JobLevel', 'JobSatisfaction',
   'EnvironmentSatisfaction','Education']

# The grid is sized from the number of columns instead of a hard-coded 13 rows,
# which left more than half of the figure empty.
n_cols = 2
n_rows = max(1, -(-len(colunas) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(9, 3 * n_rows), squeeze=False)

for ax, col in zip(axes.flat, colunas):
    sns.countplot(x=df[col], hue=df['Gender'], palette=['#ED72A3','#8565F0'], ax=ax)

# Hide the leftover cell(s) when the number of plots does not fill the grid.
for ax in axes.flat[len(colunas):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Histogram of each column in `hist`, split by gender, on a two-column grid.
hist = ['Age', 'DailyRate','DistanceFromHome','HourlyRate','MonthlyIncome',
       'MonthlyRate']

# Rows are derived from the number of histograms; the previous fixed 6x2 grid
# only filled its first three rows and left the rest of the figure blank.
n_cols = 2
n_rows = max(1, -(-len(hist) // n_cols))  # ceiling division
row_height = 20 / 6  # same per-row height as the original 10x20 figure with 6 rows
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, row_height * n_rows), squeeze=False)

for ax, col in zip(axes.flat, hist):
    sns.histplot(x=df[col], hue=df['Gender'], palette=['#ED72A3','#8565F0'], ax=ax)

# Hide the leftover cell(s) when the number of plots does not fill the grid.
for ax in axes.flat[len(hist):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

## Correlação entre os anos

In [None]:
# Correlation: total working years.
# One point plot per tenure column: mean TotalWorkingYears at each value of the
# column, with one line per gender.
colunas = ['YearsAtCompany','YearsInCurrentRole', 'YearsSinceLastPromotion','YearsWithCurrManager']

for coluna in colunas:
    fig, axes = plt.subplots(figsize=(16,4))
    sns.pointplot(
        data=df,
        x=coluna,
        y='TotalWorkingYears',
        hue='Gender',
        palette=color,
        ci=None,
        ax=axes,
    )
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    plt.show()

In [None]:
# Correlation: years at the company.
# One point plot per column: mean YearsAtCompany at each value of the column,
# with one line per gender.
colunas = ['YearsInCurrentRole', 'YearsSinceLastPromotion','YearsWithCurrManager']

for coluna in colunas:
    fig, axes = plt.subplots(figsize=(16,4))
    sns.pointplot(
        data=df,
        x=coluna,
        y='YearsAtCompany',
        hue='Gender',
        palette=color,
        ci=None,
        ax=axes,
    )
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    plt.show()

In [None]:
# Correlation: years in the current role.
# One point plot per column: mean YearsInCurrentRole at each value of the
# column, with one line per gender.
colunas = ['YearsSinceLastPromotion','YearsWithCurrManager']

for coluna in colunas:
    fig, axes = plt.subplots(figsize=(16,4))
    sns.pointplot(
        data=df,
        x=coluna,
        y='YearsInCurrentRole',
        hue='Gender',
        palette=color,
        ci=None,
        ax=axes,
    )
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    plt.show()

## Renda Mensal

### Renda mensal por anos de trabalho

In [None]:
# Mean monthly income at each value of the listed columns, one bar per gender.
colunas = [
    'Education', 'JobLevel', 'NumCompaniesWorked', 'TotalWorkingYears',
    'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
    'YearsWithCurrManager',
]

for coluna in colunas:
    fig, axes = plt.subplots(figsize=(16,4))
    sns.barplot(data=df, x=coluna, y='MonthlyIncome', hue='Gender',
                palette=color, ci=None, ax=axes)
    plt.xticks(fontsize=14)
    # Write the bar height on top of every bar, one container per hue level.
    for barras in axes.containers:
        axes.bar_label(barras, rotation=90, fontsize=12)
    plt.tight_layout()
    plt.show()

### Renda mensal por gênero

In [None]:
# Mean monthly income for each category of the listed columns, one bar per gender.
colunas = [
    'Department', 'EducationField', 'Gender',
    'MaritalStatus', 'BusinessTravel', 'JobRole',
]

for coluna in colunas:
    fig, axes = plt.subplots(figsize=(16,6))
    sns.barplot(data=df, x=coluna, y='MonthlyIncome', hue='Gender',
                palette=color, ci=None, ax=axes)
    plt.xticks(rotation=50, fontsize=14)
    # Write the bar height on top of every bar, one container per hue level.
    for barras in axes.containers:
        axes.bar_label(barras, rotation=30, fontsize=14)
    plt.tight_layout()
    plt.show()

### Renda mensal por Atrito

In [None]:
# Mean monthly income for each category of the listed columns, one bar per
# attrition status. 'Gender' used to appear twice in the list, which drew the
# same figure twice; each column is now listed once.
colunas = ['Gender','Department','EducationField','MaritalStatus','BusinessTravel','JobRole']

for i in colunas:
    fig, axes = plt.subplots(figsize=(16,6))
    sns.barplot(x=df[i], y=df['MonthlyIncome'], hue=df['Attrition'], palette=color, ci=None, ax=axes)
    plt.xticks(rotation=50,fontsize=14)
    # Write the bar height on top of every bar, one container per hue level.
    for cont in axes.containers:
        axes.bar_label(cont,rotation=30,fontsize=14)
    plt.tight_layout()
    plt.show()

### Renda mensal vs Hora extra

In [None]:
# Mean monthly income for each category of the listed columns, one bar per
# overtime status. 'Gender' used to appear twice in the list, which drew the
# same figure twice; each column is now listed once.
colunas = ['Gender','Department','EducationField','MaritalStatus','BusinessTravel','JobRole']

for i in colunas:
    fig, axes = plt.subplots(figsize=(16,6))
    sns.barplot(x=df[i], y=df['MonthlyIncome'], hue=df['OverTime'], palette=color, ci=None, ax=axes)
    plt.xticks(rotation=50,fontsize=14)
    # Write the bar height on top of every bar, one container per hue level.
    for cont in axes.containers:
        axes.bar_label(cont,rotation=30,fontsize=14)
    plt.tight_layout()
    plt.show()

### Renda mensal por estado civil

In [None]:
# Mean monthly income for each category of the listed columns, one bar per
# marital status. 'Gender' used to appear twice in the list, which drew the
# same figure twice; each column is now listed once.
colunas = ['Gender','Department','EducationField','BusinessTravel','JobRole']

for i in colunas:
    fig, axes = plt.subplots(figsize=(16,6))
    sns.barplot(x=df[i], y=df['MonthlyIncome'], hue=df['MaritalStatus'], palette=color, ci=None, ax=axes)
    plt.xticks(rotation=50,fontsize=14)
    # Write the bar height on top of every bar, one container per hue level.
    for cont in axes.containers:
        axes.bar_label(cont,rotation=30,fontsize=14)
    plt.tight_layout()
    plt.show()

In [None]:
# Pie chart of the value distribution of every column in `cat`, two per row.
# Iterating a DataFrame yields its column labels.
cat_cols = list(cat)

# Rows are derived from the number of columns; the previous fixed 8x2 grid
# reserved twice as many cells as there were charts and left the rest of the
# figure blank.
n_cols = 2
n_rows = max(1, -(-len(cat_cols) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)

for ax, col in zip(axes.flat, cat_cols):
    df[col].value_counts().plot.pie(
        ax=ax,
        autopct='%.2f',
        colors=color,
        textprops={'size':12,'color':'black'},
    )
    ax.set_title(col)

# Hide the leftover cell(s) when the number of charts does not fill the grid.
for ax in axes.flat[len(cat_cols):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

# Facet Grid

### Renda mensal de quem trabalhou por mais tempo pelo nível de trabalho

In [None]:
# Mean monthly income per total working years, one panel per (JobLevel, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the values present in that
# slice, so the same bar position can stand for different numbers of years in
# different panels. A single sorted order keeps all panels comparable.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='JobLevel', height=7)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='MonthlyIncome', order=years_order, ci=None)
plt.show()

### Renda mensal por idade pelo nível de trabalho

In [None]:
# Mean monthly income per age, one panel per (JobLevel, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the ages present in that slice,
# so the same bar position can stand for different ages in different panels.
# A single sorted order keeps all panels comparable.
age_order = sorted(df['Age'].unique())
g = sns.FacetGrid(df, col='Gender' , row='JobLevel', height=7)
g.map_dataframe(sns.barplot, x='Age', y='MonthlyIncome', order=age_order, ci=None)
plt.show()

### Idade de quem trabalhou por mais tempo por nível de trabalho

In [None]:
# Mean age per total working years, one panel per (JobLevel, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the values present in that
# slice, so the same bar position can stand for different numbers of years in
# different panels. A single sorted order keeps all panels comparable.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='JobLevel', height=7)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, ci=None)
plt.show()

### Renda mensal de quem trabalhou por mais tempo em cada departamento

In [None]:
# Mean monthly income per total working years, one panel per (Department, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the values present in that
# slice, so the same bar position can stand for different numbers of years in
# different panels. A single sorted order keeps all panels comparable.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='Department', height=7)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='MonthlyIncome', order=years_order, ci=None)
plt.show()

### Renda mensal de quem trabalhou por mais tempo por área de formação

In [None]:
# Mean monthly income per total working years, one panel per (EducationField, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the values present in that
# slice, so the same bar position can stand for different numbers of years in
# different panels. A single sorted order keeps all panels comparable.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='EducationField', height=6)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='MonthlyIncome', order=years_order, ci=None)
plt.show()

### Renda mensal de quem trabalhou por mais tempo pelo cargo de trabalho

In [None]:
# Mean monthly income per total working years, one panel per (JobRole, Gender).
# Every panel is drawn from its own slice of `df`; without an explicit `order`
# each panel builds its categorical x axis from the values present in that
# slice, so the same bar position can stand for different numbers of years in
# different panels. A single sorted order keeps all panels comparable.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='JobRole', height=7)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='MonthlyIncome', order=years_order, ci=None)
plt.show()

### Atrito devido aos anos trabalhados na empresa e cargo de trabalho

In [None]:
# Mean age per total working years, one panel per (JobRole, Gender), with the
# two attrition groups overlaid as semi-transparent bars.
# FacetGrid calls barplot once per panel *and* per hue level, each time with a
# different slice of `df`. Without an explicit `order` every call builds its own
# categorical x axis, so the overlaid attrition layers (and the panels) do not
# line up on the same number of years. A single sorted order fixes the alignment.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='JobRole', hue='Attrition', height=6)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, alpha=0.6, ci=None)
g.add_legend()
plt.show()

### Atrito devido aos anos trabalhados na empresa em cada departamento

In [None]:
# Mean age per total working years, one panel per (Department, Gender), with the
# two attrition groups overlaid as semi-transparent bars.
# FacetGrid calls barplot once per panel *and* per hue level, each time with a
# different slice of `df`. Without an explicit `order` every call builds its own
# categorical x axis, so the overlaid attrition layers (and the panels) do not
# line up on the same number of years. A single sorted order fixes the alignment.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender' , row='Department', hue='Attrition', height=8)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, alpha=0.6, ci=None)
g.add_legend()
plt.show()

### Atrito devido aos anos trabalhados na empresa e por viagem a serviço

In [None]:
# Mean age per total working years, one panel per (BusinessTravel, Gender), with
# the two attrition groups overlaid as semi-transparent bars.
# FacetGrid calls barplot once per panel *and* per hue level, each time with a
# different slice of `df`. Without an explicit `order` every call builds its own
# categorical x axis, so the overlaid attrition layers (and the panels) do not
# line up on the same number of years. A single sorted order fixes the alignment.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender', row='BusinessTravel', hue='Attrition', height=8)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, alpha=0.6, ci=None)
g.add_legend()
plt.show()

### Atrito por anos trabalhados na empresa e estado civil 

In [None]:
# Mean age per total working years, one panel per (MaritalStatus, Gender), with
# the two attrition groups overlaid as semi-transparent bars.
# FacetGrid calls barplot once per panel *and* per hue level, each time with a
# different slice of `df`. Without an explicit `order` every call builds its own
# categorical x axis, so the overlaid attrition layers (and the panels) do not
# line up on the same number of years. A single sorted order fixes the alignment.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender', row='MaritalStatus', hue='Attrition', height=8)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, alpha=0.6, ci=None)
g.add_legend()
plt.show()

### Hora extra por departamento

In [None]:
# Mean age per total working years, one panel per (Department, Gender), with the
# two overtime groups overlaid as semi-transparent bars.
# FacetGrid calls barplot once per panel *and* per hue level, each time with a
# different slice of `df`. Without an explicit `order` every call builds its own
# categorical x axis, so the overlaid overtime layers (and the panels) do not
# line up on the same number of years. A single sorted order fixes the alignment.
years_order = sorted(df['TotalWorkingYears'].unique())
g = sns.FacetGrid(df, col='Gender', row='Department', hue='OverTime', height=8)
g.map_dataframe(sns.barplot, x='TotalWorkingYears', y='Age', order=years_order, alpha=0.6, ci=None)
g.add_legend()
plt.show()