# IBM HR Analytics Employee Attrition & Performance

Descubra los factores que conducen a la deserción de los empleados y explore preguntas como "muestre un desglose de la distancia desde el hogar al trabajo por función laboral y deserción" o "compare el ingreso mensual promedio por educación y deserción". Este es un conjunto de datos ficticios creado por científicos de datos de IBM.

Principales variables:

|Nº|Education|EnvironmentSatisfaction|JobInvolvement|PerformanceRating|RelationshipSatisfaction|WorkLifeBalance|
|:--:|:------:|:------:|:------:|:------:|:------:|:------:|
|1|'Below College'| 'Low'|'Low'|'Low'|'Low'|'Bad'|
|2|'College'|'Medium'|'Medium'|'Good'|'Medium'|'Good'|
|3|'Bachelor'|'High'|'High'|'Excellent'|'High'| 'Better'|
|4|'Master'|'Very High'|'Very High'|'Outstanding'|'Very High'|'Best'|
|5|'Doctor'| | | | | |





### Importo librerías

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl            # Matplotlib
import matplotlib.pyplot as plt     # El modulo Pyplot de Matplotlib
import seaborn as sns               # Seaborn

mpl.style.use('bmh')    # Establecemos un nuevo estilo

from prettytable import PrettyTable

## 1) DATA PROCESS & EXPLORATORY DATA ANALYSIS (EDA)


In [2]:
# Load the dataset
df = pd.read_csv('Datos/WA_Fn-UseC-HR-Employee-Attrition.csv')
# Use the fully qualified option name: the abbreviated 'max_columns' pattern
# matches more than one option in newer pandas releases (e.g. it also matches
# 'styler.render.max_columns') and then raises OptionError.
pd.set_option('display.max_columns', None)
df.sample(6)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,Gender,HourlyRate,JobInvolvement,JobLevel,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,MonthlyRate,NumCompaniesWorked,Over18,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
1193,38,No,Travel_Frequently,148,Research & Development,2,3,Medical,1,1675,4,Female,42,2,1,Laboratory Technician,2,Single,2440,23826,1,Y,No,22,4,2,80,0,4,3,3,4,3,3,3
976,56,No,Travel_Rarely,1369,Research & Development,23,3,Life Sciences,1,1373,4,Male,68,3,4,Manufacturing Director,2,Married,13402,18235,4,Y,Yes,12,3,1,80,1,33,0,3,19,16,15,9
256,42,No,Travel_Rarely,269,Research & Development,2,3,Medical,1,351,4,Female,56,2,1,Laboratory Technician,1,Divorced,2593,8007,0,Y,Yes,11,3,3,80,1,10,4,3,9,6,7,8
1110,35,Yes,Travel_Rarely,104,Research & Development,2,3,Life Sciences,1,1569,1,Female,69,3,1,Laboratory Technician,1,Divorced,2074,26619,1,Y,Yes,12,3,4,80,1,1,2,3,1,0,0,0
1015,34,No,Travel_Frequently,560,Research & Development,1,4,Other,1,1431,4,Male,91,3,1,Research Scientist,1,Divorced,2996,20284,5,Y,No,14,3,3,80,2,10,2,3,4,3,1,3
772,56,No,Travel_Frequently,1240,Research & Development,9,3,Medical,1,1071,1,Female,63,3,1,Research Scientist,3,Married,2942,12154,2,Y,No,19,3,2,80,1,18,4,3,5,4,0,3


In [3]:
# Generamos una descripcion de cada campo
def df_explore(df):
    """Print a per-column summary table of a DataFrame.

    Prints the DataFrame's shape, then a PrettyTable with one row per column
    showing its dtype, non-null count, null count, number of distinct values
    and the first non-null value as an example.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        None. The summary is written to standard output.
    """
    print( 'Shape: ', df.shape)

    t = PrettyTable(['Column',
                     'Type',
                     'Non-Null',
                     'Nulls',
                     'Unique',
                     'Example',
                    ])

    for c in df.columns:
        col = df[c]
        nulls = col.isna().sum()
        non_null = col.dropna()
        t.add_row([c,
                   col.dtype,
                   len(col) - nulls,
                   nulls,
                   # Count distinct values directly. The previous
                   # np.count_nonzero(col.unique()) only counted *truthy*
                   # unique values, so columns containing 0, '' or False
                   # were under-reported by one. dropna=False keeps a null
                   # counted as one distinct value, as before.
                   col.nunique(dropna=False),
                   # An all-null column has no example value; show a
                   # placeholder instead of raising IndexError.
                   non_null.iloc[0] if not non_null.empty else '-',
                  ])
    print(t)
    print()
    return

# Print the per-column summary table for the loaded dataset
df_explore(df)

Shape:  (1470, 35)
+--------------------------+--------+----------+-------+--------+-----------------+
|          Column          |  Type  | Non-Null | Nulls | Unique |     Example     |
+--------------------------+--------+----------+-------+--------+-----------------+
|           Age            | int64  |   1470   |   0   |   43   |        41       |
|        Attrition         | object |   1470   |   0   |   2    |       Yes       |
|      BusinessTravel      | object |   1470   |   0   |   3    |  Travel_Rarely  |
|        DailyRate         | int64  |   1470   |   0   |  886   |       1102      |
|        Department        | object |   1470   |   0   |   3    |      Sales      |
|     DistanceFromHome     | int64  |   1470   |   0   |   29   |        1        |
|        Education         | int64  |   1470   |   0   |   5    |        2        |
|      EducationField      | object |   1470   |   0   |   6    |  Life Sciences  |
|      EmployeeCount       | int64  |   1470   |   0   | 

**OBS:** No hay valores nulos

### 1.1: Verifico los valores únicos de algunas variables


In [100]:
# Generamos tabla con valores unicos de ciertos campos

def df_unique_val_col(df, col_list=None):
    """Build a table listing the unique values of the selected columns.

    Each selected column of ``df`` becomes a column of the result, holding
    that column's unique values in order of first appearance. Columns with
    fewer unique values than the longest one are padded with ``'-'``. The
    rows are labelled ``Value_1``, ``Value_2``, ...

    Args:
        df: Source pandas DataFrame.
        col_list: Names of the columns to inspect. ``None`` (the default)
            or an empty list yields an empty table.

    Returns:
        A pandas DataFrame of dtype ``object`` with one column per entry of
        ``col_list`` and one row per unique-value slot.
    """
    # A None sentinel replaces the former mutable default argument ([]).
    columns = [] if col_list is None else list(col_list)

    # Unique values per column, in order of first appearance.
    uniques = {name: list(df[name].unique()) for name in columns}

    # Table height = the largest number of unique values among the columns.
    height = max((len(values) for values in uniques.values()), default=0)
    labels = ['Value_' + str(i + 1) for i in range(height)]

    # Pad shorter columns with '-' so every column has the same length, and
    # build the frame in one step. The previous cell-by-cell chained
    # assignment (dg[m][j] = ...) relied on positional integer access to a
    # string-labelled Series and on writing through an intermediate object,
    # neither of which is reliable in current pandas.
    padded = {name: values + ['-'] * (height - len(values))
              for name, values in uniques.items()}
    dg = pd.DataFrame(padded, index=labels, columns=columns, dtype=object)

    print('Valores únicos de algunos campos de interes')
    return dg



In [101]:
# NOTE(review): `L` is not defined in the visible cells — presumably the list
# of column names to inspect; confirm it is defined before this cell runs.
df_unique_val_col(df,L)

Valores únicos de algunos campos de interes


Unnamed: 0,Attrition,BusinessTravel,Department,EducationField,Gender,JobRole,MaritalStatus,Over18,OverTime
Value_1,Yes,Travel_Rarely,Sales,Life Sciences,Female,Sales Executive,Single,Y,Yes
Value_2,No,Travel_Frequently,Research & Development,Other,Male,Research Scientist,Married,-,No
Value_3,-,Non-Travel,Human Resources,Medical,-,Laboratory Technician,Divorced,-,-
Value_4,-,-,-,Marketing,-,Manufacturing Director,-,-,-
Value_5,-,-,-,Technical Degree,-,Healthcare Representative,-,-,-
Value_6,-,-,-,Human Resources,-,Manager,-,-,-
Value_7,-,-,-,-,-,Sales Representative,-,-,-
Value_8,-,-,-,-,-,Research Director,-,-,-
Value_9,-,-,-,-,-,Human Resources,-,-,-
