In [1]:
import streamlit as st
import pandas as pd
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import *

In [2]:
st.write("""
# Basic Employee Performance Prediction App
This app predicts the **Employee Performance** type!
""")


In [3]:
dataset = pd.read_excel('EmployeePerformanceData.xls')

In [4]:
dataset.drop(['EmpNumber'],inplace=True,axis=1)

In [5]:
le = LabelEncoder()
dataset["empdep"] = le.fit_transform(dataset["EmpDepartment"])
dataset["empjr"] = le.fit_transform(dataset["EmpJobRole"])


In [6]:
dataset.groupby(by='EmpJobRole')['empjr'].value_counts()

EmpJobRole                 empjr
Business Analyst           0         16
Data Scientist             1         20
Delivery Manager           2         12
Developer                  3        236
Finance Manager            4         49
Healthcare Representative  5         33
Human Resources            6         45
Laboratory Technician      7         64
Manager                    8         51
Manager R&D                9         94
Manufacturing Director     10        33
Research Director          11        19
Research Scientist         12        77
Sales Executive            13       270
Sales Representative       14        69
Senior Developer           15        52
Senior Manager R&D         16        15
Technical Architect        17         7
Technical Lead             18        38
Name: empjr, dtype: int64

In [7]:
st.sidebar.header('User Input Parameters')

<streamlit.DeltaGenerator.DeltaGenerator at 0x7f3b1d4b3450>

In [8]:
def user_input_features():
    #age = st.sidebar.slider('Age', 18, 60, 30)
    #gender = st.radio("Gender", data.Gender.unique())
    #edubkgrnd = st.radio("Educational Background", data.EducationBackground.unique())
    empdep = st.sidebar.radio("Department", dataset.EmpDepartment.unique())
    empjr = st.sidebar.radio("Job Role", dataset.EmpJobRole.unique())
    empenvsat = st.sidebar.slider('Environment Satisfaction', 1, 4, 2)
    empsalhike = st.sidebar.slider('Salary Hike Percent', 11, 25, 15)
    empwrklb = st.sidebar.slider('Work Life Balance', 2, 4, 2)
    empyrsatcomp = st.sidebar.slider('Experience Years at this Company',0, 36, 2)
    empyrscurrole = st.sidebar.slider('Experience Years in Current Role', 0, 15, 5)
    empyrspromo = st.sidebar.slider('Years since last Promotion', 0, 15, 2)
    empyrscurrmng = st.sidebar.slider('Years with Current Manager', 0, 17, 2)
         
    
    data = {'EmpDepartment': empdep,
            'EmpJobRole': empjr,
            'EmpEnvironmentSatisfaction': empenvsat,
            'EmpLastSalaryHikePercent': empsalhike,
            'EmpWorkLifeBalance' : empwrklb,
            'ExperienceYearsAtThisCompany' : empyrsatcomp,
            'ExperienceYearsInCurrentRole' : empyrscurrole,
            'YearsSinceLastPromotion' : empyrspromo,
            'YearsWithCurrManager' : empyrscurrmng }
    features = pd.DataFrame(data, index=[0])
    return features

In [9]:
df = user_input_features()
print(df)

  EmpDepartment       EmpJobRole  EmpEnvironmentSatisfaction  \
0         Sales  Sales Executive                           2   

   EmpLastSalaryHikePercent  EmpWorkLifeBalance  ExperienceYearsAtThisCompany  \
0                        15                   2                             2   

   ExperienceYearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  
0                             5                        2                     2  


In [10]:
st.subheader('User Input parameters')
st.write(df)

In [11]:

if(df.EmpDepartment is "Data Science"):
    df.EmpDepartment = 0
        
elif (df.EmpDepartment is "Development"):
    df.EmpDepartment = 1
        
elif (df.EmpDepartment is "Finance"):
    df.EmpDepartment = 2
        
elif (df.EmpDepartment is "Human Resources"):
    df.EmpDepartment = 3
        
elif (df.EmpDepartment is "Research & Development"):
    df.EmpDepartment = 4
    
else:
    df.EmpDepartment = 5


In [12]:

if(df.EmpJobRole is "Business Analyst"):
    df.EmpJobRole = 0
elif (df.EmpJobRole is "Data Scientist"):
    df.EmpJobRole = 1
elif (df.EmpJobRole is "Delivery Manager"):
    df.EmpJobRole = 2
elif (df.EmpJobRole is "Developer"):
    df.EmpJobRole = 3
elif (df.EmpJobRole is "Finance Manager"):
    df.EmpJobRole = 4
elif (df.EmpJobRole is "HealthCare Representative"):
    df.EmpJobRole = 5
elif (df.EmpJobRole is "Human Resources"):
    df.EmpJobRole = 6
elif (df.EmpJobRole is "Laboratory Technician"):
    df.EmpJobRole = 7
elif (df.EmpJobRole is "Manager"):
    df.EmpJobRole = 8
elif (df.EmpJobRole is "Manager R&D"):
    df.EmpJobRole = 9
elif (df.EmpJobRole is "Manufacturing Director"):
    df.EmpJobRole = 10
elif (df.EmpJobRole is "Research Director"):
    df.EmpJobRole = 11
elif (df.EmpJobRole is "Research Scientist"):
    df.EmpJobRole = 12
elif (df.EmpJobRole is "Sales Executive"):
    df.EmpJobRole = 13
elif (df.EmpJobRole is "Sales Representative"):
    df.EmpJobRole = 14
elif (df.EmpJobRole is "Senior Developer"):
    df.EmpJobRole = 15
elif (df.EmpJobRole is "Senior Manager R&D"):
    df.EmpJobRole = 16
elif (df.EmpJobRole is "Technical Architect"):
    df.EmpJobRole = 17
else:
    df.EmpJobRole = 18

In [13]:
print(df)

   EmpDepartment  EmpJobRole  EmpEnvironmentSatisfaction  \
0              5          18                           2   

   EmpLastSalaryHikePercent  EmpWorkLifeBalance  ExperienceYearsAtThisCompany  \
0                        15                   2                             2   

   ExperienceYearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  
0                             5                        2                     2  


In [14]:
X = dataset.iloc[:,[27,28,9,16,20,21,22,23,24]]
Y = dataset.PerformanceRating

In [15]:

clf = RandomForestClassifier()
clf.fit(X, Y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [16]:
prediction = clf.predict(df)
prediction_proba = clf.predict_proba(df)

In [17]:
print(df)

   EmpDepartment  EmpJobRole  EmpEnvironmentSatisfaction  \
0              5          18                           2   

   EmpLastSalaryHikePercent  EmpWorkLifeBalance  ExperienceYearsAtThisCompany  \
0                        15                   2                             2   

   ExperienceYearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  
0                             5                        2                     2  


In [18]:
print(df)

   EmpDepartment  EmpJobRole  EmpEnvironmentSatisfaction  \
0              5          18                           2   

   EmpLastSalaryHikePercent  EmpWorkLifeBalance  ExperienceYearsAtThisCompany  \
0                        15                   2                             2   

   ExperienceYearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  
0                             5                        2                     2  


In [19]:
st.subheader('Class labels and their corresponding index number')
st.write(dataset.PerformanceRating.unique())

In [20]:
st.subheader('Prediction')
st.write(dataset.PerformanceRating[prediction])
#st.write(prediction)

In [21]:
st.subheader('Prediction Probability')
st.write(prediction_proba)