Resignation Reasoning

In [386]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

from itertools import combinations

In [387]:
df = pd.read_csv('../00_Data/04_NoOutliers.csv')
df.head()

Unnamed: 0,Gender,Generation,Term Sub Reason,Employee Grade,Location,Career Bucket,Age Bucket,Manager ID,Talent,Department,active_months
0,Female,Millennials,Career Growth,Junior,Bengaluru,Below 30,26 To 30,102292,,AC,59.0
1,Female,Millennials,Career Growth,Mid,Bengaluru,31 to 35,26 To 30,100411,,AD,4.0
2,Male,Millennials,Career Growth,Junior,Bengaluru,Below 30,26 To 30,102304,,AG,26.0
3,Male,Gen X,Career Growth,Senior,Bengaluru,Above 35,36 T0 40,102324,,AI,126.0
4,Male,Millennials,Personal Reason,Mid,Bengaluru,31 to 35,31 To 35,100619,,AI,126.0


In [388]:
df['Reason'] = df['Term Sub Reason'].copy()
df.drop(columns=['Term Sub Reason'], inplace=True)
df.head()

Unnamed: 0,Gender,Generation,Employee Grade,Location,Career Bucket,Age Bucket,Manager ID,Talent,Department,active_months,Reason
0,Female,Millennials,Junior,Bengaluru,Below 30,26 To 30,102292,,AC,59.0,Career Growth
1,Female,Millennials,Mid,Bengaluru,31 to 35,26 To 30,100411,,AD,4.0,Career Growth
2,Male,Millennials,Junior,Bengaluru,Below 30,26 To 30,102304,,AG,26.0,Career Growth
3,Male,Gen X,Senior,Bengaluru,Above 35,36 T0 40,102324,,AI,126.0,Career Growth
4,Male,Millennials,Mid,Bengaluru,31 to 35,31 To 35,100619,,AI,126.0,Personal Reason


In [389]:
def binary_encoding(df:pd.DataFrame, column:str, true_value:str):
    """Binary Encoding"""

    unique_values = df[column].unique()

    for val in unique_values:
        if val == true_value:
            df.loc[:, column] = df[column].replace(val, '1').infer_objects(copy=False)
        else:
            df.loc[:, column] = df[column].replace(val, '0').infer_objects(copy=False)

    df[column] = pd.to_numeric(df[column])

    return df

In [390]:
# Label Encoding - If values are not in order
def label_encoding(df:pd.DataFrame, columns:list):
    """Label Encoding Function"""

    le = LabelEncoder()
    for column in columns:
        df[column] = le.fit_transform(df[column])
        df[column] = df[column] + 1
        df[column] = pd.to_numeric(df[column])
    
    return df

In [391]:
def ordinal_encoding(df:pd.DataFrame, column:str, order_list:list):
    """Encoding Ordered Data"""

    data = df.copy()
    count = 1
    for item in order_list:
        data[column] = data[column].replace(item, f'{count}')
        count = count +1 

    data[column] = pd.to_numeric(data[column])
    return data

Apply Encodings

In [392]:
df = binary_encoding(df, 'Gender', 'Female')

label_cols = ['Location', 'Talent', 'Department']
df = label_encoding(df, label_cols)

gen_list = ['Gen Z', 'Millennials', 'Gen X', 'Baby Boomers']
df = ordinal_encoding(df, 'Generation', gen_list)

emp_grade = ['Junior', 'Mid', 'Senior']
df = ordinal_encoding(df, 'Employee Grade', emp_grade)

career_bucket = ['Below 30', '31 to 35', 'Above 35']
df = ordinal_encoding(df, 'Career Bucket', career_bucket)

order_list = ['21 To 25', '26 To 30', '31 To 35', '36 T0 40',  '41 T0 45', '46 T0 50', '51 T0 55']
df = ordinal_encoding(df, 'Age Bucket', order_list)
df.head()

Unnamed: 0,Gender,Generation,Employee Grade,Location,Career Bucket,Age Bucket,Manager ID,Talent,Department,active_months,Reason
0,1,2,1,1,1,2,102292,5,2,59.0,Career Growth
1,1,2,2,1,2,2,100411,5,3,4.0,Career Growth
2,0,2,1,1,1,2,102304,5,5,26.0,Career Growth
3,0,3,3,1,3,4,102324,5,7,126.0,Career Growth
4,0,2,2,1,2,3,100619,5,7,126.0,Personal Reason


Drop Correlated Features

In [393]:
df.drop(['Career Bucket', 'Generation', 'Employee Grade'], axis=1, inplace=True)

In [394]:
df.head()

Unnamed: 0,Gender,Location,Age Bucket,Manager ID,Talent,Department,active_months,Reason
0,1,1,2,102292,5,2,59.0,Career Growth
1,1,1,2,100411,5,3,4.0,Career Growth
2,0,1,2,102304,5,5,26.0,Career Growth
3,0,1,4,102324,5,7,126.0,Career Growth
4,0,1,3,100619,5,7,126.0,Personal Reason


Building Classification

In [395]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [396]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [397]:
scaler = StandardScaler()
XS_train = scaler.fit_transform(X_train)
XS_test = scaler.transform(X_test)

In [398]:
model = KNeighborsClassifier()
model.fit(XS_train, y_train)

In [399]:
model.score(XS_train, y_train)

0.5103626943005182

In [400]:
def classification(df:pd.DataFrame, combo:list,  iteration=1):

    result = []
    for num in range(1, iteration+1):
        X = df.iloc[:, :-1]
        y = df.iloc[:, -1]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        scaler = StandardScaler()
        XS_train = scaler.fit_transform(X_train)
        XS_test = scaler.transform(X_test)

        model = KNeighborsClassifier()
        model.fit(XS_train, y_train)

        train_score = round(model.score(XS_train, y_train) * 100, 2)
        test_score = round(model.score(XS_test, y_test) * 100, 2)

        stability = 100 - abs(train_score - test_score) 

        result.append([model, combo, train_score, test_score, stability])
    
    df_result = pd.DataFrame(result)
    df_result.columns = ['Model', 'combo', 'Train Score', 'Test Score', 'Stability']
    df_result = df_result.sort_values(by='Stability', ascending=False)

    return df_result

In [401]:
df['Reason'].unique()

array(['Career Growth', 'Personal Reason', 'Relocated to Other Branch',
       'Salary Hike', 'Higher Studies', 'Relocation to Native',
       'Internal Politics', 'Company Rules', 'Health Issues',
       'Work Life Balance', 'Marriage', 'Not Suitable Job for the Skills',
       'Miscellaneous', 'Family Problems', 'Not Interested in JOB',
       'Work Stress'], dtype=object)

In [402]:
df['Reason'].nunique()

16

In [403]:
def update_reason(value):

    if value in ['Salary Hike', 'Career Growth']:
        return value
    else:
        return 'OTH'

In [404]:
df.head()

Unnamed: 0,Gender,Location,Age Bucket,Manager ID,Talent,Department,active_months,Reason
0,1,1,2,102292,5,2,59.0,Career Growth
1,1,1,2,100411,5,3,4.0,Career Growth
2,0,1,2,102304,5,5,26.0,Career Growth
3,0,1,4,102324,5,7,126.0,Career Growth
4,0,1,3,100619,5,7,126.0,Personal Reason


In [405]:
# df['Reason'] = df['Reason'].apply(lambda x:update_reason(x))
# df['Reason'].unique()

In [406]:
classification(df, 100)

Unnamed: 0,Model,combo,Train Score,Test Score,Stability
0,KNeighborsClassifier(),100,52.33,35.54,83.21


Create Combinations

In [407]:
reasons = df['Reason'].unique()
reasons

array(['Career Growth', 'Personal Reason', 'Relocated to Other Branch',
       'Salary Hike', 'Higher Studies', 'Relocation to Native',
       'Internal Politics', 'Company Rules', 'Health Issues',
       'Work Life Balance', 'Marriage', 'Not Suitable Job for the Skills',
       'Miscellaneous', 'Family Problems', 'Not Interested in JOB',
       'Work Stress'], dtype=object)

In [408]:
combos = []
for num in range(1, len(reasons)+ 1):
    combo_list = list(combinations(reasons, num))
    combos.append(combo_list)

combos

[[('Career Growth',),
  ('Personal Reason',),
  ('Relocated to Other Branch',),
  ('Salary Hike',),
  ('Higher Studies',),
  ('Relocation to Native',),
  ('Internal Politics',),
  ('Company Rules',),
  ('Health Issues',),
  ('Work Life Balance',),
  ('Marriage',),
  ('Not Suitable Job for the Skills',),
  ('Miscellaneous',),
  ('Family Problems',),
  ('Not Interested in JOB',),
  ('Work Stress',)],
 [('Career Growth', 'Personal Reason'),
  ('Career Growth', 'Relocated to Other Branch'),
  ('Career Growth', 'Salary Hike'),
  ('Career Growth', 'Higher Studies'),
  ('Career Growth', 'Relocation to Native'),
  ('Career Growth', 'Internal Politics'),
  ('Career Growth', 'Company Rules'),
  ('Career Growth', 'Health Issues'),
  ('Career Growth', 'Work Life Balance'),
  ('Career Growth', 'Marriage'),
  ('Career Growth', 'Not Suitable Job for the Skills'),
  ('Career Growth', 'Miscellaneous'),
  ('Career Growth', 'Family Problems'),
  ('Career Growth', 'Not Interested in JOB'),
  ('Career Grow

In [409]:
result = []
for subcombo in combos:
    for combo in subcombo:
        
        def update_reason(value):

            if value in combo:
                return value
            else:
                return 'OTH'
            
        df_process = df.copy()
        df_process['Reason'] = df_process['Reason'].apply(lambda x:update_reason(x))
        df_process['Reason'].unique()

        df_result = classification(df_process, combo)
        result.append(df_result)
    
    
            
df_final = pd.concat(result)
df_final.sort_values(by=['Test Score', 'Stability'], ascending=False)
df_final.head()

Unnamed: 0,Model,combo,Train Score,Test Score,Stability
0,KNeighborsClassifier(),"(Career Growth,)",74.61,57.23,82.62
0,KNeighborsClassifier(),"(Personal Reason,)",78.5,81.93,96.57
0,KNeighborsClassifier(),"(Relocated to Other Branch,)",99.74,98.19,98.45
0,KNeighborsClassifier(),"(Salary Hike,)",86.53,84.94,98.41
0,KNeighborsClassifier(),"(Higher Studies,)",92.75,86.14,93.39
