### Preparing the pay band master

In [None]:
import pandas as pd
import numpy as np
# Load the UKI and ROI pay band master extracts; both files are read as Latin-1.
PBM, ROI = (
    pd.read_csv(csv_name, encoding='latin1')
    for csv_name in ('UKI PBM.csv', 'ROI PBM.csv')
)

In [None]:
import matplotlib.pyplot as plt 
%matplotlib inline 

import seaborn as sns

### Remove Default UJI

In [None]:
# Default UJI codes that must not take part in the pay band lookup.
default = ["000", "010", "020"]

# Drop the rows whose UJI is one of the default codes. errors="ignore" stops
# the step from raising KeyError when a code is missing from one of the files
# (DataFrame.drop raises if *any* requested label is absent).
# set_index/reset_index leaves UJI as the first column with a fresh RangeIndex.
PBM = PBM.set_index("UJI").drop(default, axis=0, errors="ignore").reset_index()
ROI = ROI.set_index("UJI").drop(default, axis=0, errors="ignore").reset_index()

### Selecting columns required for merge

In [None]:
# Columns kept for the merge; 'Mid Point' is selected instead of the derived Mid.
merge_columns = [
    'UJI',
    'Location',
    'Pay Band',
    'Level',
    'UJI Library Short Description',
    'Entry',
    'Mid Point',
    'Top',
]

PBM = PBM[merge_columns]
ROI = ROI[merge_columns]

### Combine UKI and ROI pay band

In [None]:
# Stack the ROI rows under the UKI rows and renumber the index from zero.
PBM = pd.concat(objs=[PBM, ROI], ignore_index=True)

### Removing Duplicates

In [None]:
# Remove commas and spaces from 'Mid Point' so it can be parsed as a float.
PBM['Mid Point'] = (
    PBM['Mid Point']
    .astype(str)
    .str.replace(',', '')
    .str.replace(' ', '')
    .astype(float)
)

# For every (UJI, Location) pair keep the first row after sorting by
# 'Mid Point' descending, then order the result by UJI.
PBM = (
    PBM.sort_values("Mid Point", ascending=False)
    .drop_duplicates(["UJI", "Location"])
    .sort_values("UJI")
)

### Import OPR file

In [None]:
# Load the Operational People Roster extract with pandas' default settings.
OPR = pd.read_csv(filepath_or_buffer='Operational People Roster.csv')

### FTE Salary

In [None]:
# Parse the annual rate (commas removed) and multiply it by FTE1.
# NOTE(review): this scales the rate *down* by FTE1 — confirm that
# 'AnnualCompensationRate' is the full-time rate rather than the actual pay.
annual_rate = OPR['AnnualCompensationRate'].astype(str).str.replace(',', '').astype(float)
OPR['FTE Salary'] = annual_rate * OPR['FTE1']

### Combine with pay band information

In [None]:
# Left-join the pay band rows onto the roster: SalaryGrade matches UJI and
# LocationCity matches Location. Roster rows without a match keep NaN bands.
OPR = OPR.merge(
    PBM,
    how='left',
    left_on=['SalaryGrade', 'LocationCity'],
    right_on=['UJI', 'Location'],
)

### Compa-Ratio

In [None]:
# Compa-Ratio = FTE Salary / Mid Point. A zero FTE Salary gives NaN, and a
# positive-infinity ratio is replaced with NaN as well.
mid_point = OPR["Mid Point"].astype(str).str.replace(',', '').astype(float)
raw_ratio = OPR["FTE Salary"] / mid_point
OPR['Compa-Ratio'] = np.where(OPR['FTE Salary'] == 0, np.nan, raw_ratio)
OPR['Compa-Ratio'] = OPR['Compa-Ratio'].replace(np.inf, np.nan)

### Entity

In [None]:
# Derive Entity from the BU code. Later rules override earlier ones:
#   BU starts with "IE"    -> "ROI"   (everything else defaults to "UKI")
#   BU starts with "GB080" -> "FSO"
#   BU listed in ROIFS     -> "ROI FS"
# na=False matters: without it str.startswith returns NaN for a missing BU,
# np.where treats NaN as true, and blank-BU rows end up labelled "FSO".
# With it, rows that have no BU fall through to the "UKI" default.
OPR["Entity"] = np.where(OPR['BU'].str.startswith("IE", na=False), "ROI", "UKI")
OPR['Entity'] = np.where(OPR['BU'].str.startswith("GB080", na=False), "FSO", OPR['Entity'])
ROIFS = ["IE002", "IE032", "IE042", "IE062"]
OPR['Entity'] = np.where(OPR['BU'].isin(ROIFS), "ROI FS", OPR['Entity'])


### Allowances

In [None]:
from datetime import datetime

# Parse the hire date. '%y' is a two-digit year, so early years come back as
# 20xx and can land in the future; those dates are moved back one century
# before the tenure is measured.
hire_date = pd.to_datetime(OPR['CurrentEmploymentDate1'], format='%d-%b-%y')
now = pd.Timestamp('now')

# pd.DateOffset is a calendar-aware shift. Year-unit np.timedelta64 arithmetic
# and .astype('<m8[Y]') are not supported by pandas 2.x, so neither is used.
corrected_hire_date = hire_date.where(hire_date < now, hire_date - pd.DateOffset(years=100))

# Tenure in whole years, stored as float (NaN where the hire date is missing).
# 365.2425 days is the length numpy uses for its 'Y' unit.
OPR['Tenure'] = np.floor((now - corrected_hire_date) / pd.Timedelta(days=365.2425))

# Store the hire date back as 'dd-Mon-yy' text.
# NOTE(review): this text form drops the century again, so any later re-parse
# with '%y' has to repeat the century correction.
OPR['CurrentEmploymentDate1'] = hire_date.dt.strftime('%d-%b-%y')

In [None]:
# Roster columns carried forward, in output order.
roster_columns = [
    'GPN', 'LPN', 'LastName', 'FirstName',
    'JobCode', 'JobCodeDesc1', 'SalaryGrade', 'Level',
    'UJI Library Short Description',
    'Rank', 'RankDesc', 'EYGrade', 'BusinessTitleJobPage',
    'JobFamily', 'JobFamilyDesc',
    'FTE Salary', 'AnnualCompensationRate',
    'Entry', 'Mid Point', 'Top', 'Compa-Ratio',
    'Entity', 'BU', 'BUName', 'ServiceLine', 'SubSL1',
    'DepartmentCode1', 'DepartmentName1',
    'OU', 'OUName', 'MU', 'MUName', 'SMU', 'SMUName',
    'Codeblock', 'LocationCode', 'LocationCity', 'LocationAddress1',
    'StandardHours', 'StandardHoursSalaryAdminPlan', 'FTE1',
    'EmployeeClass', 'RegularTemporary', 'PaygroupCode',
    'EmployeeCategoryCode', 'EmployeeCategory',
    'CurrentEmploymentDate1', 'SeniorityPayDate',
    'GUI', 'EmployeeID1', 'EmployeeStatus1',
    'CounselorGUI', 'CounselorName',
    'gTESupervisorGUI', 'gTESupervisorName',
    'EmailAddress', 'GenderCode', 'Ethnicity', 'MaritalStatus',
    'DateOfBirth', 'EmployeeRecord', 'Tenure',
]

# Shorter report-friendly names for a handful of columns.
column_renames = {
    'SalaryGrade': 'UJI',
    'UJI Library Short Description': 'UJI Description',
    'BusinessTitleJobPage': 'Job Title',
    'AnnualCompensationRate': 'Actual Salary',
    'Mid Point': 'Mid',
    'GenderCode': 'Gender',
}

# Selecting with a list and renaming both return new frames, so OPR2 is
# independent of OPR.
OPR2 = OPR[roster_columns].rename(columns=column_renames)

In [None]:
# Rank codes grouped for the allowance rules below.
Manager = [32, 63]
Director = [21, 62, 13, 61]
AM = [64, 55, 42]

# One IE/non-IE mask used by every rule. na=False makes a missing BU count as
# non-IE everywhere; previously it was non-IE inside the `&` expressions but
# IE in the bare np.where calls (NaN is truthy), giving mixed results.
is_ie = OPR2['BU'].str.startswith("IE", na=False)
is_manager = OPR2['Rank'].isin(Manager)
is_director = OPR2['Rank'].isin(Director)
long_tenure = OPR2['Tenure'] > 4

# PMI
#   non-IE: 1092.72 for Manager/Director ranks or tenure above 4 years
#   IE:     660 for Manager/AM/Director ranks, otherwise none
OPR2['PMI'] = np.where(is_manager | is_director | long_tenure, 1092.72, np.nan)
OPR2['PMI'] = np.where(is_ie, np.nan, OPR2['PMI'])
OPR2['PMI'] = np.where(is_ie & OPR2['Rank'].isin(Manager + AM + Director), 660, OPR2['PMI'])

# Car: 4500 for Manager ranks, 5500 for Director ranks, none for IE.
OPR2['Car'] = np.where(is_manager, 4500, np.nan)
OPR2['Car'] = np.where(is_director, 5500, OPR2['Car'])
OPR2['Car'] = np.where(is_ie, np.nan, OPR2['Car'])

# Pension: 7% of FTE Salary for IE, otherwise 6% multiplied by 1.138.
# NOTE(review): 1.138 is presumably an employer on-cost uplift — confirm.
OPR2['FTE Pension'] = np.where(is_ie, OPR2["FTE Salary"] * 0.07, OPR2["FTE Salary"] * 0.06 * 1.138)

# Row-wise total; DataFrame.sum skips NaN, so missing components count as 0.
Allowance = OPR2[["FTE Pension", "Car", "PMI"]]
OPR2["FTE Allowance"] = Allowance.sum(axis=1)


In [None]:
# Work on an independent (deep) copy so OPR2 is left untouched.
Output = OPR2.copy(deep=True)

### Drop/Rename Columns

In [None]:
def _clean_number(column):
    """Return *column* as floats after removing commas and spaces."""
    return column.astype(str).str.replace(',', '').str.replace(' ', '').astype(float)


def _parse_short_date(column, today):
    """Parse 'dd-Mon-yy' text; dates later than *today* are moved back 100 years.

    '%y' is a two-digit year, so early years are parsed as 20xx. For dates of
    birth and hire dates a result in the future means the wrong century.
    """
    parsed = pd.to_datetime(column, format='%d-%b-%y')
    return parsed.mask(parsed > today, parsed - pd.DateOffset(years=100))


# Money columns arrive as text with thousands separators and stray spaces.
for _money_col in ('Entry', 'Top', 'Actual Salary'):
    Output[_money_col] = _clean_number(Output[_money_col])

# Position of the FTE Salary inside the band: 0 at Entry, 1 at Top.
# A band with Top == Entry divides by zero; those rows become NaN, the same
# way infinite Compa-Ratio values are handled.
Output['Salary Penetration'] = (
    (Output['FTE Salary'] - Output['Entry']) / (Output['Top'] - Output['Entry'])
).replace([np.inf, -np.inf], np.nan)

_today = pd.Timestamp('now')
Output['CurrentEmploymentDate1'] = _parse_short_date(Output['CurrentEmploymentDate1'], _today)
Output['DateOfBirth'] = _parse_short_date(Output['DateOfBirth'], _today)

#Output.style.format({'FTE1': "{:.1%}",'Compa-Ratio': "{:.2%}",'Salary Penetration': "{:.2%}"})

In [None]:
# Columns used for exploration and export. The explicit .copy() makes df an
# independent frame, so later column assignments on df do not raise
# SettingWithCopyWarning or depend on whether the selection was a view.
df = Output[['GPN', 'JobCode', 'JobCodeDesc1', 'UJI','Level','UJI Description','Rank','RankDesc','FTE Salary', 'Actual Salary', 'FTE Allowance','Salary Penetration','Compa-Ratio','Entity','BU','BUName','ServiceLine','SubSL1','DepartmentCode1','DepartmentName1','OU','OUName','MU','MUName','SMU','SMUName','LocationCode','LocationCity','FTE1','EmployeeClass','RegularTemporary','EmployeeCategory','CurrentEmploymentDate1','EmployeeStatus1','Gender','Ethnicity','MaritalStatus','DateOfBirth']].copy()

In [None]:
# Preview the first five rows of df.
df.head(5)

In [None]:
# (rows, columns) of df.
df.shape

In [None]:
df.dtypes

# Distinct (Rank, RankDesc) pairs, keeping the first row seen for each pair.
Rank = df[['Rank', 'RankDesc']].drop_duplicates()
Rank

#### Rank

In [None]:
# Rank codes grouped under short labels.
# NOTE(review): the labels look like grade abbreviations — confirm their
# meaning against the Rank/RankDesc lookup.
Ass = [66, 44, 57, 55, 56, 58]
Snr = [42, 65]
Mgr = [32, 63]
SM = [21, 62]
D = [61, 13]
P = [11]
I = [51, 53]
N = [4]

In [None]:
# Distinct RankDesc values, in order of first appearance.
df['RankDesc'].unique().tolist()

In [None]:
# Column names of df as a plain list.
df.columns.tolist()

#### Numerical Columns

In [None]:
# Columns treated as numeric in the exploration below.
numerical_cols = [
    'FTE Salary',
    'Actual Salary',
    'FTE Allowance',
    'Salary Penetration',
    'Compa-Ratio',
    'FTE1',
]
# Count of missing values per numeric column.
df[numerical_cols].isna().sum()

In [None]:
# Positional indices of the rows whose Rank is one of the SM codes.
df['Rank'].isin(SM).to_numpy().nonzero()

In [None]:
# NOTE(review): `play` is not assigned anywhere in the visible source — confirm
# where it is defined (it may be left over from a removed cell).
play.head()

In [None]:
# Box plot of 'FTE Salary' per 'Entity', split by 'Gender'.
# NOTE(review): `play` is not assigned in the visible source — presumably a
# subset of df; confirm before running.
sns.boxplot(data=play, x='Entity', y='FTE Salary', width=.5, hue='Gender');

# Make sure the date columns are datetime64 before the export. Only columns
# that exist in df are converted: 'SeniorityPayDate' is not among the columns
# selected when df is built, so indexing it directly raises KeyError.
for date_col in ('DateOfBirth', 'SeniorityPayDate', 'CurrentEmploymentDate1'):
    if date_col in df.columns:
        df[date_col] = pd.to_datetime(df[date_col])


### Save to CSV

# Write df (index included) as UTF-8 with a BOM.
df.to_csv(
    path_or_buf=r'C:\Users\2022464\Documents\Learning\Python\OPR\Explore.csv',
    encoding='utf-8-sig',
)