In [1]:
import pandas as pd
from lifelines import CoxPHFitter

# Sample data (replace this with your own data).
# Every column tiles a short pattern so that all columns end up with 200 rows.
data = {
    'EmployeeID': range(1, 201),
    'Age': [35, 42, 28, 39, 45, 33, 48, 29, 37, 41] * 20,
    'Gender': ['Male', 'Female'] * 100,
    'Department': ['Sales', 'Marketing', 'Finance', 'HR'] * 50,
    'Salary': [50000, 60000, 55000, 52000, 58000, 53000, 57000, 54000, 59000, 51000] * 20,
    'PerformanceRating': [4, 3, 5, 4, 4, 3, 5, 4, 3, 5] * 20,
    'Tenure': [2, 3, 1, 4, 5, 2, 6, 1, 3, 4] * 20,
    'Resignation': [0, 1, 0, 0, 1, 0, 1, 0, 0, 1] * 20  # 1 if employee resigned, 0 if still employed
}
df = pd.DataFrame(data)

# Encode categorical variables.
# Gender is mapped explicitly (Male: 1, Female: 0) instead of assigning the
# output of get_dummies(drop_first=True) to the column: that only works while
# exactly two categories are present, picks which one becomes 1 from
# alphabetical order, and yields a bool/uint8 column depending on the pandas
# version. With the explicit map, any unexpected label becomes NaN and is
# therefore visible instead of being silently mis-encoded.
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})
# One 0/1 integer indicator column per department; the first category is
# dropped to serve as the reference level.
df = pd.get_dummies(df, columns=['Department'], drop_first=True, dtype=int)

# Cox proportional hazards regression.
# 'Tenure' is passed as the duration column and 'Resignation' as the event
# indicator; the remaining selected columns act as covariates.
model_columns = ['Age', 'Gender', 'Salary', 'PerformanceRating', 'Tenure', 'Resignation']
cph = CoxPHFitter()
cph.fit(
    df.loc[:, model_columns],
    duration_col='Tenure',
    event_col='Resignation',
)

# Report the fitted coefficient table under a heading
print("Coefficients:", cph.summary, sep="\n")

# Predict probability of resignation for a hypothetical employee
hypothetical_employee = pd.DataFrame({
    'Age': [40],
    'Gender': [1],  # Male
    'Salary': [55000],
    'PerformanceRating': [4],
    # NOTE(review): 'Tenure' is the model's duration column, not a covariate,
    # so it presumably has no effect on the prediction below — confirm. To
    # condition on years already served, lifelines documents a
    # `conditional_after` argument on predict_survival_function.
    'Tenure': [3]  # Years
})

# predict_survival_function returns S(t): the probability that the employee
# has NOT resigned by time t. The probability of having resigned by time t is
# its complement, 1 - S(t), which is what the printed label promises.
survival_prob = cph.predict_survival_function(hypothetical_employee, times=[1, 2, 3, 4, 5])  # Predict at specific time points
prob_resignation = 1 - survival_prob
print("\nProbability of resignation over time for the hypothetical employee:")
print(prob_resignation)


Coefficients:
                       coef  exp(coef)  se(coef)  coef lower 95%   
covariate                                                          
Age               -0.551575   0.576042  0.214734       -0.972447  \
Gender            -4.842531   0.007887  0.735278       -6.283649   
Salary             0.001098   1.001098  0.000286        0.000538   
PerformanceRating  2.758552  15.776985  1.015993        0.767243   

                   coef upper 95%  exp(coef) lower 95%  exp(coef) upper 95%   
covariate                                                                     
Age                     -0.130703             0.378157             0.877478  \
Gender                  -3.401414             0.001867             0.033326   
Salary                   0.001658             1.000538             1.001659   
PerformanceRating        4.749862             2.153819           115.568306   

                   cmp to         z             p   -log2(p)  
covariate                              

