In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the CSV and recode the 'Attrition' labels in one chained step:
# 'Yes' becomes 1 and 'No' becomes 0.
attrition_codes = {'Yes': 1, 'No': 0}
df = pd.read_csv("ds.csv").assign(
    Attrition=lambda frame: frame['Attrition'].map(attrition_codes)
)

# Three predictor columns and the recoded target
feature_columns = ['Tenure', 'JobSatisfaction', 'Salary']
X = df[feature_columns]
y = df['Attrition']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a default logistic regression on the training rows (fit returns the estimator)
model = LogisticRegression().fit(X_train, y_train)

# Class predictions for the held-out rows
y_pred = model.predict(X_test)

# Report confusion matrix, per-class metrics and overall accuracy on the test rows
test_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", test_confusion)
test_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", test_report)
test_accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy Score:", test_accuracy)

Confusion Matrix:
 [[12  0]
 [ 6  2]]

Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80        12
           1       1.00      0.25      0.40         8

    accuracy                           0.70        20
   macro avg       0.83      0.62      0.60        20
weighted avg       0.80      0.70      0.64        20


Accuracy Score: 0.7


In [2]:
import pandas as pd

# One hypothetical employee described by the three model inputs
# (Tenure, JobSatisfaction, Salary), built as a single-row frame.
new_employee = {'Tenure': 2.0, 'JobSatisfaction': 3, 'Salary': 55000}
new_data = pd.DataFrame([new_employee])

# Ask the previously fitted `model` for a class label and print it as Yes/No
prediction = model.predict(new_data)
predicted_label = "Yes" if prediction[0] == 1 else "No"
print("Predicted Attrition:", predicted_label)

Predicted Attrition: No


In [3]:
# prompt: generate a predictive model code to predict attrition based on tenure , jobsatisfaction and salary

# Assuming 'Salary' is numerical, let's check if it needs scaling
# For Logistic Regression, scaling is often beneficial for numerical features.
# We will use StandardScaler for scaling Salary and Tenure.
from sklearn.preprocessing import StandardScaler

# Select features and target (raw, unscaled values taken straight from df)
X = df[['Tenure', 'JobSatisfaction', 'Salary']]
y = df['Attrition']

# Create a list of numerical features to scale
numerical_features = ['Tenure', 'Salary']

# Rebuild the train/test frames from the raw features, using the row labels of
# the existing split, and take explicit copies before modifying them.
# Why: scaling the split frames in place made this cell non-idempotent (running
# it twice standardised already-standardised values) and can trigger pandas'
# SettingWithCopyWarning because the split frames are derived from another frame.
# Row labels are unaffected by scaling, so re-running this cell always starts
# again from the raw values while keeping exactly the same train/test rows.
# NOTE(review): assumes df has unique index labels (default read_csv index).
X_train = X.loc[X_train.index].copy()
X_test = X.loc[X_test.index].copy()

# Initialize StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to the test rows so no test statistics leak into training.
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# Initialize and train model on the scaled features
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate performance after scaling
print("Confusion Matrix after scaling:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report after scaling:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score after scaling:", accuracy_score(y_test, y_pred))

# Predict for new employee data (must also be scaled).
# The frame is created fresh here, so re-running the cell never scales it twice.
new_data = pd.DataFrame({
    'Tenure': [2.0],
    'JobSatisfaction': [3],
    'Salary': [55000]
})

# Scale the numerical features of the new data using the same fitted scaler
new_data[numerical_features] = scaler.transform(new_data[numerical_features])

# Predict
prediction = model.predict(new_data)
print("\nPredicted Attrition for new employee (after scaling):", "Yes" if prediction[0] == 1 else "No")

Confusion Matrix after scaling:
 [[12  0]
 [ 7  1]]

Classification Report after scaling:
               precision    recall  f1-score   support

           0       0.63      1.00      0.77        12
           1       1.00      0.12      0.22         8

    accuracy                           0.65        20
   macro avg       0.82      0.56      0.50        20
weighted avg       0.78      0.65      0.55        20


Accuracy Score after scaling: 0.65

Predicted Attrition for new employee (after scaling): No


In [4]:
# prompt: generate a predictive model code to predict attrition based on tenure , jobsatisfaction and salary use random forest as the model

from sklearn.ensemble import RandomForestClassifier

# Build a 100-tree random forest with a fixed seed and fit it on the training
# rows in one step (fit returns the estimator itself).
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Class predictions for the held-out rows
y_pred_rf = rf_model.predict(X_test)

# Report confusion matrix, per-class metrics and overall accuracy on the test rows
rf_confusion = confusion_matrix(y_test, y_pred_rf)
print("Random Forest Confusion Matrix after scaling:\n", rf_confusion)
rf_report = classification_report(y_test, y_pred_rf)
print("\nRandom Forest Classification Report after scaling:\n", rf_report)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("\nRandom Forest Accuracy Score after scaling:", rf_accuracy)

# `new_data` is reused exactly as the preceding Logistic Regression cell left it
# (that cell already applied the scaler to it), so it is not rebuilt or rescaled here.

# Predict for the new employee and print the label as Yes/No
prediction_rf = rf_model.predict(new_data)
rf_label = "Yes" if prediction_rf[0] == 1 else "No"
print("\nRandom Forest Predicted Attrition for new employee (after scaling):", rf_label)

Random Forest Confusion Matrix after scaling:
 [[10  2]
 [ 8  0]]

Random Forest Classification Report after scaling:
               precision    recall  f1-score   support

           0       0.56      0.83      0.67        12
           1       0.00      0.00      0.00         8

    accuracy                           0.50        20
   macro avg       0.28      0.42      0.33        20
weighted avg       0.33      0.50      0.40        20


Random Forest Accuracy Score after scaling: 0.5

Random Forest Predicted Attrition for new employee (after scaling): No
