In [None]:
import pandas as pd           # For data manipulation
import numpy as np            # For numerical operations
from sklearn.model_selection import train_test_split   # For splitting data into train/test sets
from sklearn.svm import SVC                             # For Support Vector Machine algorithm
from sklearn.preprocessing import StandardScaler        # For feature scaling
from sklearn.metrics import accuracy_score              # To evaluate model performance

# Load the cleaned dataset from a pickled DataFrame.
# The path is a raw string so the backslashes of the Windows path are taken
# literally; in a normal string '\B', '\S', '\d' and '\c' are invalid escape
# sequences, which newer Python versions warn about.
path = r'D:\BIM\Summer Project\datasets\cleaned_ibm_dataset.pkl'
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
data = pd.read_pickle(path)

In [None]:
# Separate the predictors from the 'Attrition' target column
X = data.drop(columns='Attrition')  # every column except the target
y = data['Attrition']               # the column the model will predict

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=41,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

# Standardise the features: the scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both the training and the test rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
from sklearn.metrics import classification_report

# Create and train a Support Vector Machine classifier with a linear kernel
model = SVC(kernel='linear', random_state=42)  # Instance of the model
model.fit(X_train, y_train)                    # Train on the scaled training set

# Make predictions on the test set
predictions = model.predict(X_test)
# Show only a short preview instead of dumping the whole prediction array;
# the accuracy and classification report below summarise all predictions.
print(predictions[:10])

# Evaluate the model's performance: overall accuracy first ...
accuracy = accuracy_score(y_test, predictions)
print(f'\nModel accuracy: {accuracy:.2f}\n')

# ... then the per-class precision / recall / F1 breakdown
print(classification_report(y_test, predictions))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Tabulate the test-set predictions against the true labels
cm = confusion_matrix(y_test, predictions)
print(cm)

# Render the matrix as an annotated heatmap on an explicit Axes object
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)
ax.set(
    xlabel='Predicted labels',
    ylabel='True labels',
    title='Confusion Matrix',
)
plt.show()