SVM for Classification
Problem Statement:
A bank wants to predict whether a customer will opt for a credit card based on their demographic and financial data.

X Variables (Features):

Age
Annual Income
Credit Score
Loan Amount
Number of Dependents
Y Variable (Target):

Will_Opt_Credit_Card: Binary (1 if the customer opts for a credit card, 0 otherwise)

In [1]:
# 1. Importing the Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# 2. Loading the Dataset
dataset = pd.read_csv("svm_classification_dataset.csv")

# 3. Display the Number of Columns and Rows
print(f"Dataset shape: {dataset.shape}")

# 4. Statistical Data for Each Column
print(dataset.describe(include="all"))

# 5. Display Null Values in Each Column
print(dataset.isnull().sum())

# 6. Encoding the Target
label_encoder = LabelEncoder()
dataset["Will_Opt_Credit_Card"] = label_encoder.fit_transform(dataset["Will_Opt_Credit_Card"])

# 7. Train-Test Split
# The split happens BEFORE imputation and scaling so that the statistics those
# steps learn (column means, standard deviations) come from the training rows
# only. Fitting them on the full dataset would leak information from the test
# rows into training and make the reported accuracy optimistic.
numerical_columns = ["Age", "Annual_Income", "Credit_Score", "Loan_Amount", "Number_of_Dependents"]
X = dataset.drop("Will_Opt_Credit_Card", axis=1)
y = dataset["Will_Opt_Credit_Card"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Work on explicit copies so the column assignments below do not write into
# (or warn about writing into) slices of the original frame.
X_train = X_train.copy()
X_test = X_test.copy()

# 8. Replace Null Values
# Learn the per-column means on the training partition, then reuse those same
# means to fill gaps in the test partition.
imputer = SimpleImputer(strategy="mean")
X_train[numerical_columns] = imputer.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = imputer.transform(X_test[numerical_columns])

# 9. Scaling
# Same rule as imputation: fit on train, apply unchanged to test.
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# 10. Importing the Model
model = SVC()

# 11. Training the Model and Prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 12. Performance Evaluation
# Compute the accuracy once and reuse it for both the report and the analysis.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

# 13. Analysis of Results
if accuracy > 0.8:
    print("The model performed well with high accuracy.")
else:
    print("Consider tuning hyperparameters or balancing the dataset.")


Dataset shape: (500, 6)
              Age  Annual_Income  Credit_Score   Loan_Amount  \
count  450.000000     450.000000    450.000000    450.000000   
mean    44.722222   69170.417778    565.831111  27534.751111   
std     14.798293   28456.214777    161.713908  12690.080729   
min     18.000000   20055.000000    300.000000   5105.000000   
25%     33.000000   43955.250000    430.250000  16727.750000   
50%     45.000000   67760.000000    565.000000  27965.000000   
75%     57.000000   93637.750000    704.750000  38293.250000   
max     69.000000  119835.000000    849.000000  49923.000000   

       Number_of_Dependents  Will_Opt_Credit_Card  
count            450.000000            500.000000  
mean               2.044444              0.536000  
std                1.438503              0.499202  
min                0.000000              0.000000  
25%                1.000000              0.000000  
50%                2.000000              1.000000  
75%                3.000000        