# In [None]:
# 💳 Support Vector Machine Classification on Bank Transactions Dataset

# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Step 2: Load Dataset
# Read the transactions CSV (path is relative to the working directory)
# and print a quick overview: first rows, schema, shape, missing values.
df = pd.read_csv("bank_transactions_data_2.csv")
print("✅ Dataset Loaded Successfully\n")
print(df.head())

print("\n--- Dataset Overview ---")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit an extra "None" line.
df.info()

print("\n--- Shape ---")
print(df.shape)

print("\n--- Checking for Missing Values ---")
print(df.isnull().sum())

# Step 3: Data Preprocessing
# Drop irrelevant columns (like IDs, IP, timestamps)
df = df.drop(
    columns=[
        'TransactionID',
        'AccountID',
        'TransactionDate',
        'DeviceID',
        'IP Address',
        'MerchantID',
        'PreviousTransactionDate',
    ]
)

# Encode categorical columns as integer codes.
# A fresh LabelEncoder is fitted for each column and stored in `encoders`,
# so the code -> label mapping of every column stays available afterwards
# (e.g. encoders['TransactionType'].classes_). A single shared encoder
# only remembers the last column it was fitted on.
categorical_cols = ['TransactionType', 'Location', 'Channel', 'CustomerOccupation']
encoders = {}
for col in categorical_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    encoders[col] = encoder
# NOTE(review): integer codes impose an arbitrary order on nominal features
# such as Location; consider one-hot encoding the feature columns instead.

print("\n--- Data After Encoding ---")
print(df.head())

# Step 4: Define Features (X) and Target (y)
# The target is the label-encoded TransactionType. LabelEncoder numbers the
# classes in sorted order, so assuming the raw labels are "Credit" and
# "Debit" the coding is Credit=0, Debit=1.
X = df.drop('TransactionType', axis=1)
y = df['TransactionType']

# Step 5: Split Dataset into Training and Testing Sets
# Split BEFORE scaling so that no statistics of the held-out rows leak into
# the preprocessing. stratify=y keeps the class ratio the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Step 6: Feature Scaling
# Fit the scaler on the training rows only, then apply that same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 7: Train SVM Models with Linear and Polynomial Kernels
# For each kernel: fit on the training split, predict the test split,
# print the metrics and show the confusion matrix as a heatmap.
kernels = ['linear', 'poly']
accuracy_results = {}  # kernel name -> accuracy on the test split (0..1)

for kernel in kernels:
    model = SVC(kernel=kernel, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate Model
    acc = accuracy_score(y_test, y_pred)
    accuracy_results[kernel] = acc
    # Computed once and reused for both the printed output and the plot.
    cm = confusion_matrix(y_test, y_pred)

    print(f"\n--- SVM ({kernel.upper()} Kernel) ---")
    print(f"Accuracy: {acc * 100:.2f}%")
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Plot Confusion Matrix
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix - {kernel.upper()} Kernel')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    
# Step 8: Compare Accuracy Between Kernels
# Print one line per kernel with its test accuracy as a percentage.
print("\n=== Accuracy Comparison Between Kernels ===")
for kernel_name in accuracy_results:
    score = accuracy_results[kernel_name]
    print(f"{kernel_name.upper()} Kernel: {score * 100:.2f}%")

# Step 9: Visualization - Accuracy Comparison
# Bar chart of the same accuracies; the y-axis is fixed to the full 0..1
# range so the bars are comparable at a glance.
kernel_names = list(accuracy_results)
kernel_scores = [accuracy_results[name] for name in kernel_names]

plt.figure(figsize=(7, 5))
plt.bar(kernel_names, kernel_scores, color=['blue', 'orange'])
plt.title("SVM Kernel Accuracy Comparison (Linear vs Polynomial)")
plt.xlabel("Kernel Type")
plt.ylabel("Accuracy")
plt.ylim(0, 1)
plt.show()