# In [None]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 2: Read the raw records from the CSV file into a DataFrame
data = pd.read_csv("user_behavior_dataset.csv")
preview = data.head()
print("First 5 rows of the dataset:\n", preview)

# Step 3: Data Preprocessing
# Count the null entries in every column before any transformation is applied.
missing_per_column = data.isnull().sum()
print("\nChecking for missing values:\n", missing_per_column)

# Encode categorical features.
# Every column gets its own LabelEncoder, stored in `label_encoders`, so the
# string -> integer mapping of each column stays available afterwards (to
# encode new samples with .transform or decode with .inverse_transform).
# A single shared encoder would be re-fitted on every pass and would only
# remember the classes of the last column.
label_encoders = {}
for col in ['Device Model', 'Operating System', 'Gender']:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Features and target: drop the identifier and the label from the inputs.
X = data.drop(['User ID', 'User Behavior Class'], axis=1)
y = data['User Behavior Class']

# Step 4: Split dataset into training and testing sets
# The split is done BEFORE scaling so that the scaler's mean/variance are
# estimated from the training rows only; fitting it on the full dataset would
# leak test-set statistics into training and bias the reported accuracy.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Feature scaling: fit on the training split, then apply the same
# transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept under its original name for any
# later code that still refers to it (transformed with the train-fitted scaler).
X_scaled = scaler.transform(X)

print(f"\nTraining samples: {len(X_train)}, Testing samples: {len(X_test)}")

# Step 5: Train SVM models with different kernels
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
results = {}  # kernel name -> accuracy on the test split

for kernel in kernels:
    # Fit one classifier per kernel on the same training data and score it
    # on the same held-out data so the accuracies are directly comparable.
    model = SVC(kernel=kernel, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[kernel] = acc

    print(f"\n--- SVM with {kernel} kernel ---")
    print(f"Accuracy: {acc:.3f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix Heatmap
    # Use the sorted union of true and predicted labels for both the matrix
    # and the tick labels; otherwise the heatmap axes show positional indices
    # (0..k-1) instead of the actual class values.
    class_labels = np.union1d(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        cbar=False,
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix: SVM ({kernel} kernel)")
    plt.show()

# Step 6: Compare kernel accuracies
# One bar per kernel, with its test accuracy as the bar height.
kernel_names = list(results.keys())
kernel_scores = list(results.values())

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(kernel_names, kernel_scores, color='skyblue')
ax.set_title('SVM Kernel Accuracy Comparison')
ax.set_ylabel('Accuracy')
plt.show()

# Step 7: Sample prediction using the best-performing kernel
# Select the kernel with the highest test accuracy recorded in `results`
# rather than hard-coding one (ties resolve to the first kernel evaluated).
best_kernel = max(results, key=results.get)

# One raw sample; the values are positional and must follow the column order
# of X exactly.
# NOTE(review): confirm the value order against X.columns — the third value
# (0) sitting before 250 suggests the list may have been written with a
# different column order in mind than the one in the CSV.
sample_input = [[1,1, 0, 250, 5, 1200, 50, 800, 30]]

# Attach the training feature names: the scaler was fitted on a DataFrame, so
# a bare list triggers scikit-learn's feature-name warning, and a sample with
# the wrong number of values now fails here with a clear error.
sample_frame = pd.DataFrame(sample_input, columns=X.columns)
sample_scaled = scaler.transform(sample_frame)

best_model = SVC(kernel=best_kernel, random_state=42)
best_model.fit(X_train, y_train)
predicted_class = best_model.predict(sample_scaled)
print(f"\nPredicted User Behavior Class for given sample input: {predicted_class[0]}")
