In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv("Day 57 Support Vector Machines.csv")

# Display basic info. DataFrame.info() writes its report to stdout itself and
# returns None, so it is called directly; wrapping it in print() would append
# a stray "None" line after the report.
df.info()

# Show the first 5 rows
print(df.head())

# Check for missing values (number of nulls per column)
print(df.isnull().sum())


In [None]:
# Descriptive statistics of the loaded frame
summary_stats = df.describe()
print(summary_stats)

# Histogram grid of the feature distributions, 30 bins per panel
df.hist(bins=30, edgecolor="black", figsize=(12, 8))
plt.show()


In [None]:
# Pairwise correlations between the numeric columns only. Without
# numeric_only=True, pandas >= 2.0 raises on any non-numeric column
# (e.g. a string-valued label) instead of silently skipping it.
# The numeric_only argument of DataFrame.corr requires pandas >= 1.5.
corr_matrix = df.corr(numeric_only=True)

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
plt.title("Feature Correlation Heatmap")
plt.show()


In [None]:
# List the column labels of the loaded frame
column_labels = df.columns
print(column_labels)


In [None]:
# The last column of the frame is used as the target; all remaining
# columns form the feature matrix.
target_column = df.columns[-1]
y = df[target_column]
X = df.drop(labels=[target_column], axis="columns")

print(f"Selected Target Column: {target_column}")


In [None]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Split into training (80%) and testing (20%) sets, stratified on the target
# so both partitions keep the class proportions of y.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# SVMs are not scale invariant: features with large numeric ranges dominate
# the kernel/margin computation. Standardize every feature to zero mean and
# unit variance, learning the statistics from the training split only so that
# nothing about the test split leaks into the model.
# The results are wrapped back into DataFrames (same columns and index) so
# X_train / X_test keep the type they had before scaling.
# Note: any cross-validation run on X_train afterwards reuses these
# training-wide statistics; put the scaler inside a Pipeline if fold-level
# isolation is required.
scaler = StandardScaler().fit(X_train_raw)
X_train = pd.DataFrame(
    scaler.transform(X_train_raw),
    columns=X_train_raw.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw),
    columns=X_test_raw.columns,
    index=X_test_raw.index,
)

# Train an SVM Classifier with Linear Kernel
svm_linear = SVC(kernel="linear", C=1.0, random_state=42)
svm_linear.fit(X_train, y_train)

# Predictions on the held-out test split
y_pred_linear = svm_linear.predict(X_test)

# Performance: overall accuracy followed by the per-class report
print("SVM (Linear Kernel) Accuracy:", accuracy_score(y_test, y_pred_linear))
print(classification_report(y_test, y_pred_linear))


In [None]:
# Polynomial kernel (degree 3): fit on the training split, then predict the
# test split. SVC.fit returns the fitted estimator, so the call is chained.
svm_poly = SVC(kernel="poly", degree=3, C=1.0, random_state=42).fit(X_train, y_train)
y_pred_poly = svm_poly.predict(X_test)

# RBF kernel with gamma="scale": same fit / predict sequence
svm_rbf = SVC(kernel="rbf", C=1.0, gamma="scale", random_state=42).fit(X_train, y_train)
y_pred_rbf = svm_rbf.predict(X_test)

# Compare test-set accuracy of the two kernels
poly_accuracy = accuracy_score(y_test, y_pred_poly)
rbf_accuracy = accuracy_score(y_test, y_pred_rbf)
print("SVM (Polynomial Kernel) Accuracy:", poly_accuracy)
print("SVM (RBF Kernel) Accuracy:", rbf_accuracy)


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC


In [None]:
# Define hyperparameter grid as a list of sub-grids (accepted by GridSearchCV).
# "gamma" is a kernel coefficient that the linear kernel does not use, so the
# linear kernel gets its own sub-grid without it. Crossing gamma with
# kernel="linear" would only refit identical linear models once per gamma
# value (18 candidates instead of the 15 distinct ones below).
param_grid = [
    {
        "kernel": ["linear"],
        "C": [0.1, 1, 10],
    },
    {
        "kernel": ["poly", "rbf"],
        "C": [0.1, 1, 10],
        "gamma": ["scale", "auto"],
    },
]


In [None]:
# Base SVC estimator whose hyperparameters are searched
svm = SVC()

# Exhaustive search over param_grid, scored by accuracy with 5-fold
# cross-validation; n_jobs=-1 runs the fits in parallel.
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Winning parameter combination and its mean cross-validated accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Estimator that the grid search selected as best
best_svm = grid_search.best_estimator_

# Score it on the held-out test split
y_pred_best = best_svm.predict(X_test)
best_accuracy = accuracy_score(y_test, y_pred_best)
best_report = classification_report(y_test, y_pred_best)

# Overall accuracy followed by the per-class report
print("Best SVM Model Accuracy:", best_accuracy)
print(best_report)
