In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the mushroom CSV from the working directory and preview the frame
csv_path = "mushroom.csv"
data = pd.read_csv(csv_path)
data




In [None]:
# Overview of the data
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
data.info()
print(data.describe())
print(data.head())


In [None]:

# Check for missing values
print(data.isnull().sum())

# Visualize feature distributions: one bar chart of value counts per column
for column in data.columns:
    plt.figure(figsize=(8, 4))
    # Pass the column by keyword. In seaborn >= 0.12 the first positional
    # parameter of countplot is `data`, so a bare positional Series is not
    # interpreted as the x variable.
    sns.countplot(x=data[column])
    plt.title(f'Distribution of {column}')
    plt.xticks(rotation=45)
    plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables as integer codes, one fitted encoder per column.
# A single shared LabelEncoder is re-fit on every iteration and therefore only
# remembers the classes of the last column; keeping each column's encoder in
# `encoders` lets the codes be mapped back later with inverse_transform.
encoders = {}
for column in data.columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    encoders[column] = encoder



In [None]:
# Show the frame again now that the encoding loop has replaced every column
# with LabelEncoder integer codes.
data

In [None]:
# Separate the predictors from the label column
target_column = 'class'  # Assuming 'class' is the target column
y = data[target_column]
X = data.drop(columns=target_column)

# Hold out 20% of the rows for testing; the seed fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Summarize the split by shape instead of echoing all four objects, whose
# combined plain-text tuple repr is long and hard to read.
{
    'X_train': X_train.shape,
    'X_test': X_test.shape,
    'y_train': y_train.shape,
    'y_test': y_test.shape,
}

In [None]:
# Scatter-matrix of every column against every other, colored by the target
pair_grid = sns.pairplot(data=data, hue='class')
plt.show()



In [None]:
# Bar chart of how many rows fall into each target class
fig, ax = plt.subplots()
sns.countplot(x=y, ax=ax)
ax.set_title('Class Distribution')
plt.show()



In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Fit an RBF-kernel support vector classifier on the training split
svm_params = {'kernel': 'rbf', 'random_state': 42}
svm_model = SVC(**svm_params)
svm_model.fit(X_train, y_train)



In [None]:
# Score the fitted classifier on the held-out split
y_pred = svm_model.predict(X_test)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)
print(report)
print("Confusion Matrix:\n", matrix)


In [None]:
from sklearn.decomposition import PCA

# Project the features onto two principal components; the projection is
# fitted on the training split and then applied unchanged to the test split
n_components = 2
pca = PCA(n_components=n_components)
X_train_2D = pca.fit_transform(X_train)
X_test_2D = pca.transform(X_test)




In [None]:
# Train a separate SVM on the 2D PCA projection.
# Refitting `svm_model` here would overwrite the classifier trained on the full
# feature set, so re-running the earlier svm_model.predict(X_test) cell would
# then hit a model that expects two PCA features instead of the original columns.
svm_model_2d = SVC(kernel='rbf', random_state=42)
svm_model_2d.fit(X_train_2D, y_train)
y_pred_2D = svm_model_2d.predict(X_test_2D)



In [None]:
# Plot the test points in PCA space, colored by the labels predicted on the 2D projection
fig, ax = plt.subplots(figsize=(8, 6))
pc1, pc2 = X_test_2D[:, 0], X_test_2D[:, 1]
ax.scatter(pc1, pc2, c=y_pred_2D, cmap='coolwarm', alpha=0.7)
ax.set_title('SVM Classification Results (2D Projection)')
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV

# Define parameter grid as a list of per-kernel grids.
# The linear kernel ignores `gamma`, so a single flat grid would fit every
# linear/C pair once per gamma value with identical results. Splitting the grid
# keeps the same kernels and C values while dropping those redundant fits.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
    },
    {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10],
        'gamma': [0.1, 0.01, 0.001],
    },
]



In [None]:
# Exhaustive 5-fold cross-validated search over param_grid, ranked by accuracy
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)


In [None]:
# Keep the estimator selected by the search and report its hyper-parameters
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)


In [None]:
# Score the tuned estimator on the held-out split
y_pred_best = best_model.predict(X_test)
best_report = classification_report(y_test, y_pred_best)
print(best_report)


In [None]:
# Compare default-parameter SVMs across kernels on the same train/test split
kernels = ['linear', 'poly', 'rbf']
for kernel in kernels:
    model = SVC(kernel=kernel).fit(X_train, y_train)
    y_pred_kernel = model.predict(X_test)
    print(f"Kernel: {kernel}", classification_report(y_test, y_pred_kernel), sep="\n")
