# Importing the preprocessed data and the libraries we will be using

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Read the preprocessed mushroom dataset from the CSV in the working directory.
data_path = 'preprocessing_mushroom.csv'
df_encoded = pd.read_csv(data_path)

In [6]:
# Preview the loaded frame. Note the 'Unnamed: 0' column in the output: it counts
# up with the row position, i.e. it looks like a row index that was written into
# the CSV rather than a mushroom attribute.
df_encoded

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,poisonous
0,5,2,4,1,6,1,0,1,4,0,...,7,7,0,2,1,4,2,3,5,1
1,5,2,9,1,0,1,0,0,4,0,...,7,7,0,2,1,4,3,2,1,0
2,0,2,8,1,3,1,0,0,5,0,...,7,7,0,2,1,4,3,2,3,0
3,5,3,8,1,6,1,0,1,5,0,...,7,7,0,2,1,4,2,3,5,1
4,5,2,3,0,5,1,1,0,4,1,...,7,7,0,2,1,0,3,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,3,2,4,0,5,0,0,0,11,0,...,5,5,0,1,1,4,0,1,2,0
8120,5,2,4,0,5,0,0,0,11,0,...,5,5,0,0,1,4,0,4,2,0
8121,2,2,4,0,5,0,0,0,5,0,...,5,5,0,1,1,4,0,1,2,0
8122,3,3,4,0,8,1,0,1,0,1,...,7,7,0,2,1,0,7,4,2,1


# SVM models with different parameters

## Split the Data 

In [16]:
# Separate the features from the target label.
# 'Unnamed: 0' is the row index that was saved into the CSV, not a mushroom
# attribute. Leaving it in X would let the models learn from row order (a data
# leak), so it is removed from the features. errors='ignore' keeps this cell
# working if the CSV is later re-exported without that column.
X = (
    df_encoded
    .drop(columns='poisonous')
    .drop(columns='Unnamed: 0', errors='ignore')
)
y = df_encoded['poisonous']

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Report how many rows ended up in each side of the split (shape[0] is the row count).
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

Training set size: 6499 samples
Test set size: 1625 samples


# Linear SVM

### Linear Kernel, C=1, gamma=scale

In [29]:
# Baseline linear-kernel SVM (C=1): train on the training split, then predict
# the held-out test split.
linear1_model = SVC(kernel='linear', C=1, gamma='scale').fit(X_train, y_train)
y_pred = linear1_model.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy on the test split.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 0.9618
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       843
           1       0.95      0.97      0.96       782

    accuracy                           0.96      1625
   macro avg       0.96      0.96      0.96      1625
weighted avg       0.96      0.96      0.96      1625



### Linear Kernel, C=10, gamma=scale

In [33]:
# Linear-kernel SVM with a larger penalty (C=10): train on the training split,
# then predict the held-out test split.
linear10_model = SVC(kernel='linear', C=10, gamma='scale').fit(X_train, y_train)
y_pred = linear10_model.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy on the test split.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 0.9723
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.96      0.97       843
           1       0.96      0.99      0.97       782

    accuracy                           0.97      1625
   macro avg       0.97      0.97      0.97      1625
weighted avg       0.97      0.97      0.97      1625



### Linear Kernel, C=1, gamma=auto

In [40]:
# Linear-kernel SVM (C=1) with gamma='auto'.
# NOTE: scikit-learn documents gamma as the kernel coefficient for the 'rbf',
# 'poly' and 'sigmoid' kernels, so it is not expected to affect a linear kernel;
# the scores printed by this cell match the C=1, gamma='scale' run.
linear1auto_model = SVC(kernel='linear', C=1, gamma='auto').fit(X_train, y_train)
y_pred = linear1auto_model.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy on the test split.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 0.9618
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       843
           1       0.95      0.97      0.96       782

    accuracy                           0.96      1625
   macro avg       0.96      0.96      0.96      1625
weighted avg       0.96      0.96      0.96      1625



# RBF 

### RBF Kernel, C=1, gamma=auto

In [51]:
# RBF-kernel SVM (C=1, gamma='auto'): train on the training split, then predict
# the held-out test split.
rbf1_model = SVC(kernel='rbf', C=1, gamma='auto').fit(X_train, y_train)
y_pred = rbf1_model.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy on the test split.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### RBF Kernel, C=10, gamma=auto

In [61]:
# RBF-kernel SVM with a larger penalty (C=10, gamma='auto').
# Stored under its own name, following the linear1/linear10 naming pattern, so
# the C=1 model held in `rbf1_model` is not overwritten and both fitted models
# remain available for comparison.
rbf10_model = SVC(kernel='rbf', C=10, gamma='auto')

rbf10_model.fit(X_train, y_train)

y_pred = rbf10_model.predict(X_test)

# Overall accuracy and per-class precision/recall/F1 on the test split.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



# Polynomial

### Polynomial Kernel, C=1, degree=3, gamma=scale

In [68]:
# Degree-3 polynomial-kernel SVM (C=1, gamma='scale'): train on the training
# split, then predict the held-out test split.
poly1_model = SVC(kernel='poly', C=1, degree=3, gamma='scale').fit(X_train, y_train)
y_pred = poly1_model.predict(X_test)

# Per-class precision/recall/F1 and overall accuracy on the test split.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### Polynomial Kernel, C=10, degree=3, gamma=scale

In [72]:
# Degree-3 polynomial-kernel SVM with a larger penalty (C=10, gamma='scale').
# Stored under its own name, following the linear1/linear10 naming pattern, so
# the C=1 model held in `poly1_model` is not overwritten and both fitted models
# remain available for comparison.
poly10_model = SVC(kernel='poly', C=10, degree=3, gamma='scale')

poly10_model.fit(X_train, y_train)

y_pred = poly10_model.predict(X_test)

# Overall accuracy and per-class precision/recall/F1 on the test split.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



# Observations

All of the models scored above 95% accuracy on the test set, which could indicate overfitting; more importantly, the dataset is not large enough to give a truly representative real-world result. Independent of that, the model performs very well with every parameter setting tried. It will be interesting to see what happens when adding HPSVM, since the accuracy is already above 95% and we know HPSVM is designed for datasets with more rows.