# Practical

1. Write a Python program to train an SVM Classifier on the Iris dataset and evaluate accuracy.

In [None]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Fetch the Iris data and unpack the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build an RBF-kernel SVM and fit it on the training split in one step
# (fit() returns the fitted estimator itself)
svm_model = SVC(kernel='rbf', gamma='scale', C=1.0).fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 summary
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

2. Write a Python program to train two SVM classifiers with Linear and RBF kernels on the Wine dataset, then
compare their accuracies.

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the Wine dataset and pull out features and labels
wine = datasets.load_wine()
X, y = wine.data, wine.target

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear-kernel SVM: fit, predict, score
svm_linear = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_linear = svm_linear.predict(X_test)
accuracy_linear = accuracy_score(y_test, y_pred_linear)

# RBF-kernel SVM: fit, predict, score on the very same split
svm_rbf = SVC(kernel='rbf', gamma='scale', random_state=42).fit(X_train, y_train)
y_pred_rbf = svm_rbf.predict(X_test)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)

# Report both accuracies side by side
print(f"Accuracy of Linear SVM: {accuracy_linear:.2f}")
print(f"Accuracy of RBF SVM: {accuracy_rbf:.2f}")

3. Write a Python program to train an SVM Regressor (SVR) on a housing dataset and evaluate it using Mean
Squared Error (MSE).

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, explained_variance_score
from sklearn.model_selection import train_test_split

# Load the California housing dataset instead of the Boston housing dataset,
# as load_boston has been removed from scikit-learn due to ethical concerns
from sklearn.datasets import fetch_california_housing
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X = housing.data
y = housing.target


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the SVR model with RBF kernel.
# An RBF kernel works on distances between samples, so features measured on
# large scales dominate features measured on small ones. Standardizing inside
# a pipeline puts every feature on a comparable scale, and the scaler is
# fitted on the training split only (no information leaks from the test set).
svr_model = make_pipeline(
    StandardScaler(),
    SVR(kernel='rbf', C=1.0, epsilon=0.1, gamma='scale'),
)

# Train the scaler + SVR pipeline
svr_model.fit(X_train, y_train)

# Make predictions on the test set (the pipeline scales X_test automatically)
y_pred = svr_model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Calculate Explained Variance Score
evs = explained_variance_score(y_test, y_pred)
print(f"Explained Variance Score: {evs:.2f}")

4. Write a Python program to train an SVM Classifier with a Polynomial Kernel and visualize the decision
boundary.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Build a two-feature, two-class toy problem.
# n_informative is 2 so that make_classification's constraint
#   n_classes * n_clusters_per_class <= 2**n_informative
# holds (2 * 2 <= 2**2).
X, y = datasets.make_classification(
    n_samples=100,
    n_features=2,
    n_classes=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    random_state=42,
)

# Bring both features to zero mean / unit variance
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Degree-3 polynomial-kernel SVM, trained on the training split
svm_poly = SVC(kernel='poly', degree=3, C=1.0, coef0=1, random_state=42)
svm_poly.fit(X_train, y_train)

# 100 x 100 grid covering the data range with a margin of 1 on every side
grid_x = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100)
grid_y = np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100)
xx, yy = np.meshgrid(grid_x, grid_y)

# Classify every grid point and fold the labels back into the grid shape
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = svm_poly.predict(grid_points).reshape(xx.shape)

# Filled contour = predicted class regions; dots = all samples, colored by label
plt.contourf(xx, yy, Z, alpha=0.75, cmap=plt.cm.coolwarm)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o', s=50, cmap=plt.cm.coolwarm)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Classifier with Polynomial Kernel')
plt.show()

5. Write a Python program to train a Gaussian Naïve Bayes classifier on the Breast Cancer dataset and
evaluate accuracy.

In [None]:
# Import necessary libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Pull the Breast Cancer data and unpack features and labels
data = load_breast_cancer()
X, y = data.data, data.target

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the Gaussian Naive Bayes model and fit it in one step
# (fit() returns the fitted estimator itself)
gnb = GaussianNB().fit(X_train, y_train)

# Classify the held-out samples
y_pred = gnb.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

6. Write a Python program to train a Multinomial Naïve Bayes classifier for text classification using the 20
Newsgroups dataset.

In [None]:
# Import necessary libraries
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Download/load every 20 Newsgroups post, with headers, footers and quoted
# replies stripped from each document
newsgroups = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
texts, labels = newsgroups.data, newsgroups.target

# 80/20 train/test split of the raw documents
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Bag-of-words counts: English stop words removed, vocabulary capped at 5000 terms
vectorizer = CountVectorizer(stop_words='english', max_features=5000)

# Learn the vocabulary from the training documents only, then reuse it for the test documents
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit Multinomial Naive Bayes on the count features
nb_classifier = MultinomialNB().fit(X_train_vec, y_train)

# Classify the held-out documents
y_pred = nb_classifier.predict(X_test_vec)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Per-newsgroup precision / recall / F1, labelled with the group names
print("Classification Report:")
report = classification_report(y_test, y_pred, target_names=newsgroups.target_names)
print(report)

7. Write a Python program to train an SVM Classifier with different C values and compare the decision boundaries visually.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Step 1: Generate a synthetic 2D dataset (binary classification)
X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, random_state=42)

# Step 2: Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Define different values of C
C_values = [0.1, 1, 10]

# Step 4: Build the evaluation mesh once.
# The grid depends only on the data range, not on C, so it is computed
# outside the loop instead of being rebuilt for every classifier.
h = .02  # Step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# Step 5: Create one subplot per value of C
fig, axes = plt.subplots(1, len(C_values), figsize=(15, 5))
# plt.subplots returns a bare Axes (not an array) when only one subplot is
# requested; normalise so the loop also works for a single C value.
axes = np.atleast_1d(axes)

for ax, C in zip(axes, C_values):
    # Step 6: Train the SVM Classifier with the current value of C
    svm_classifier = SVC(kernel='linear', C=C)
    svm_classifier.fit(X_train, y_train)

    # Step 7: Predict over the mesh and plot the decision regions
    Z = svm_classifier.predict(grid_points).reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)
    # Overlay the training samples, colored by their true label
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', marker='o', cmap=plt.cm.coolwarm)
    ax.set_title(f"SVM Classifier with C={C}")
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')

plt.tight_layout()
plt.show()


8. Write a Python program to train a Bernoulli Naïve Bayes classifier for binary classification on a dataset with
binary features.

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import make_classification

# Synthetic two-class problem: 1000 samples, 10 continuous features
# (5 informative, none redundant), one cluster per class
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=0,
    n_clusters_per_class=1,
    n_classes=2,
    random_state=42,
)

# Binarize every feature: values above 0.5 become 1, everything else 0
X = (X > 0.5).astype(int)

# 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create the Bernoulli Naive Bayes model and fit it in one step
bnb = BernoulliNB().fit(X_train, y_train)

# Classify the held-out samples
y_pred = bnb.predict(X_test)

# Accuracy followed by the per-class report
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)


9. Write a Python program to apply feature scaling before training an SVM model and compare results with unscaled data.

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Iris features and labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# 70/30 train/test split shared by both experiments
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Baseline: linear SVM on the raw features ---
svm_unscaled = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_unscaled = svm_unscaled.predict(X_test)
accuracy_unscaled = accuracy_score(y_test, y_pred_unscaled)

print("Results on unscaled data:")
print(f"Accuracy: {accuracy_unscaled:.4f}")
print(classification_report(y_test, y_pred_unscaled))

# --- Standardize the features ---
# The scaler learns mean and variance from the training split only and the
# same statistics are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Same linear SVM, now on the standardized features ---
svm_scaled = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)
y_pred_scaled = svm_scaled.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)

print("\nResults on scaled data:")
print(f"Accuracy: {accuracy_scaled:.4f}")
print(classification_report(y_test, y_pred_scaled))

# --- Side-by-side summary ---
print("\nComparison of accuracies:")
print(f"Accuracy on unscaled data: {accuracy_unscaled:.4f}")
print(f"Accuracy on scaled data: {accuracy_scaled:.4f}")

10. Write a Python program to train a Gaussian Naïve Bayes model and compare the predictions before and after Laplace Smoothing.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Iris features and labels (any numeric classification dataset would do)
data = load_iris()
X, y = data.data, data.target

# 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Baseline model ---
# var_smoothing=1e-9 is scikit-learn's default: a tiny amount of variance
# smoothing kept for numerical stability, i.e. effectively the "unsmoothed" model.
gnb_no_smoothing = GaussianNB(var_smoothing=1e-9).fit(X_train, y_train)
y_pred_no_smoothing = gnb_no_smoothing.predict(X_test)

# --- Heavily smoothed model ---
# Laplace (additive) smoothing applies to count-based Naive Bayes variants and
# has no direct counterpart in GaussianNB. The closest available knob is
# var_smoothing, which inflates the per-feature variances; a large value is
# used here as the "smoothed" counterpart.
gnb_with_smoothing = GaussianNB(var_smoothing=1.0).fit(X_train, y_train)
y_pred_with_smoothing = gnb_with_smoothing.predict(X_test)

# --- Comparison ---
accuracy_no_smoothing = accuracy_score(y_test, y_pred_no_smoothing)
accuracy_with_smoothing = accuracy_score(y_test, y_pred_with_smoothing)

print(f"Accuracy without smoothing: {accuracy_no_smoothing:.4f}")
print(f"Accuracy with smoothing: {accuracy_with_smoothing:.4f}")

# Per-class reports for both models
print("\nClassification Report without Smoothing:")
print(classification_report(y_test, y_pred_no_smoothing))

print("\nClassification Report with Smoothing:")
print(classification_report(y_test, y_pred_with_smoothing))


11. Write a Python program to train an SVM Classifier and use GridSearchCV to tune the hyperparameters (C, gamma, kernel).

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Iris features and labels (swap in another dataset if desired)
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Base estimator whose hyperparameters will be searched
svm = SVC()

# Step 4: Candidate values for each hyperparameter
param_grid = dict(
    C=[0.1, 1, 10, 100],            # regularization strength
    gamma=[0.001, 0.01, 0.1, 1],    # kernel coefficient
    kernel=['linear', 'rbf', 'poly'],  # kernel type
)

# Step 5: Exhaustive search over the grid with 5-fold cross-validation,
# using all available CPU cores, fitted on the training split only
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=5,
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Step 6: Winning parameter combination and the model refitted with it
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Step 7: Classify the held-out samples with the best model
y_pred = best_model.predict(X_test)

# Step 8: Report the chosen parameters and the test-set performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Accuracy of the best model: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


12. Write a Python program to train an SVM Classifier on an imbalanced dataset, apply class weighting, and check whether it improves accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight

# Step 1: Load the Iris dataset
data = load_iris()
X = data.data
y = data.target

# Step 2: Create an imbalanced dataset by under-sampling one class.
# Dropping class 1 entirely would leave two classes of 50 samples each, which
# is perfectly balanced. Instead keep every sample of classes 0 and 2 and only
# 10 randomly chosen samples of class 1 (Versicolor), giving a 50 / 10 / 50 split.
rng = np.random.RandomState(42)
class1_idx = np.flatnonzero(y == 1)
kept_class1_idx = rng.choice(class1_idx, size=10, replace=False)
keep_idx = np.sort(np.concatenate([np.flatnonzero(y != 1), kept_class1_idx]))
X_imbalanced = X[keep_idx]
y_imbalanced = y[keep_idx]

# Step 3: Split the imbalanced dataset into training and testing sets (70% training, 30% testing)
# stratify keeps the class proportions (and thus the rare class) in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X_imbalanced, y_imbalanced, test_size=0.3, random_state=42, stratify=y_imbalanced
)

# Step 4: Train the SVM Classifier without class weights
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = svm_classifier.predict(X_test)

# Step 6: Calculate the accuracy without class weights
accuracy_no_weight = accuracy_score(y_test, y_pred)
print(f"Accuracy without class weights: {accuracy_no_weight:.4f}")

# Step 7: Compute class weights based on the training data
# 'balanced' weights are inversely proportional to the class frequencies,
# so the rare class receives the largest weight
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(zip(np.unique(y_train), class_weights))

# Step 8: Train the SVM Classifier with class weights
svm_classifier_weighted = SVC(kernel='linear', class_weight=class_weight_dict, random_state=42)
svm_classifier_weighted.fit(X_train, y_train)

# Step 9: Make predictions on the test set
y_pred_weighted = svm_classifier_weighted.predict(X_test)

# Step 10: Calculate the accuracy with class weights
accuracy_weighted = accuracy_score(y_test, y_pred_weighted)
print(f"Accuracy with class weights: {accuracy_weighted:.4f}")


13. Write a Python program to implement a Naïve Bayes classifier for spam detection using email data.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 1: Read the SMS Spam Collection file
# Download: https://archive.ics.uci.edu/ml/datasets/sms+spam+collection
# The file is read as tab-separated text without a header row; the two
# columns are named 'label' and 'message' here.
df = pd.read_csv(
    'SMSSpamCollection',
    sep='\t',
    header=None,
    names=['label', 'message'],
)

# Step 2: Encode the text labels numerically: ham -> 0, spam -> 1
label_codes = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_codes)

# Step 3: Messages are the inputs, encoded labels the targets
X, y = df['message'], df['label']

# Step 4: 80/20 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: TF-IDF features; the vocabulary is learned from the training
# messages only and reused for the test messages
tfidf = TfidfVectorizer(stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Step 6: Fit Multinomial Naive Bayes on the TF-IDF features
nb_classifier = MultinomialNB().fit(X_train_tfidf, y_train)

# Step 7: Classify the held-out messages
y_pred = nb_classifier.predict(X_test_tfidf)

# Step 8: Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Precision, recall and F1-score per class
print("Classification Report:")
print(classification_report(y_test, y_pred))


14. Write a Python program to train an SVM Classifier and a Naïve Bayes Classifier on the same dataset and compare their accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split shared by both classifiers
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: RBF-kernel SVM, fitted on the training split
svm_classifier = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)

# Step 4: Gaussian Naive Bayes, fitted on the same training split
nb_classifier = GaussianNB().fit(X_train, y_train)

# Step 5: Both models classify the same held-out samples
y_pred_svm = svm_classifier.predict(X_test)
y_pred_nb = nb_classifier.predict(X_test)

# Step 6: Score both and print the accuracies side by side
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

print(f"Accuracy of SVM classifier: {accuracy_svm:.4f}")
print(f"Accuracy of Naïve Bayes classifier: {accuracy_nb:.4f}")

15. Write a Python program to perform feature selection before training a Naïve Bayes classifier and compare results.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Gaussian Naive Bayes on all four features
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Step 4: Baseline accuracy with the full feature set
y_pred_no_fs = nb_classifier.predict(X_test)
accuracy_no_fs = accuracy_score(y_test, y_pred_no_fs)

print(f"Accuracy without feature selection: {accuracy_no_fs:.4f}")

# Step 5: Keep the 2 features with the highest chi-squared score.
# The selector is fitted on the training split and then applied unchanged
# to the test split.
select_k_best = SelectKBest(chi2, k=2)
X_train_selected = select_k_best.fit_transform(X_train, y_train)
X_test_selected = select_k_best.transform(X_test)

# Step 6: Refit the same estimator object on the reduced feature set
# (this replaces the model fitted in Step 3)
nb_classifier.fit(X_train_selected, y_train)

# Step 7: Accuracy with the selected features only
y_pred_fs = nb_classifier.predict(X_test_selected)
accuracy_fs = accuracy_score(y_test, y_pred_fs)

print(f"Accuracy with feature selection: {accuracy_fs:.4f}")


16. Write a Python program to train an SVM Classifier using One-vs-Rest (OvR) and One-vs-One (OvO) strategies on the Wine dataset and compare their accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# SVC's decision_function_shape argument only changes the shape of the
# decision_function() output; SVC itself always trains one-vs-one internally.
# To really compare the two multiclass strategies, wrap the binary SVM in the
# explicit meta-estimators from sklearn.multiclass.
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier

# Step 1: Load the Wine dataset
data = load_wine()
X = data.data
y = data.target

# Step 2: Split the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Train the SVM Classifier using the One-vs-Rest strategy (OvR)
# One binary SVM per class: that class against all the others
svm_ovr = OneVsRestClassifier(SVC(random_state=42))
svm_ovr.fit(X_train, y_train)

# Step 4: Train the SVM Classifier using the One-vs-One strategy (OvO)
# One binary SVM per pair of classes
svm_ovo = OneVsOneClassifier(SVC(random_state=42))
svm_ovo.fit(X_train, y_train)

# Step 5: Make predictions and evaluate the performance using both strategies
y_pred_ovr = svm_ovr.predict(X_test)
y_pred_ovo = svm_ovo.predict(X_test)

accuracy_ovr = accuracy_score(y_test, y_pred_ovr)
accuracy_ovo = accuracy_score(y_test, y_pred_ovo)

# Step 6: Print the accuracy results
print(f"Accuracy with One-vs-Rest (OvR) strategy: {accuracy_ovr:.4f}")
print(f"Accuracy with One-vs-One (OvO) strategy: {accuracy_ovo:.4f}")

17. Write a Python program to train an SVM Classifier using Linear, Polynomial, and RBF kernels on the Breast Cancer dataset and compare their accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Step 1: Breast Cancer features and labels
data = load_breast_cancer()
X, y = data.data, data.target

# Step 2: 70/30 train/test split shared by all three models
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Linear kernel
svm_linear = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Step 4: Polynomial kernel of degree 3
svm_poly = SVC(kernel='poly', degree=3, random_state=42).fit(X_train, y_train)

# Step 5: RBF kernel
svm_rbf = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)

# Step 6: Every model classifies the same held-out samples and is scored
y_pred_linear = svm_linear.predict(X_test)
accuracy_linear = accuracy_score(y_test, y_pred_linear)

y_pred_poly = svm_poly.predict(X_test)
accuracy_poly = accuracy_score(y_test, y_pred_poly)

y_pred_rbf = svm_rbf.predict(X_test)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)

# Step 7: Accuracy per kernel
print(f"Accuracy with Linear kernel: {accuracy_linear:.4f}")
print(f"Accuracy with Polynomial kernel: {accuracy_poly:.4f}")
print(f"Accuracy with RBF kernel: {accuracy_rbf:.4f}")

18. Write a Python program to train an SVM Classifier using Stratified K-Fold Cross-Validation and compute the average accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: Linear-kernel SVM (not fitted here; cross_val_score fits a clone per fold)
svm_classifier = SVC(kernel='linear', random_state=42)

# Step 3: 5 shuffled folds that preserve the class proportions in every fold
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Step 4: One accuracy value per fold
accuracies = cross_val_score(
    svm_classifier,
    X,
    y,
    cv=stratified_kfold,
    scoring='accuracy',
)

# Step 5: Mean accuracy across the folds
average_accuracy = accuracies.mean()

# Step 6: Per-fold scores followed by their average
print(f"Accuracy for each fold: {accuracies}")
print(f"Average accuracy: {average_accuracy:.4f}")


19. Write a Python program to train a Naïve Bayes classifier using different prior probabilities and compare performance.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Three alternative sets of class priors
# (a) uniform: every class equally likely
uniform_priors = [1/3, 1/3, 1/3]
# (b) empirical: relative class frequencies observed in the training split
class_probs = np.bincount(y_train) / len(y_train)
# (c) custom: deliberately favors class 0
custom_priors = [0.5, 0.3, 0.2]

# Step 4: Fit one Gaussian Naive Bayes model per prior and score each on the
# same held-out samples

# (a) uniform priors
nb_uniform = GaussianNB(priors=uniform_priors).fit(X_train, y_train)
y_pred_uniform = nb_uniform.predict(X_test)
accuracy_uniform = accuracy_score(y_test, y_pred_uniform)

# (b) empirical class-frequency priors
nb_class_probs = GaussianNB(priors=class_probs).fit(X_train, y_train)
y_pred_class_probs = nb_class_probs.predict(X_test)
accuracy_class_probs = accuracy_score(y_test, y_pred_class_probs)

# (c) custom priors
nb_custom = GaussianNB(priors=custom_priors).fit(X_train, y_train)
y_pred_custom = nb_custom.predict(X_test)
accuracy_custom = accuracy_score(y_test, y_pred_custom)

# Step 5: Accuracy per prior choice
print(f"Accuracy with uniform prior probabilities: {accuracy_uniform:.4f}")
print(f"Accuracy with class-based prior probabilities: {accuracy_class_probs:.4f}")
print(f"Accuracy with custom prior probabilities: {accuracy_custom:.4f}")


20. Write a Python program to perform Recursive Feature Elimination (RFE) before training an SVM Classifier and compare accuracy.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Baseline - linear SVM on all features
svm_full = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_full = svm_full.predict(X_test)
accuracy_full = accuracy_score(y_test, y_pred_full)

# Step 4: Recursive Feature Elimination down to 2 features, ranked by a
# linear SVM and fitted on the training split only
rfe_estimator = SVC(kernel='linear', random_state=42)
selector = RFE(estimator=rfe_estimator, n_features_to_select=2).fit(X_train, y_train)

# Step 5: Reduce both splits to the selected features and train a fresh SVM on them
X_train_rfe = selector.transform(X_train)
X_test_rfe = selector.transform(X_test)

svm_rfe = SVC(kernel='linear', random_state=42).fit(X_train_rfe, y_train)
y_pred_rfe = svm_rfe.predict(X_test_rfe)
accuracy_rfe = accuracy_score(y_test, y_pred_rfe)

# Step 6: Accuracy with and without feature elimination
print(f"Accuracy with all features: {accuracy_full:.4f}")
print(f"Accuracy with selected features (RFE): {accuracy_rfe:.4f}")


21. Write a Python program to train an SVM Classifier and evaluate its performance using Precision, Recall, and F1-Score instead of accuracy.

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score

# Step 1: Iris features and labels
data = load_iris()
X, y = data.data, data.target

# Step 2: 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Linear-kernel SVM, fitted on the training split
svm_classifier = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Step 4: Classify the held-out samples
y_pred = svm_classifier.predict(X_test)

# Step 5: Precision, recall and F1.
# Iris has three classes, so each metric is computed per class and then
# combined with average='weighted' (weighted by the number of true samples
# of each class).
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Step 6: Report the three weighted averages
print(f"Precision (Weighted Average): {precision:.4f}")
print(f"Recall (Weighted Average): {recall:.4f}")
print(f"F1-Score (Weighted Average): {f1:.4f}")


22. Write a Python program to train a Naïve Bayes Classifier and evaluate its performance using Log Loss (Cross-Entropy Loss).

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import log_loss

# Step 1: Load the Iris dataset (feature matrix X, integer class labels y)
data = load_iris()
X = data.data
y = data.target

# Step 2: Split the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Train the Naïve Bayes Classifier (Gaussian likelihood for continuous features)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Step 4: Predict probabilities (needed for Log Loss).
# The result has one column per class, ordered like nb_classifier.classes_.
y_prob = nb_classifier.predict_proba(X_test)

# Step 5: Compute the Log Loss (Cross-Entropy Loss).
# Pass the label set explicitly: without it, log_loss infers the labels from
# y_test alone and raises if the test split happens to miss a class that the
# probability matrix still has a column for.
log_loss_value = log_loss(y_test, y_prob, labels=nb_classifier.classes_)

# Step 6: Print the Log Loss
print(f"Log Loss: {log_loss_value:.4f}")


23. Write a Python program to train an SVM Classifier and visualize the Confusion Matrix using seaborn.

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

# Step 1: Fetch the Iris data and separate the features from the class labels
data = load_iris()
X, y = data.data, data.target

# Step 2: Hold out 30% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 3: Build a linear-kernel SVM and fit it on the training split
svm_classifier = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Step 4: Predict class labels for the held-out samples
y_pred = svm_classifier.predict(X_test)

# Step 5: Tabulate true labels (rows) against predicted labels (columns)
cm = confusion_matrix(y_test, y_pred)

# Step 6: Draw the matrix as an annotated heatmap, using the species names
# as tick labels on both axes
class_names = data.target_names
plt.figure(figsize=(6, 5))
ax = sns.heatmap(
    cm,
    annot=True,   # write the count inside every cell
    fmt='d',      # counts are integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_title('Confusion Matrix for SVM Classifier')
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
plt.show()


24. Write a Python program to train an SVM Regressor (SVR) and evaluate its performance using Mean Absolute Error (MAE) instead of MSE.

In [None]:
import numpy as np
import pandas as pd
# Instead of load_boston, use fetch_california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Step 1: Load the California Housing dataset.
# It is used here in place of the Boston housing data (load_boston), which
# the import section of this cell deliberately avoids.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Step 2: Hold out 20% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Standardize the features for SVR.
# The scaler learns its statistics from the training split only and the same
# transformation is then applied to both splits, so no test-set information
# leaks into training.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Step 4: Fit an RBF-kernel SVR on the standardized training data.
# NOTE(review): with C=1e3 on the full training split this fit can be slow —
# consider subsampling if the cell takes too long to run.
svr = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(X_train_scaled, y_train)

# Step 5: Predict targets for the standardized test data
y_pred = svr.predict(X_test_scaled)

# Step 6: Report the Mean Absolute Error between true and predicted targets
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

25. Write a Python program to train a Naïve Bayes classifier and evaluate its performance using the ROC-AUC score.

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize

# Step 1: Load the Iris dataset
data = load_iris()
X = data.data
y = data.target

# Step 2: Convert the multi-class problem into a binary classification problem.
# (y == 0) is True exactly for setosa samples, so after the integer cast:
#   1 = "setosa"      (positive class)
#   0 = "not setosa"  (negative class)
y_binary = (y == 0).astype(int)

# Step 3: Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Step 4: Train the Naïve Bayes classifier (GaussianNB for continuous data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Step 5: Score the test set.
# ROC-AUC is computed from a continuous score rather than hard class
# predictions, so only the predicted probability of the positive class
# (label 1 = setosa, second column of predict_proba) is needed.
y_prob = nb_classifier.predict_proba(X_test)[:, 1]

# Step 6: Evaluate performance using ROC-AUC score
roc_auc = roc_auc_score(y_test, y_prob)
print(f"ROC-AUC Score: {roc_auc:.4f}")

# Step 7: Plot the ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlim([0.0, 1.0])
# Slight headroom above 1.0 so the curve is not clipped at the top edge
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

26. Write a Python program to train an SVM Classifier and visualize the Precision-Recall Curve.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, average_precision_score

# Step 1: Fetch the Breast Cancer data and separate features from the binary labels
data = load_breast_cancer()
X, y = data.data, data.target

# Step 2: Hold out 20% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Fit an RBF-kernel SVM; probability=True enables predict_proba,
# which the precision-recall computation below relies on
svm_classifier = SVC(kernel='rbf', probability=True, random_state=42).fit(X_train, y_train)

# Step 4: Keep only the predicted probability of the positive class (second column)
y_prob = svm_classifier.predict_proba(X_test)[:, 1]

# Step 5: Sweep the decision threshold to get the precision/recall pairs,
# and summarize the curve with the average-precision score
precision, recall, thresholds = precision_recall_curve(y_test, y_prob)
average_precision = average_precision_score(y_test, y_prob)

# Step 6: Plot precision (y-axis) against recall (x-axis)
plt.figure(figsize=(8, 6))
plt.plot(
    recall,
    precision,
    color='b',
    label=f'Precision-Recall curve (AP = {average_precision:.2f})',
)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for SVM Classifier')
plt.legend(loc='best')
plt.grid(True)
plt.show()