In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

# Where the banknote-authentication data file lives on this machine
file_path = r'C:\Users\pc\Desktop\lab 13 AI full\data_banknote_authentication.txt'

# Load the file; header=None means the first line is data, not column names
data = pd.read_csv(file_path, header=None)

# Every column except the last is a feature; the last column is the target
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Training fractions to evaluate: 50/50, 70/30 and 90/10 train/test splits
train_sizes = [0.5, 0.7, 0.9]

# Fold counts compared during cross-validation; identical for every split,
# so they are defined once instead of on each outer iteration.
k_values = [5, 10, 15]

for train_size in train_sizes:
    # Hold-out split, stratified on the target and seeded for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, stratify=y, random_state=42
    )

    # Linear-kernel SVM fitted on the whole training portion; this fitted
    # instance is the one used for the test-set predictions further down.
    svm_classifier = SVC(kernel='linear', random_state=42)
    svm_classifier.fit(X_train, y_train)

    # Stratified k-fold cross-validation on the training portion only
    for k in k_values:
        # Seed the shuffle: without random_state the fold assignment (and
        # therefore the scores printed below) changed on every run.
        folds = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
        cv_scores = cross_val_score(svm_classifier, X_train, y_train, cv=folds)

        # Report per-fold scores and their mean for this (split, k) pair
        print(f"Train/Test Split: {train_size * 100}% train / {100 - train_size * 100}% test")
        print(f"Number of Folds (k): {k}")
        print(f"Cross-validation scores: {cv_scores}")
        print(f"Average accuracy: {cv_scores.mean()}\n")

    # Evaluate the fitted classifier on the held-out test portion
    y_pred = svm_classifier.predict(X_test)

    # Raw counts: rows are true labels, columns are predicted labels
    confusion = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix:\n{confusion}\n")

    # Divide each row by its total so every true-label row sums to 1;
    # keepdims=True keeps the row sums as a column for broadcasting.
    normalized_confusion = confusion.astype('float') / confusion.sum(axis=1, keepdims=True)
    print(f"Normalized Confusion Matrix:\n{normalized_confusion}\n")

Train/Test Split: 50.0% train / 50.0% test
Number of Folds (k): 5
Cross-validation scores: [0.98550725 0.99270073 0.99270073 1.         0.99270073]
Average accuracy: 0.9927218872315666

Train/Test Split: 50.0% train / 50.0% test
Number of Folds (k): 10
Cross-validation scores: [1.         1.         1.         0.97101449 1.         1.
 1.         0.98529412 1.         0.97058824]
Average accuracy: 0.99268968456948

Train/Test Split: 50.0% train / 50.0% test
Number of Folds (k): 15
Cross-validation scores: [0.97826087 1.         1.         0.97826087 1.         1.
 0.97826087 1.         0.95652174 1.         1.         1.
 1.         1.         1.        ]
Average accuracy: 0.9927536231884059

Confusion Matrix:
[[376   5]
 [  1 304]]

Normalized Confusion Matrix:
[[0.98687664 0.01312336]
 [0.00327869 0.99672131]]

Train/Test Split: 70.0% train / 30.0% test
Number of Folds (k): 5
Cross-validation scores: [0.98958333 0.99479167 0.99479167 0.98958333 0.99479167]
Average accuracy: 0.9927083