In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score

data = pd.read_csv(r'C:\Users\pc\Desktop\lab 13 AI full\dermatology.data', names=column_names)

# Load the dataset
column_names = ['erythema', 'scaling', 'definite_borders', 'itching', 'koebner_phenomenon', 'polygonal_papules',
                'follicular_papules', 'oral_mucosal_involvement', 'knee_and_elbow_involvement', 'scalp_involvement',
                'family_history', 'melanin_incontinence', 'eosinophils_in_the_infiltrate', 'PNL_infiltrate',
                'fibrosis_of_the_papillary_dermis', 'exocytosis', 'acanthosis', 'hyperkeratosis', 'parakeratosis',
                'dlubrication', 'spongiosis', 'saw_tooth_appearance_of_retes', 'follicular_horn_plug',
                'perifollicular_parakeratosis', 'inflammatory_monoluclear_inflitrate', 'band_like_infiltrate',
                'age', 'class']

# Preprocessing: replacing missing values with the most frequent value in each column
data = data.replace('?', np.nan)
data = data.apply(lambda x: x.fillna(x.value_counts().index[0]))

# Preprocessing: separating features and target variable
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Random train/test split (70%, 30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Naïve Bayes classification
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Perform 10-fold cross-validation with Naïve Bayes
nb_cv_scores = cross_val_score(nb_classifier, X_train, y_train, cv=StratifiedKFold(n_splits=10, shuffle=True))

# Calculate F-measure and Accuracy for Naïve Bayes
nb_f_measure = np.mean(nb_cv_scores)
nb_accuracy = nb_classifier.score(X_test, y_test)

# SVM classification
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)

# Perform 10-fold cross-validation with SVM
svm_cv_scores = cross_val_score(svm_classifier, X_train, y_train, cv=StratifiedKFold(n_splits=10, shuffle=True))

# Calculate F-measure and Accuracy for SVM
svm_f_measure = np.mean(svm_cv_scores)
svm_accuracy = svm_classifier.score(X_test, y_test)

# Print the results
print("Naïve Bayes")
print(f"Average F-measure: {nb_f_measure}")
print(f"Accuracy: {nb_accuracy}\n")

print("Support Vector Machine")
print(f"Average F-measure: {svm_f_measure}")
print(f"Accuracy: {svm_accuracy}\n")


Naïve Bayes
Average F-measure: 0.8515384615384616
Accuracy: 0.8727272727272727

Support Vector Machine
Average F-measure: 0.9256923076923076
Accuracy: 0.9636363636363636

