In [21]:
# Import necessary libraries and modules for data handling and machine learning
import pandas as pd  # Pandas library for data manipulation and analysis
from sklearn.svm import SVC  # Support Vector Classification from Scikit-learn SVM module
from sklearn.linear_model import LogisticRegression  # Logistic Regression model from Scikit-learn
from sklearn.model_selection import train_test_split  # Split arrays or matrices into random train and test subsets
from sklearn.metrics import accuracy_score, confusion_matrix  # Import accuracy score and confusion matrix for model evaluation

In [22]:
# Load the dataset
skin_df = pd.read_csv('Skin_NonSkin.txt', delimiter=',')  # Load data from the specified path using pandas, specifying comma as delimiter

In [23]:
# Data preparation
X = skin_df.drop(columns='skin')  # Features: drop the 'skin' column from the dataset to use all other columns as features
y = skin_df['skin']  # Target variable: use 'skin' column as the target variable for predictions

In [24]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)  # Split data, 70% for training and 30% for testing

In [25]:
# SVM Model
svm_model = SVC()  # Instantiate the Support Vector Classifier with default parameters
svm_model.fit(X_train, y_train)  # Fit/train the SVM model on the training set

In [26]:
# Logistic Regression Model
logistic_model = LogisticRegression(max_iter=200)  # Instantiate Logistic Regression with increased max_iter for ensuring convergence
logistic_model.fit(X_train, y_train)  # Fit/train the Logistic Regression model on the training set

In [27]:
# Model Evaluation for SVM
svm_predictions = svm_model.predict(X_test)  # Use the trained SVM model to make predictions on the test set
svm_accuracy = accuracy_score(y_test, svm_predictions)  # Calculate the accuracy of the SVM model on the test set
svm_confusion_matrix = confusion_matrix(y_test, svm_predictions)  # Generate the confusion matrix for the SVM model predictions

In [28]:
# Model Evaluation for Logistic Regression
logistic_predictions = logistic_model.predict(X_test)  # Use the trained Logistic Regression model to make predictions on the test set
logistic_accuracy = accuracy_score(y_test, logistic_predictions)  # Calculate the accuracy of the Logistic Regression model on the test set
logistic_confusion_matrix = confusion_matrix(y_test, logistic_predictions)  # Generate the confusion matrix for the Logistic Regression model predictions

In [29]:
# Printing results
print("SVM Accuracy:", svm_accuracy)  # Print the accuracy of the SVM model
print("SVM Confusion Matrix:\n", svm_confusion_matrix)  # Print the confusion matrix of the SVM model
print("Logistic Regression Accuracy:", logistic_accuracy)  # Print the accuracy of the Logistic Regression model
print("Logistic Regression Confusion Matrix:\n", logistic_confusion_matrix)  # Print the confusion matrix of the Logistic Regression model

SVM Accuracy: 0.9981637150085694
SVM Confusion Matrix:
 [[15173     1]
 [  134 58210]]
Logistic Regression Accuracy: 0.9183057210479066
Logistic Regression Confusion Matrix:
 [[12439  2735]
 [ 3271 55073]]
