In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load extracted features and labels
features = np.load("manual_features.npy")
labels = np.load("manual_labels.npy")

# Convert labels to numerical values if they are strings
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)  # Convert class names to integers

# Split dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy * 100:.2f}%")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Random Forest Accuracy: 55.93%

Classification Report:
                      precision    recall  f1-score   support

           1. Eczema       0.47      0.53      0.50       200
         2. Melanoma       0.74      0.78      0.76       200
3. Atopic Dermatitis       0.39      0.36      0.37       200
 4. Melanocytic Nevi       0.72      0.69      0.70       200
 5. Benign Keratosis       0.61      0.69      0.65       200
6. Fungal Infections       0.50      0.52      0.51       200
 7. Viral Infections       0.45      0.35      0.39       200

            accuracy                           0.56      1400
           macro avg       0.55      0.56      0.56      1400
        weighted avg       0.55      0.56      0.56      1400



In [7]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer  # For handling NaNs

# Load extracted features and labels
features = np.load("manual_features.npy")
labels = np.load("manual_labels.npy")

# Convert labels to numerical values if they are strings
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)  # Convert class names to integers

# Handle NaN values by replacing with the column mean
imputer = SimpleImputer(strategy="mean")  # You can also use "median"
features = imputer.fit_transform(features)

# Split dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)

# Normalize the features (SVM is sensitive to feature scaling)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Support Vector Machine classifier with RBF kernel
clf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Accuracy: {accuracy * 100:.2f}%")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


SVM Accuracy: 52.71%

Classification Report:
                      precision    recall  f1-score   support

           1. Eczema       0.44      0.58      0.50       200
         2. Melanoma       0.70      0.70      0.70       200
3. Atopic Dermatitis       0.37      0.27      0.31       200
 4. Melanocytic Nevi       0.71      0.73      0.72       200
 5. Benign Keratosis       0.60      0.69      0.65       200
6. Fungal Infections       0.43      0.42      0.43       200
 7. Viral Infections       0.35      0.28      0.31       200

            accuracy                           0.53      1400
           macro avg       0.52      0.53      0.52      1400
        weighted avg       0.52      0.53      0.52      1400



In [9]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load manual feature set and labels
manual_features = np.load("manual_features.npy")  # Change filename if different
labels = np.load("manual_labels.npy")

# Handle missing values (if any)
manual_features = np.nan_to_num(manual_features)  # Replace NaN with 0

# Split into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(manual_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Standardize the features (IMPORTANT for k-NN)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train k-NN classifier (k=5, default)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predictions
y_pred = knn.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"k-NN Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


k-NN Accuracy: 50.21%

Classification Report:
                       precision    recall  f1-score   support

           1. Eczema       0.39      0.58      0.47       200
         2. Melanoma       0.65      0.74      0.69       200
3. Atopic Dermatitis       0.30      0.27      0.28       200
 4. Melanocytic Nevi       0.66      0.69      0.68       200
 5. Benign Keratosis       0.60      0.65      0.62       200
6. Fungal Infections       0.45      0.41      0.43       200
 7. Viral Infections       0.39      0.17      0.24       200

            accuracy                           0.50      1400
           macro avg       0.49      0.50      0.49      1400
        weighted avg       0.49      0.50      0.49      1400



found 0 physical cores < 1
  File "D:\Anaconda\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load manual feature set and labels
manual_features = np.load("manual_features.npy")  # Change filename if different
labels = np.load("manual_labels.npy")

# Handle missing values (if any)
manual_features = np.nan_to_num(manual_features)  # Replace NaN with 0

# Split into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(manual_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train Decision Tree classifier
dt = DecisionTreeClassifier(criterion="gini", max_depth=None, random_state=42)  # Try 'entropy' for criterion
dt.fit(X_train, y_train)

# Predictions
y_pred = dt.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Decision Tree Accuracy: 45.57%

Classification Report:
                       precision    recall  f1-score   support

           1. Eczema       0.39      0.38      0.39       200
         2. Melanoma       0.67      0.66      0.67       200
3. Atopic Dermatitis       0.33      0.30      0.32       200
 4. Melanocytic Nevi       0.57      0.58      0.58       200
 5. Benign Keratosis       0.56      0.56      0.56       200
6. Fungal Infections       0.34      0.35      0.35       200
 7. Viral Infections       0.33      0.36      0.34       200

            accuracy                           0.46      1400
           macro avg       0.46      0.46      0.46      1400
        weighted avg       0.46      0.46      0.46      1400



In [15]:
!pip install xgboost


Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.0-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/150.0 MB 1.3 MB/s eta 0:01:56
   ---------------------------------------- 0.5/150.0 MB 1.3 MB/s eta 0:01:56
   ---------------------------------------- 1.0/150.0 MB 1.3 MB/s eta 0:01:56
   ---------------------------------------- 1.3/150.0 MB 1.3 MB/s eta 0:01:56
   ---------------------------------------- 1.6/150.0 MB 1.3 MB/s eta 0:01:55
   ---------------------------------------- 1.8/150.0 MB 1.3 MB/s eta 0:01:55
    --------------------------------------- 2.1/150.0 MB 1.3 MB/s eta 0:01:55
    --------------------------------------- 2.4/150.0 MB 1.3 MB/s eta 0:01:55
    --------------------------------------- 2.6/150.0 MB 1.3 MB/s eta 0:01:55
    ----

In [19]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load features and labels
X = np.load("manual_features.npy")
y = np.load("manual_labels.npy")

# Check label format
print("Unique Labels:", np.unique(y))

# Convert string labels to numeric if needed
if y.dtype.kind in {'U', 'O'}:  # Check if labels are strings
    unique_classes = np.unique(y)
    class_mapping = {label: idx for idx, label in enumerate(unique_classes)}
    y = np.array([class_mapping[label] for label in y])  # Convert labels to numbers

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train XGBoost classifier
xgb_clf = xgb.XGBClassifier(
    objective="multi:softmax", 
    num_class=len(np.unique(y)),  
    eval_metric="mlogloss", 
    use_label_encoder=False,
    random_state=42
)
xgb_clf.fit(X_train, y_train)

# Predictions
y_pred = xgb_clf.predict(X_test)

# Print results
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred) * 100)
print("Classification Report:\n", classification_report(y_test, y_pred))


Unique Labels: ['1. Eczema' '2. Melanoma' '3. Atopic Dermatitis' '4. Melanocytic Nevi'
 '5. Benign Keratosis' '6. Fungal Infections' '7. Viral Infections']


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Accuracy: 59.0
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.54      0.53       200
           1       0.80      0.82      0.81       200
           2       0.43      0.42      0.42       200
           3       0.79      0.73      0.76       200
           4       0.65      0.76      0.70       200
           5       0.48      0.48      0.48       200
           6       0.44      0.39      0.41       200

    accuracy                           0.59      1400
   macro avg       0.59      0.59      0.59      1400
weighted avg       0.59      0.59      0.59      1400



In [23]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load features and labels
X = np.load("manual_features.npy")
y = np.load("manual_labels.npy")

from sklearn.impute import SimpleImputer

# Handle NaN values by replacing them with the mean of the column
imputer = SimpleImputer(strategy="mean")  # Options: "median", "most_frequent"
X = imputer.fit_transform(X)

# Convert labels to numeric if necessary
if y.dtype.kind in {'U', 'O'}:  # Check if labels are strings
    unique_classes = np.unique(y)
    class_mapping = {label: idx for idx, label in enumerate(unique_classes)}
    y = np.array([class_mapping[label] for label in y])  # Convert labels to numbers

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train Naïve Bayes classifier
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train)

# Predictions
y_pred = nb_clf.predict(X_test)

# Print results
print("Naïve Bayes Accuracy:", accuracy_score(y_test, y_pred) * 100)
print("Classification Report:\n", classification_report(y_test, y_pred))

Naïve Bayes Accuracy: 37.57142857142857
Classification Report:
               precision    recall  f1-score   support

           0       0.41      0.57      0.48       200
           1       0.36      0.60      0.45       200
           2       0.28      0.20      0.23       200
           3       0.41      0.76      0.53       200
           4       0.52      0.27      0.36       200
           5       0.37      0.10      0.16       200
           6       0.23      0.14      0.17       200

    accuracy                           0.38      1400
   macro avg       0.37      0.38      0.34      1400
weighted avg       0.37      0.38      0.34      1400



In [25]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report

# Load manual features and labels
X = np.load("manual_features.npy")
y = np.load("manual_labels.npy")

# Handle missing values (NaN) in X
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# Convert string labels to numerical
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train Logistic Regression model
log_reg = LogisticRegression(max_iter=1000)  # Increase iterations if needed
log_reg.fit(X_train, y_train)

# Predictions
y_pred = log_reg.predict(X_test)

# Print results
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.45571428571428574
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.57      0.49       200
           1       0.52      0.62      0.57       200
           2       0.35      0.20      0.25       200
           3       0.61      0.64      0.62       200
           4       0.47      0.53      0.50       200
           5       0.40      0.40      0.40       200
           6       0.31      0.23      0.26       200

    accuracy                           0.46      1400
   macro avg       0.44      0.46      0.44      1400
weighted avg       0.44      0.46      0.44      1400



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
