In [1]:
import os
import cv2
import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Function to extract features from images after resizing
def extract_features(image_path, target_size=(100, 100)):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to read image '{image_path}'")
        return None
    # Resize the image to a fixed size
    image = cv2.resize(image, target_size)
    # Flatten the image pixels as features
    return image.flatten()

# Path to the dataset folder
dataset_folder = '/content/sample_data/skin-dataset'

# List to store features and labels
features = []
labels = []

# Iterate over subfolders (oily, dry, and normal)
for skin_type in os.listdir(dataset_folder):
    # Skip '.ipynb_checkpoints' folder
    if skin_type == '.ipynb_checkpoints':
        continue
    skin_type_folder = os.path.join(dataset_folder, skin_type)
    # Skip non-directory items
    if not os.path.isdir(skin_type_folder):
        continue
    # Assign label index for each skin type
    label_index = ['dry', 'normal', 'oily'].index(skin_type)
    # Iterate over each image file in the subfolder
    for filename in os.listdir(skin_type_folder):
        if filename.endswith(".jpg") or filename.endswith(".jpeg"):
            # Extract features from the image
            image_path = os.path.join(skin_type_folder, filename)
            image_features = extract_features(image_path)
            if image_features is not None:
                # Add features and corresponding label to lists
                features.append(image_features)
                labels.append(label_index)

# Convert lists to numpy arrays
X = np.array(features)
y = np.array(labels)

# Check if X or y is empty
if len(X) == 0 or len(y) == 0:
    print("Error: No data found")
    exit()

# Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize base classifier (Decision Tree)
base_classifier = DecisionTreeClassifier()

# Initialize the BaggingClassifier
bagging_classifier = BaggingClassifier(base_estimator=base_classifier, n_estimators=100, random_state=42)

# Training the BaggingClassifier on the training data
bagging_classifier.fit(X_train, y_train)

# Predicting labels for the test set
y_pred = bagging_classifier.predict(X_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)




Accuracy: 0.883495145631068
