### Extraction de caracteristiques

In [5]:
import cv2
import os
import pandas as pd
from skimage import feature
from itertools import product
import numpy as np

def extract_lbp_features(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Compute LBP features
    lbp = feature.local_binary_pattern(gray, 8, 1, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, 10 + 3), range=(0, 10 + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    
    return hist

def process_images(root_folder):
    data = []

    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        # Check if the item in the directory is a subdirectory
        if os.path.isdir(folder_path):
            for filename in os.listdir(folder_path):
                image_path = os.path.join(folder_path, filename)

                # Ensure that the file is an image (you can customize this based on your image formats)
                if image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
                    # Read the image
                    image = cv2.imread(image_path)

                    # Check if the image is successfully read
                    if image is not None:
                        # Divide the image into blocks
                        blocks = [image[i:i+10, j:j+10] for i, j in product(range(0, image.shape[0], 10), range(0, image.shape[1], 10))]

                        # Initialize an array to hold the features for this image
                        features_for_image = []

                        # Extract LBP features for each block
                        for block in blocks:
                            lbp_features = extract_lbp_features(block)
                            # Append the features for each block to the feature array
                            features_for_image.extend(lbp_features)

                        # Once all blocks are processed, append the features for the entire image to the data list
                        data.append([folder_name] + features_for_image + [filename])
                    else:
                        print(f"Error reading image: {image_path}")
                else:
                    print(f"Skipping non-image file: {image_path}")

    return data

def save_to_csv(data, csv_filename):
    # Determine the number of LBP features per image
    num_features = len(data[0]) - 2  # subtract the folder name and filename
    
    # Create column names based on the number of features
    columns =   [f'LBP_{i}' for i in range(num_features)] + ['ImageFile']+['ClassLabel']
    
    # Create a DataFrame with the correct column names
    df = pd.DataFrame(data, columns=columns)
    
    # Save the DataFrame to a CSV file
    df.to_csv(csv_filename, index=False)

root_folder = '/Users/oumaima/Downloads/Player_em'  # Update this path to your dataset
data = process_images(root_folder)
save_to_csv(data, '/Users/oumaima/Downloads/eqm_file.csv')


FileNotFoundError: [Errno 2] No such file or directory: '/Users/oumaima/Downloads/Player_em'

In [6]:
import re
column_name = [col for col in lbp_data.columns if re.search('classlabel', col, re.I)]
print(column_name)  # This should print the column name that matches the pattern
lbp_data = pd.read_csv('/Users/oumaima/Downloads/eqm_file.csv')
# Then use the exact column name found
y = lbp_data[column_name[0]]  # Access the column using the matched name

['ClassLabel;LBP_1;LBP_2;LBP_3;LBP_4;LBP_5;LBP_6;LBP_7;LBP_8;LBP_9;LBP_10;LBP_11;LBP_12;LBP_13;LBP_14;LBP_15;LBP_16;LBP_17;LBP_18;LBP_19;LBP_20;LBP_21;LBP_22;LBP_23;LBP_24;LBP_25;LBP_26;LBP_27;LBP_28;LBP_29;LBP_30;LBP_31;LBP_32;LBP_33;LBP_34;LBP_35;LBP_36;LBP_37;LBP_38;LBP_39;LBP_40;LBP_41;LBP_42;LBP_43;LBP_44;LBP_45;LBP_46;LBP_47;LBP_48;LBP_49;LBP_50;LBP_51;LBP_52;LBP_53;LBP_54;LBP_55;LBP_56;LBP_57;LBP_58;LBP_59;LBP_60;LBP_61;LBP_62;LBP_63;LBP_64;LBP_65;LBP_66;LBP_67;LBP_68;LBP_69;LBP_70;LBP_71;LBP_72;LBP_73;LBP_74;LBP_75;LBP_76;LBP_77;LBP_78;LBP_79;LBP_80;LBP_81;LBP_82;LBP_83;LBP_84;LBP_85;LBP_86;LBP_87;LBP_88;LBP_89;LBP_90;LBP_91;LBP_92;LBP_93;LBP_94;LBP_95;LBP_96;LBP_97;LBP_98;LBP_99;LBP_100;LBP_101;LBP_102;LBP_103;LBP_104;LBP_105;LBP_106;LBP_107;LBP_108;LBP_109;LBP_110;LBP_111;LBP_112;LBP_113;LBP_114;LBP_115;LBP_116;LBP_117;LBP_118;LBP_119;LBP_120;LBP_121;LBP_122;LBP_123;LBP_124;LBP_125;LBP_126;LBP_127;LBP_128;LBP_129;LBP_130;LBP_131;LBP_132;LBP_133;LBP_134;LBP_135;LBP_136;LBP_137

### Modele Random Forest

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load LBP features and class labels from CSV
lbp_data = pd.read_csv('/Users/oumaima/Downloads/eqm_file.csv', delimiter=';')
# Strip leading and trailing spaces from column names
# Normalize and encode the column names


# Extract features and labels
X = lbp_data.iloc[:, 1:]
y = lbp_data['ClassLabel']

# Convert class labels to numerical format
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Initialize a RandomForestClassifier as the base estimator
rf_classifier = RandomForestClassifier(n_estimators=300, random_state=42)

# Initialize a BaggingClassifier with the RandomForestClassifier as the base estimator
bagging_classifier = BaggingClassifier(base_estimator=rf_classifier, n_estimators=50, random_state=42)

# Train the bagging classifier
bagging_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = bagging_classifier.predict(X_test)

# Decode the predicted labels back to original class labels
y_pred_original = label_encoder.inverse_transform(y_pred)

# Evaluate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')




Accuracy: 54.69%


### Modele SVM

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
import joblib

df = pd.read_csv('/Users/oumaima/Downloads/eqm_file.csv', delimiter=';')

# Convert LBP features from string to a numeric array only if they are strings
for col in df.columns[1:]:  # Skip 'ClassLabel' column
    # Check if the column data type is string
    if df[col].dtype == object:
        # Convert from string to numeric array
        df[col] = df[col].apply(lambda x: np.fromstring(x, sep=' ') if isinstance(x, str) else x)

# Extract features and labels
X = df.iloc[:, 1:]  # Features
y = df['ClassLabel']  # Labels

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define parameters for grid search
param_grid = {
    'C': [0.1, 1, 10, 100],  # Example range, adjust as needed
    'kernel': ['linear', 'rbf'],  # Example kernels, adjust as needed
    # Add other parameters as needed
}

# Create an SVM model
svm_model = SVC()

# Create GridSearchCV object
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and the corresponding best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test set with the best model
y_test_pred = best_model.predict(X_test_scaled)

# Print the best parameters
print(f"Best Parameters: {best_params}")

# Print the accuracy on the test set
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Accuracy on the test set with best model: {accuracy:.2f}")
joblib.dump(scaler, 'scaler.pkl')


Best Parameters: {'C': 10, 'kernel': 'rbf'}
Accuracy on the test set with best model: 0.76


['scaler.pkl']