# In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors
from scipy.stats import rankdata
import warnings

# Suppress warnings for cleaner output.
# NOTE: no category or module is given, so this installs a blanket "ignore"
# filter: every warning raised after this point is hidden, not just noisy ones.
warnings.filterwarnings('ignore')

class BMFK:
    """Fuzzy k-nearest-neighbour classifier scored with a Bonferroni mean.

    For every query sample the ``n_neighbors`` closest training samples
    (Euclidean distance) receive an inverse-distance fuzzy weight.  The weights
    are normalised to sum to one, and each class is scored by the Bonferroni
    mean of the weights of the neighbours that carry that class label.  The
    class with the highest score is predicted.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of nearest training samples consulted per query.
    m : float, default=2
        Fuzzy strength parameter.  The distance exponent is ``2 / (m - 1)``,
        so ``m`` must be greater than 1.
    p, q : float, default=2
        Exponents of the Bonferroni mean.  ``p + q`` must be non-zero because
        the mean takes the ``1 / (p + q)`` root.

    Raises
    ------
    ValueError
        If ``m <= 1`` or ``p + q == 0``.
    """

    def __init__(self, n_neighbors=5, m=2, p=2, q=2):
        # Fail early with a clear message instead of a ZeroDivisionError
        # deep inside predict() / bonferroni_mean().
        if m <= 1:
            raise ValueError(f"m must be greater than 1, got {m!r}")
        if p + q == 0:
            raise ValueError("p + q must be non-zero")
        self.n_neighbors = n_neighbors
        self.m = m  # fuzzy strength parameter
        self.p = p  # Bonferroni p parameter
        self.q = q  # Bonferroni q parameter

    def fit(self, X, y):
        """Store the training data and build the neighbour index.

        Parameters
        ----------
        X : array-like, 2-D (samples x features)
            Training samples.
        y : array-like, 1-D
            Class label of each training sample.

        Returns
        -------
        BMFK
            The fitted instance, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # Convert to plain arrays so predict() can index positionally even when
        # a DataFrame or a Series with a non-default index is passed in.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: "
                f"{self.X.shape[0]} != {self.y.shape[0]}"
            )
        self.classes = np.unique(self.y)
        self.nn = NearestNeighbors(n_neighbors=self.n_neighbors, metric='minkowski', p=2)
        self.nn.fit(self.X)
        return self

    def bonferroni_mean(self, values):
        """Return the Bonferroni mean of *values* with exponents ``p`` and ``q``.

        Computes ``(sum_{i != j} v_i**p * v_j**q / (n * (n - 1))) ** (1 / (p + q))``.

        Parameters
        ----------
        values : array-like, 1-D
            Values to aggregate.

        Returns
        -------
        float
            ``0.0`` for empty input, the value itself for a single element
            (the pairwise formula is undefined for ``n < 2``), otherwise the
            Bonferroni mean.
        """
        values = np.asarray(values, dtype=float)
        n = values.size
        if n == 0:
            return 0.0
        if n == 1:
            return float(values[0])
        # All ordered pairs (i, j); the diagonal (i == j) is excluded by the
        # definition.  Zeroing the diagonal rather than subtracting it from the
        # total avoids cancellation when one value dominates the others.
        pair_products = np.outer(values ** self.p, values ** self.q)
        np.fill_diagonal(pair_products, 0.0)
        return (pair_products.sum() / (n * (n - 1))) ** (1 / (self.p + self.q))

    def predict(self, X):
        """Predict a class label for every row of *X*.

        Parameters
        ----------
        X : array-like, 2-D (samples x features)
            Query samples, with the same features as the training data.

        Returns
        -------
        numpy.ndarray
            One predicted label per query sample.  When several classes share
            the highest score, the first one in sorted label order is returned.
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            return np.array([], dtype=self.classes.dtype)

        # One batched neighbour query instead of one query per sample.
        distances, indices = self.nn.kneighbors(X)
        neighbor_labels = self.y[indices]

        # Inverse-distance fuzzy weights; the small constant keeps the weight
        # finite when a query coincides with a training sample.
        memberships = 1 / (distances ** (2 / (self.m - 1)) + 1e-8)
        memberships /= memberships.sum(axis=1, keepdims=True)

        predictions = []
        for row_labels, row_memberships in zip(neighbor_labels, memberships):
            # A class with no neighbour in this row scores 0.0 (empty input).
            class_scores = [
                self.bonferroni_mean(row_memberships[row_labels == c])
                for c in self.classes
            ]
            predictions.append(self.classes[int(np.argmax(class_scores))])

        return np.array(predictions)

def load_and_preprocess_data(file_path):
    """Load the "Full_new" sheet of an Excel workbook as an all-numeric DataFrame.

    Steps, in order:

    1. Read the sheet and strip surrounding whitespace from the column names.
    2. Drop the identifier / filler columns if they are present.
    3. Coerce every column to numeric; values that cannot be parsed become NaN.
    4. Replace NaN with the median of the respective column.
    5. Re-encode the listed categorical columns as integer category codes.

    Parameters
    ----------
    file_path : str or path-like
        Location of the Excel workbook.

    Returns
    -------
    pandas.DataFrame
        The preprocessed table with whitespace-stripped column names.
    """
    df = pd.read_excel(file_path, sheet_name="Full_new")

    # Normalise the headers first: the drop and encoding steps below match
    # columns by exact name and would silently skip any header that carries
    # leading or trailing whitespace.
    df.columns = df.columns.str.strip()

    columns_to_drop = ['Sl. No', 'Patient File No.', 'Unnamed: 44']
    df = df.drop(columns=columns_to_drop, errors='ignore')

    # NOTE(review): coercion runs before the categorical encoding, so textual
    # category labels (if any) turn into NaN here — confirm that these columns
    # are already numerically coded in the workbook.
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.fillna(df.median())

    categorical_columns = [
        'Blood Group', 'Cycle(R/I)', 'Pregnant(Y/N)',
        'Weight gain(Y/N)', 'hair growth(Y/N)',
        'Skin darkening (Y/N)', 'Hair loss(Y/N)',
        'Pimples(Y/N)', 'Fast food (Y/N)',
        'Reg.Exercise(Y/N)',
    ]

    # Columns missing from the sheet are skipped rather than raising.
    for col in categorical_columns:
        if col in df.columns:
            df[col] = df[col].astype('category').cat.codes

    return df

def evaluate_bmfk(X_data, y_data, *, test_size=0.1, random_state=42,
                  n_neighbors=5, m=2, p=2, q=2):
    """Train a BMFK classifier on one hold-out split and return its test accuracy.

    Parameters
    ----------
    X_data : array-like, 2-D (samples x features)
        Feature matrix.
    y_data : array-like, 1-D
        Class labels aligned with the rows of ``X_data``.
    test_size : float or int, default=0.1
        Passed to ``train_test_split``; the default holds out 10% for testing.
    random_state : int or None, default=42
        Seed for the split, so repeated calls evaluate on the same partition.
    n_neighbors, m, p, q
        Hyperparameters forwarded to ``BMFK``; the defaults match its own.

    Returns
    -------
    float
        Fraction of test samples whose predicted label equals the true label.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X_data, y_data, test_size=test_size, random_state=random_state
    )

    bmfk_clf = BMFK(n_neighbors=n_neighbors, m=m, p=p, q=q)
    bmfk_clf.fit(X_train, y_train)

    y_pred = bmfk_clf.predict(X_test)
    return accuracy_score(y_test, y_pred)

def main():
    """Compare three feature subsets with the BMFK classifier and print a ranking.

    Loads and preprocesses the workbook, standardises every feature column,
    evaluates each named feature subset with ``evaluate_bmfk`` and prints a
    table holding the accuracy and rank of every subset (rank 1 = highest
    accuracy).
    """
    df = load_and_preprocess_data("PCOS_data_without_infertility.xlsx")

    # Split the table into the feature frame and the label vector.
    target_column = 'PCOS (Y/N)'
    X = df.drop(columns=[target_column])
    y = df[target_column].values

    # NOTE(review): the scaler is fitted on all rows, before evaluate_bmfk
    # performs its train/test split, so test rows influence the scaling.
    X_scaled = StandardScaler().fit_transform(X)

    # (result label, feature subset) pairs, evaluated in this order.
    experiments = [
        (
            "Ensemble Filter + BEEO (RL) + BMFK (Proposed)",
            [
                'Follicle No. (L)', 'hair growth(Y/N)', 'Follicle No. (R)',
                'Cycle(R/I)', 'Fast food (Y/N)', 'Skin darkening (Y/N)',
                'Cycle length(days)', 'FSH/LH',
            ],
        ),
        (
            "With Ensemble Filter & BEO alone (No RL)",
            [
                'Follicle No. (L)', 'hair growth(Y/N)', 'Follicle No. (R)',
                'Cycle(R/I)', 'Fast food (Y/N)', 'AMH(ng/mL)',
                'Skin darkening (Y/N)', 'Weight gain(Y/N)',
                'Pimples(Y/N)', 'Cycle length(days)',
                'Hip(inch)', 'Weight (Kg)', 'FSH/LH', 'FSH(mIU/mL)',
            ],
        ),
        (
            "Without Ensemble Filter & with BEO-RL",
            [
                'Age (yrs)', 'Height(Cm)', 'RR (breaths/min)', 'No. of aborptions',
                'Hip(inch)', 'Waist:Hip Ratio', 'AMH(ng/mL)', 'PRG(ng/mL)',
                'Fast food (Y/N)', 'Follicle No. (R)', 'Avg. F size (L) (mm)',
                'Endometrium (mm)',
            ],
        ),
    ]

    method_names = []
    scores = []
    for label, feature_names in experiments:
        # Map the feature names to column positions in the scaled matrix.
        positions = [X.columns.get_loc(name) for name in feature_names]
        method_names.append(label)
        scores.append(evaluate_bmfk(X_scaled[:, positions], y))

    accuracy_values = np.array(scores)

    # Negate so that the highest accuracy receives rank 1.
    ranks = rankdata(-accuracy_values)

    df_results = pd.DataFrame({
        "Method": method_names,
        "Accuracy": accuracy_values,
        "Rank": ranks,
        "Final Rank": rankdata(ranks),
    })

    print("\n--- Accuracy, Friedman Mean Ranks, and Final Ranks ---")
    print(df_results)

# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



# Output of the cell above:
#
# --- Accuracy, Friedman Mean Ranks, and Final Ranks ---
#                                           Method  Accuracy  Rank  Final Rank
# 0  Ensemble Filter + BEEO (RL) + BMFK (Proposed)  0.836364   2.0         2.0
# 1       With Ensemble Filter & BEO alone (No RL)  0.854545   1.0         1.0
# 2          Without Ensemble Filter & with BEO-RL  0.818182   3.0         3.0
