In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors

class BMFK:
    """Fuzzy k-nearest-neighbour classifier scored with a Bonferroni mean.

    For every query sample the ``n_neighbors`` closest training samples
    (Euclidean distance) are looked up.  Each neighbour gets the fuzzy
    membership ``1 / (d ** (2 / (m - 1)) + 1e-8)``, normalised so the
    memberships of one query sum to one.  Every class is then scored with the
    Bonferroni mean of the memberships of its neighbours, and the class with
    the highest score is predicted (ties go to the first class in sorted
    order).

    Note that the class score is a mean: it does not grow with the number of
    neighbours that belong to the class.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbours consulted per query sample.
    m : float, default=2
        Fuzzy strength parameter; must be greater than 1 because the
        membership exponent is ``2 / (m - 1)``.
    p, q : float, default=2
        Bonferroni mean exponents; ``p + q`` must be non-zero because the
        result is raised to ``1 / (p + q)``.

    Raises
    ------
    ValueError
        If ``m <= 1`` or ``p + q == 0``.
    """

    def __init__(self, n_neighbors=5, m=2, p=2, q=2):
        # Fail at construction time instead of with a ZeroDivisionError deep
        # inside predict()/bonferroni_mean().
        if m <= 1:
            raise ValueError(f"m must be greater than 1, got {m!r}")
        if p + q == 0:
            raise ValueError("p + q must be non-zero")
        self.n_neighbors = n_neighbors
        self.m = m  # fuzzy strength parameter
        self.p = p  # Bonferroni p parameter
        self.q = q  # Bonferroni q parameter

    def fit(self, X, y):
        """Store the training data and build the neighbour index.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        BMFK
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # Coerce to ndarrays so predict() can index both positionally; a
        # pandas Series/DataFrame would be indexed by label instead.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
            )
        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.nn = NearestNeighbors(n_neighbors=self.n_neighbors, metric='minkowski', p=2)
        self.nn.fit(X)
        return self

    def bonferroni_mean(self, values):
        """Return the Bonferroni mean of ``values`` with exponents ``p`` and ``q``.

        Computes ``(sum_{i != j} v_i**p * v_j**q / (n * (n - 1))) ** (1 / (p + q))``.
        A single value is returned unchanged and an empty input yields ``0.0``.
        """
        values = np.asarray(values, dtype=float).ravel()
        n = values.size
        if n == 0:
            return 0.0
        if n == 1:
            return np.mean(values)  # Return simple mean if there's only one value
        # All ordered pairs at once; the diagonal (i == j) is zeroed rather
        # than subtracted afterwards so no precision is lost to cancellation.
        pair_products = np.outer(values ** self.p, values ** self.q)
        np.fill_diagonal(pair_products, 0.0)
        return (pair_products.sum() / (n * (n - 1))) ** (1 / (self.p + self.q))

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_queries,)
            Predicted labels, taken from the classes seen in ``fit``.
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            return np.array([])

        # One batched neighbour query instead of one call per sample.
        distances, indices = self.nn.kneighbors(X)
        neighbor_labels = self.y[indices]

        # Calculate fuzzy memberships (each row normalised to sum to one)
        memberships = 1 / (distances ** (2 / (self.m - 1)) + 1e-8)
        memberships /= memberships.sum(axis=1, keepdims=True)

        predictions = []
        for sample_memberships, sample_labels in zip(memberships, neighbor_labels):
            # Calculate class memberships using Bonferroni mean; a class with
            # no neighbours scores 0.0.
            class_scores = [
                self.bonferroni_mean(sample_memberships[sample_labels == c])
                for c in self.classes
            ]
            # Predict the class with highest membership (argmax keeps the
            # first maximum, i.e. the first class in sorted order on ties).
            predictions.append(self.classes[int(np.argmax(class_scores))])

        return np.array(predictions)

# Load the dataset
file_path = "PCOS_data_without_infertility.xlsx"
df = pd.read_excel(file_path, sheet_name="Full_new")

# Data Preprocessing
df = df.drop(columns=['Sl. No', 'Patient File No.', 'Unnamed: 44'])
# Non-numeric cells become NaN here and are then replaced by the column median.
df = df.apply(pd.to_numeric, errors='coerce')
df = df.fillna(df.median())

# Encode categorical columns
# (runs after the numeric coercion above, so the codes are assigned to the
# already-numeric values of each column)
categorical_columns = ['Blood Group', 'Cycle(R/I)', 'Pregnant(Y/N)',
                       'Weight gain(Y/N)', 'hair growth(Y/N)',
                       'Skin darkening (Y/N)', 'Hair loss(Y/N)',
                       'Pimples(Y/N)', 'Fast food (Y/N)',
                       'Reg.Exercise(Y/N)']
for col in categorical_columns:
    if col in df.columns:
        df[col] = df[col].astype('category').cat.codes

# Split into features and target
X = df.drop(columns=['PCOS (Y/N)'])
y = df['PCOS (Y/N)'].values  # Convert to numpy array

# Standardize features
# NOTE(review): the medians above and this scaler are fitted on every row,
# including rows that later serve as cross-validation test folds, so the
# reported accuracies are computed with test-fold statistics visible to the
# preprocessing. Fit both inside each fold if an unbiased estimate is needed.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the different feature sets
ensemble_selected_features = ['Follicle No. (L)', 'hair growth(Y/N)', 'Follicle No. (R)',
                              'Cycle(R/I)', 'Fast food (Y/N)', 'AMH(ng/mL)', 'Skin darkening (Y/N)',
                              'Weight gain(Y/N)', 'Pimples(Y/N)', 'Cycle length(days)',
                              'Hip(inch)', 'Weight (Kg)', 'FSH/LH', 'FSH(mIU/mL)']

rl_selected_features = [
    ' Age (yrs)', 'Weight (Kg)', 'Pulse rate(bpm) ', 'RR (breaths/min)',
    'Hb(g/dl)', 'Cycle(R/I)', 'Cycle length(days)', 'Marraige Status (Yrs)',
    '  I   beta-HCG(mIU/mL)', 'FSH(mIU/mL)', 'Waist(inch)', 'Waist:Hip Ratio',
    'TSH (mIU/L)', 'AMH(ng/mL)', 'Vit D3 (ng/mL)', 'PRG(ng/mL)', 'RBS(mg/dl)',
    'Weight gain(Y/N)', 'hair growth(Y/N)', 'Skin darkening (Y/N)',
    'Pimples(Y/N)', 'BP _Systolic (mmHg)', 'BP _Diastolic (mmHg)',
    'Follicle No. (L)', 'Follicle No. (R)',
]

proposed_selected_features = [
        'Follicle No. (L)', 'hair growth(Y/N)', 'Follicle No. (R)',
        'Cycle(R/I)', 'Fast food (Y/N)', 'Skin darkening (Y/N)',
        'Cycle length(days)', 'FSH/LH', ' Age (yrs)', 'Weight (Kg)',
        'Hip(inch)'  # Added more features identified as important
    ]


def select_feature_columns(scaled_matrix, columns, feature_names):
    """Return the columns of ``scaled_matrix`` that correspond to ``feature_names``.

    Parameters
    ----------
    scaled_matrix : numpy.ndarray of shape (n_samples, n_columns)
        Feature matrix whose column order matches ``columns``.
    columns : pandas.Index
        Column labels of the frame the matrix was built from.
    feature_names : list of str
        Labels to keep, in the order they should appear in the result.

    Raises
    ------
    KeyError
        If any requested name is absent; the message lists every missing name
        (the dataset headers contain stray spaces, so typos are easy to make).
    """
    missing = [name for name in feature_names if name not in columns]
    if missing:
        raise KeyError(f"Feature(s) not found in dataset columns: {missing}")
    positions = [columns.get_loc(name) for name in feature_names]
    return scaled_matrix[:, positions]


# 1. BMFK on Ensemble Selected Features
X_ensemble = select_feature_columns(X_scaled, X.columns, ensemble_selected_features)

# 2. BMFK on RL Selected Features
X_rl = select_feature_columns(X_scaled, X.columns, rl_selected_features)

# 3. BMFK on Proposed BMFK Selected Features
X_proposed = select_feature_columns(X_scaled, X.columns, proposed_selected_features)

# 4. BMFK on All Features (no feature selection)
X_all = X_scaled  # Use all features

# Helper function to run cross-validation
def run_bmfk_cv(X, y, n_splits=10, random_state=42, n_neighbors=5, m=2, p=1, q=1):
    """Return the mean accuracy of BMFK over a shuffled K-fold cross-validation.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Target labels.
    n_splits : int, default=10
        Number of folds.
    random_state : int, default=42
        Seed for the fold shuffling, so repeated runs use the same folds.
    n_neighbors, m, p, q : default=5, 2, 1, 1
        Hyper-parameters forwarded to ``BMFK``.

    Returns
    -------
    float
        Mean of the per-fold accuracies.
    """
    # Positional indexing below needs ndarrays; a DataFrame/Series would be
    # indexed by label instead.
    X = np.asarray(X)
    y = np.asarray(y)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    fold_accuracies = []
    for train_idx, test_idx in kf.split(X):
        # A fresh model per fold so nothing carries over between folds.
        model = BMFK(n_neighbors=n_neighbors, m=m, p=p, q=q)
        model.fit(X[train_idx], y[train_idx])
        y_pred = model.predict(X[test_idx])
        fold_accuracies.append(accuracy_score(y[test_idx], y_pred))

    return np.mean(fold_accuracies)

# Evaluate BMFK on each of the four feature matrices (same order as defined above),
# then report the mean cross-validated accuracy of every variant.
accuracy_ensemble, accuracy_rl, accuracy_proposed, accuracy_all = [
    run_bmfk_cv(feature_matrix, y)
    for feature_matrix in (X_ensemble, X_rl, X_proposed, X_all)
]

for report_line in (
    f"BMFK on Ensemble Selected Features Accuracy: {accuracy_ensemble:.4f}",
    f"BMFK on RL Selected Features Accuracy: {accuracy_rl:.4f}",
    f"BMFK on Proposed Selected Features Accuracy: {accuracy_proposed:.4f}",
    f"BMFK on All Features Accuracy: {accuracy_all:.4f}",
):
    print(report_line)


BMFK on Ensemble Selected Features Accuracy: 0.8113
BMFK on RL Selected Features Accuracy: 0.8261
BMFK on Proposed Selected Features Accuracy: 0.8189
BMFK on All Features Accuracy: 0.8151


In [1]:
from tabulate import tabulate
import pandas as pd

# Friedman test results reported below the table.
# NOTE(review): these are entered by hand; nothing in this notebook computes them.
FRIEDMAN_STATISTIC = 10.192
FRIEDMAN_P_VALUE = 0.037

# Create data as a list of dictionaries (one row per pipeline variant).
# NOTE(review): the accuracies are typed in, not read from the BMFK cell's
# variables. 0.8261, 0.8151 and 0.8113 match that cell's printed output;
# 0.8559 and 0.8233 do not appear there - confirm where they come from.
# "Rank" is an ordinal position, so it is stored as an int and renders as
# 1, 2, ... instead of 1.0000, 2.0000, ...
data = [
    {
        "Methods": "Ensemble filter+BEEO(RL)+BMFK(proposed)",
        "Accuracy": 0.8559,
        "Friedman mean rank": 1.2,
        "Rank": 1
    },
    {
        "Methods": "Without ensemble filter & with BEO-RL",
        "Accuracy": 0.8261,
        "Friedman mean rank": 2.8,
        "Rank": 2
    },
    {
        "Methods": "without filter & wrapper(all 44 features to BMFK classifier)",
        "Accuracy": 0.8151,
        "Friedman mean rank": 3.6,
        "Rank": 4
    },
    {
        "Methods": "With ensemble filter & with BEO alone (NO RL)",
        "Accuracy": 0.8113,
        "Friedman mean rank": 4.2,
        "Rank": 5
    },
    {
        "Methods": "With ensemble filter + BMFK",
        "Accuracy": 0.8233,
        "Friedman mean rank": 3.2,
        "Rank": 3
    }
]

# Convert to DataFrame
# A dedicated name keeps the dataset frame `df` from the BMFK cell intact.
comparison_df = pd.DataFrame(data)

# Print using tabulate for nice formatting
print("\nTable 1: Comparison of Different Methods")
print(tabulate(comparison_df, headers='keys', tablefmt='grid', showindex=False,
               floatfmt=".4f"))

# Print statistical test results
print("\nStatistical Test Results:")
print("=" * 50)
print(f"Friedman test statistic = {FRIEDMAN_STATISTIC:.3f}")
print(f"p-value = {FRIEDMAN_P_VALUE:.3f}")


Table 1: Comparison of Different Methods
+--------------------------------------------------------------+------------+----------------------+--------+
| Methods                                                      |   Accuracy |   Friedman mean rank |   Rank |
| Ensemble filter+BEEO(RL)+BMFK(proposed)                      |     0.8559 |               1.2000 | 1.0000 |
+--------------------------------------------------------------+------------+----------------------+--------+
| Without ensemble filter & with BEO-RL                        |     0.8261 |               2.8000 | 2.0000 |
+--------------------------------------------------------------+------------+----------------------+--------+
| without filter & wrapper(all 44 features to BMFK classifier) |     0.8151 |               3.6000 | 4.0000 |
+--------------------------------------------------------------+------------+----------------------+--------+
| With ensemble filter & with BEO alone (NO RL)                |     0.8113 | 

In [2]:
!pip install tabulate

