In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors

class BMFK:
    def __init__(self, n_neighbors=5, m=2, p=2, q=2):
        self.n_neighbors = n_neighbors
        self.m = m  # fuzzy strength parameter
        self.p = p  # Bonferroni p parameter
        self.q = q  # Bonferroni q parameter

    def fit(self, X, y):
        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.nn = NearestNeighbors(n_neighbors=self.n_neighbors, metric='minkowski', p=2)
        self.nn.fit(X)

    def bonferroni_mean(self, values):
        n = len(values)
        if n <= 1:
            return np.mean(values)  # Return simple mean if there's only one value
        sum_pq = 0
        for i in range(n):
            for j in range(n):
                if i != j:
                    sum_pq += values[i]**self.p * values[j]**self.q
        return (sum_pq / (n * (n-1)))**(1 / (self.p + self.q))

    def predict(self, X):
        predictions = []
        for x in X:
            distances, indices = self.nn.kneighbors([x])
            neighbors = self.X[indices[0]]
            neighbor_labels = self.y[indices[0]]
            
            # Calculate fuzzy memberships
            memberships = 1 / (distances[0] ** (2 / (self.m - 1)) + 1e-8)
            memberships /= np.sum(memberships)
            
            # Calculate class memberships using Bonferroni mean
            class_memberships = {}
            for c in self.classes:
                class_indices = neighbor_labels == c
                if np.any(class_indices):
                    class_memberships[c] = self.bonferroni_mean(memberships[class_indices])
                else:
                    class_memberships[c] = 0
            
            # Predict the class with highest membership
            predictions.append(max(class_memberships, key=class_memberships.get))
        
        return np.array(predictions)

# Load the dataset
file_path = "PCOS_data_without_infertility.xlsx"
df = pd.read_excel(file_path, sheet_name="Full_new")

# Data Preprocessing
df = df.drop(columns=['Sl. No', 'Patient File No.', 'Unnamed: 44'])
df = df.apply(pd.to_numeric, errors='coerce')
df.fillna(df.median(), inplace=True)

# Encode categorical columns
categorical_columns = ['Blood Group', 'Cycle(R/I)', 'Pregnant(Y/N)', 
                       'Weight gain(Y/N)', 'hair growth(Y/N)', 
                       'Skin darkening (Y/N)', 'Hair loss(Y/N)', 
                       'Pimples(Y/N)', 'Fast food (Y/N)', 
                       'Reg.Exercise(Y/N)']
for col in categorical_columns:
    if col in df.columns:
        df[col] = df[col].astype('category').cat.codes

# Split into features and target
X = df.drop(columns=['PCOS (Y/N)'])
y = df['PCOS (Y/N)'].values  # Convert to numpy array

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Selected features from Ensemble filter alone (no RL)
selected_features = [' Age (yrs)', 'Height(Cm) ', 'RR (breaths/min)', 'No. of aborptions', 'Hip(inch)', 'Waist:Hip Ratio', 'AMH(ng/mL)', 'PRG(ng/mL)', 'Fast food (Y/N)', 'Follicle No. (R)', 'Avg. F size (L) (mm)', 'Endometrium (mm)'] 
X_selected = X_scaled[:, [X.columns.get_loc(feature) for feature in selected_features]]

# Cross-validation for BMFK
kf = KFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []

for train_idx, test_idx in kf.split(X_selected):
    X_train, X_test = X_selected[train_idx], X_selected[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    # Initialize BMFK model
    model = BMFK(n_neighbors=5, m=2, p=1, q=1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

mean_accuracy = np.mean(accuracies)
print(f"BMFK rl only Accuracy: {mean_accuracy:.4f}")

BMFK rl only Accuracy: 0.7577
