# In [None]:
import numpy as np
from sklearn.datasets import make_multilabel_classification
from sklearn.metrics import classification_report, coverage_error, label_ranking_average_precision_score, hamming_loss
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import ClassifierChain
from sklearn.calibration import CalibratedClassifierCV
from scipy.io import loadmat, savemat
import os
import time

def _load_split(scenario_name, snr_value, split):
    """Load the feature array and the label array of one dataset split.

    Args:
        scenario_name: Scenario identifier used in the dataset directory names.
        snr_value: SNR tag used in the dataset directory names (e.g. 'snr10').
        split: Split name; the caller uses 'train', 'valid' and 'test'.

    Returns:
        Tuple ``(features, labels)``: the 'datat' variable of ``data.mat``
        and the 'Labels' variable of ``labels.mat`` for that split.
    """
    prefix = f'/path/{scenario_name}-{snr_value}-{split}'
    features = loadmat(f'{prefix}/data.mat')['datat']
    labels = loadmat(f'{prefix}-labels/labels.mat')['Labels']
    return features, labels


def _flatten_samples(data):
    """Move the 4th axis of a 4-D array to the front and flatten the rest.

    The number of rows is read from the array itself instead of being
    hard-coded, so splits of any size are handled.
    """
    n_samples = data.shape[3]
    return np.transpose(data, (3, 0, 1, 2)).reshape(n_samples, -1)


def main_knn(scenario_name, snr_value, top_k=44):
    """Train and evaluate a calibrated k-NN classifier chain on one scenario.

    Pipeline: load the train/valid/test splits, flatten every sample to a
    vector, merge train and valid, standardize, project onto 10 principal
    components, grid-search ``n_neighbors`` for a k-NN classifier, wrap the
    selected k-NN in a sigmoid ``CalibratedClassifierCV`` inside a
    ``ClassifierChain``, then mark the ``top_k`` highest-probability labels
    of each test sample as predicted. Metrics and the inference time are
    printed and the binary prediction matrix is written to a ``.mat`` file.

    Args:
        scenario_name: Scenario identifier used to build the dataset paths
            and the output file name.
        snr_value: SNR tag used to build the dataset paths and the output
            file name (e.g. 'snr10').
        top_k: Number of highest-scoring labels set to 1 for every test
            sample. NOTE(review): the default (44) matches the example
            scenario name "44" -- presumably the number of active labels
            per sample; confirm against the dataset.

    Returns:
        None. Results are printed and saved under "/your-path/".
    """
    # Load data
    train_X, train_Y = _load_split(scenario_name, snr_value, 'train')
    valid_X, valid_Y = _load_split(scenario_name, snr_value, 'valid')
    test_X, test_Y = _load_split(scenario_name, snr_value, 'test')

    # Reshape data: one row per sample, remaining axes flattened
    train_X = _flatten_samples(train_X)
    valid_X = _flatten_samples(valid_X)
    test_X = _flatten_samples(test_X)

    # Concatenate train and validation data (model selection below relies
    # on cross-validation, so the validation split is used for fitting too)
    train_X = np.concatenate((train_X, valid_X))
    train_Y = np.concatenate((train_Y, valid_Y))

    # Standardize the data; statistics come from the training rows only
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    # Reduce dimensionality with PCA
    pca = PCA(n_components=10)
    train_X = pca.fit_transform(train_X)
    test_X = pca.transform(test_X)

    # Perform grid search to find the best k for KNeighborsClassifier.
    # With a 2-D label matrix, 'accuracy' is the exact-match (subset) accuracy.
    param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}
    knn = KNeighborsClassifier()
    grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=3, scoring='accuracy')
    grid_search.fit(train_X, train_Y)

    print(f"Best parameters found: {grid_search.best_params_}")
    best_knn = grid_search.best_estimator_

    # Initialize CalibratedClassifierCV with 'sigmoid' calibration using the best KNeighborsClassifier
    calibrated_knn = CalibratedClassifierCV(best_knn, method='sigmoid')

    # Initialize ClassifierChain with CalibratedClassifierCV
    chain_classifier_knn = ClassifierChain(calibrated_knn, order='random', random_state=42)

    # Fit the data to the ClassifierChain with the calibrated KNeighborsClassifier
    chain_classifier_knn.fit(train_X, train_Y)

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments.
    start_inference_time = time.perf_counter()

    # Predict probabilities for the test set using the chain classifier
    y_pred_prob_knn = chain_classifier_knn.predict_proba(test_X)
    # Negate so that argsort yields label indices by descending probability
    sorted_indices_knn = np.argsort(-y_pred_prob_knn, axis=1)

    # Select top k labels for each sample: start from an all-zero matrix
    # shaped like the ground truth and set the k best-ranked columns of
    # every row to 1 in a single vectorized write.
    y_pred_chain_knn = np.zeros_like(test_Y)
    np.put_along_axis(y_pred_chain_knn, sorted_indices_knn[:, :top_k], 1, axis=1)

    end_inference_time = time.perf_counter()
    inference_time = end_inference_time - start_inference_time
    # Print the inference time
    print(f"Inference Time: {inference_time} seconds", flush=True)

    # Compute classification metrics: the ranking metrics consume the raw
    # probabilities, the others consume the thresholded top-k predictions.
    report_knn = classification_report(test_Y, y_pred_chain_knn)
    coverage_err_knn = coverage_error(test_Y, y_pred_prob_knn)
    avg_precision_knn = label_ranking_average_precision_score(test_Y, y_pred_prob_knn)
    hamming_knn = hamming_loss(test_Y, y_pred_chain_knn)

    print("Classification Report (KNeighborsClassifier and CalibratedClassifierCV):\n", report_knn)
    print(f"Coverage Error (KNeighborsClassifier and CalibratedClassifierCV): {coverage_err_knn:.4f}")
    print(f"Label Ranking Average Precision Score (KNeighborsClassifier and CalibratedClassifierCV): {avg_precision_knn:.4f}")
    print(f"Hamming Loss (KNeighborsClassifier and CalibratedClassifierCV): {hamming_knn:.4f}")

    # Save predictions
    directory_knn = "/your-path/"
    os.makedirs(directory_knn, exist_ok=True)
    filename_knn = f"{scenario_name}_{snr_value}_KNN_Chain_Calibrated.mat"
    filepath_knn = os.path.join(directory_knn, filename_knn)

    # Saving is best-effort: the metrics are already printed, so a failed
    # write is reported instead of aborting the run.
    try:
        savemat(filepath_knn, {"y_pred_chain_knn": y_pred_chain_knn})
    except Exception as e:
        print("Error saving file (KNeighborsClassifier and CalibratedClassifierCV):", e)

# Script entry point: run the calibrated k-NN chain pipeline once on the
# example scenario "44" at the 'snr10' noise level (default top_k).
if __name__ == "__main__":
    scenario_name, snr_value = "44", "snr10"
    main_knn(scenario_name=scenario_name, snr_value=snr_value)