In [None]:
import os
import zipfile
import tempfile
import numpy as np
import pandas as pd
import warnings

from tqdm import tqdm
from itertools import combinations
from joblib import Parallel, delayed

from scipy.io import loadmat
from scipy.stats import pearsonr, spearmanr, kendalltau
from scipy.optimize import curve_fit, OptimizeWarning

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Zip archives that hold the per-image feature files (.mat) for each dataset.
UID_FEATURES_ZIP  = r'UID_features_Gabor.zip'
SAUD_FEATURES_ZIP = r'SAUD_features_Gabor.zip'

# Tables that map image names to MOS values for each dataset.
UID_MOS_FILE  = r'mos_UID.xlsx'
SAUD_MOS_FILE = r'SAUD_MOS.xlsx'

# Required length of a flattened 'FGabor' vector (other sizes are skipped by the
# loader); also the number of feature columns the pairs are drawn from.
NUM_FEATURES = 12
# Number of random train/test splits evaluated for every feature pair.
NUM_ITER = 500

# Passed as `train_size` to train_test_split for the intra-dataset experiments.
TRAIN_RATIO_INTRA = 0.7

# NOTE(review): not referenced in the visible cells — the export cell writes to a
# different, hard-coded file name. Confirm whether this constant is still needed.
OUTPUT_EXCEL = 'Gabor_66Combo_SVR_Ranked_Optimized.xlsx'

# Orientation labels (presumably degrees — confirm) reported per feature;
# process_combination indexes this list with (feature_number - 1) % 6.
ORIENTATIONS = [0, 30, 60, 90, 120, 150]

def logistic_5p(x, b1, b2, b3, b4, b5):
    """
    Five-parameter logistic mapping with an added linear term.

    Computes ``b1 * (0.5 - 1 / (1 + exp(b2 * (x - b3)))) + b4 * x + b5``.
    The exponent is clipped to [-100, 100] before ``exp`` so that extreme
    inputs cannot overflow.

    Parameters
    ----------
    x : scalar or array-like supporting NumPy arithmetic
        Values to transform.
    b1, b2, b3, b4, b5 : float
        Curve parameters (logistic amplitude, slope, centre, linear slope,
        offset respectively).

    Returns
    -------
    Same shape as ``x`` after NumPy broadcasting.
    """
    # Bound the exponent first; exp(100) is still finite in float64.
    exponent = np.clip(b2 * (x - b3), -100, 100)
    sigmoid_term = 0.5 - 1 / (1 + np.exp(exponent))
    return b1 * sigmoid_term + b4 * x + b5

def compute_metrics(y_true, y_pred):
    """
    Score predictions against ground truth with PLCC, SRCC, KRCC and RMSE.

    Unless the predictions are (near-)constant, they are first mapped onto the
    scale of ``y_true`` by fitting ``logistic_5p`` with ``curve_fit``. If the
    fit fails for any reason, the raw predictions are scored instead.

    Parameters
    ----------
    y_true : array-like
        Ground-truth scores.
    y_pred : array-like
        Predicted scores, same length as ``y_true``.

    Returns
    -------
    tuple
        ``(plcc, srcc, krcc, rmse)`` — Pearson, Spearman and Kendall
        correlation coefficients followed by the root-mean-squared error.
        The correlation entries are whatever scipy returns for the inputs
        (scipy yields NaN when an input is constant).
    """
    # With (near-)constant predictions there is nothing for the 5-parameter
    # curve to fit, so the mapping step is skipped and raw values are scored.
    is_constant = np.std(y_pred) < 1e-6

    if not is_constant:
        with warnings.catch_warnings():
            # curve_fit emits OptimizeWarning when it cannot estimate the
            # covariance; the covariance is discarded here anyway.
            warnings.simplefilter("ignore", OptimizeWarning)
            try:
                p0 = [np.ptp(y_true), 1, np.mean(y_pred), 0, np.mean(y_true)]
                popt, _ = curve_fit(logistic_5p, y_pred, y_true, p0=p0, maxfev=1000)
                y_pred = logistic_5p(y_pred, *popt)
            except Exception:
                # Best-effort mapping: on a failed fit keep the raw predictions.
                # Catching Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate so a long run can still be stopped.
                pass

    return (
        pearsonr(y_true, y_pred)[0],
        spearmanr(y_true, y_pred)[0],
        kendalltau(y_true, y_pred)[0],
        np.sqrt(mean_squared_error(y_true, y_pred))
    )

def load_features_from_zip(zip_path, mos_file):
    """
    Build the feature matrix and MOS vector for one dataset.

    The MOS table is read from ``mos_file`` (Excel for .xls/.xlsx, CSV
    otherwise). The archive at ``zip_path`` is extracted to a temporary
    directory and every ``.mat`` file whose base name matches an image name in
    the MOS table is loaded. Its ``'FGabor'`` variable is flattened and kept
    only if it has exactly ``NUM_FEATURES`` elements.

    Files are visited in sorted order so that the row order of the result —
    and therefore every seeded train/test split made from it — does not depend
    on the filesystem's directory listing order.

    Parameters
    ----------
    zip_path : str or path-like
        Archive containing the ``.mat`` feature files.
    mos_file : str or path-like
        Table with a name column (``name`` or ``image_name``) and a ``mos``
        column; column names are matched case-insensitively.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, NUM_FEATURES)
    y : numpy.ndarray, shape (n_samples,)

    Raises
    ------
    KeyError
        If the MOS table lacks a name column or a ``mos`` column.
    """
    # Decide the reader from the real extension, case-insensitively.
    extension = os.path.splitext(os.fspath(mos_file))[1].lower()
    mos_df = pd.read_excel(mos_file) if extension in ('.xls', '.xlsx') else pd.read_csv(mos_file)

    # Case-insensitive lookup of the original column labels.
    cols = {str(c).lower(): c for c in mos_df.columns}

    name_col = cols.get('name', cols.get('image_name'))
    mos_col = cols.get('mos')
    if name_col is None or mos_col is None:
        raise KeyError(
            f"{mos_file}: expected a 'name' or 'image_name' column and a 'mos' column, "
            f"found {list(mos_df.columns)}"
        )

    def clean_name(x):
        # Reduce 'dir/img001.png ' to 'img001' so it can match a .mat base name.
        return os.path.splitext(os.path.basename(str(x).strip()))[0]

    mos_df['key'] = mos_df[name_col].apply(clean_name)
    mos_dict = dict(zip(mos_df['key'], mos_df[mos_col]))

    X, y = [], []

    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(zip_path, 'r') as z:
            z.extractall(tmpdir)

        for root, dirs, files in os.walk(tmpdir):
            # Sorting dirs in place fixes the traversal order of os.walk.
            dirs.sort()
            for f in sorted(files):
                if f.endswith('.mat'):
                    key = os.path.splitext(f)[0]
                    if key in mos_dict:
                        FG = loadmat(os.path.join(root, f))['FGabor'].reshape(-1)
                        if FG.size == NUM_FEATURES:
                            X.append(FG)
                            y.append(mos_dict[key])

    # reshape is a no-op for a non-empty result and keeps an empty one 2-D.
    return np.array(X).reshape(-1, NUM_FEATURES), np.array(y)

def process_combination(combo_key, X_subset, y_data, train_ratio, n_iter):
    """
    Evaluate a single feature pair over repeated random train/test splits.

    Intended to run inside a parallel worker. For each of ``n_iter`` splits
    (the iteration index doubles as the split's ``random_state``), an RBF SVR
    is fitted on the training part and scored on the held-out part with
    ``compute_metrics``. The four metrics are then averaged over all splits.

    Parameters
    ----------
    combo_key : str
        Pair label of the form ``'f<a>_f<b>'`` with 1-based feature numbers.
    X_subset : array of shape (n_samples, 2)
        The two feature columns of this pair.
    y_data : array of shape (n_samples,)
        Target scores.
    train_ratio : float
        Passed as ``train_size`` to ``train_test_split``.
    n_iter : int
        Number of splits to evaluate.

    Returns
    -------
    dict
        The pair label, the scale/orientation of both features and the mean
        PLCC, SRCC, KRCC and RMSE.
    """
    regressor_kwargs = dict(kernel='rbf', C=100, gamma='scale', epsilon=0.01)

    # One list per metric, filled in the order compute_metrics returns them.
    plcc_runs, srcc_runs, krcc_runs, rmse_runs = [], [], [], []
    collectors = (plcc_runs, srcc_runs, krcc_runs, rmse_runs)

    for seed in range(n_iter):
        X_fit, X_eval, y_fit, y_eval = train_test_split(
            X_subset, y_data, train_size=train_ratio, random_state=seed
        )

        regressor = SVR(**regressor_kwargs)
        regressor.fit(X_fit, y_fit)
        predictions = regressor.predict(X_eval)

        for collector, value in zip(collectors, compute_metrics(y_eval, predictions)):
            collector.append(value)

    def describe(feature_number):
        # Features 1-6 are reported as scale 1, the rest as scale 2; the
        # orientation label cycles through ORIENTATIONS every six features.
        scale = 1 if feature_number <= 6 else 2
        return scale, ORIENTATIONS[(feature_number - 1) % 6]

    # 'f3_f10' -> (3, 10)
    first_number, second_number = (int(token[1:]) for token in combo_key.split('_'))
    first_scale, first_orientation = describe(first_number)
    second_scale, second_orientation = describe(second_number)

    return {
        'Combination': combo_key,
        'F1_Scale': first_scale,
        'F1_Orientation': first_orientation,
        'F2_Scale': second_scale,
        'F2_Orientation': second_orientation,
        'PLCC': np.mean(plcc_runs),
        'SRCC': np.mean(srcc_runs),
        'KRCC': np.mean(krcc_runs),
        'RMSE': np.mean(rmse_runs)
    }

def evaluate_combinations_optimized(X_data, y_data, train_ratio, task_name):
    """
    Rank every pair of feature columns by mean PLCC.

    Slices ``X_data`` into all two-column combinations of its first
    ``NUM_FEATURES`` columns, evaluates each pair with
    ``process_combination`` (``NUM_ITER`` splits each) across all available
    cores, and collects the per-pair summaries into a DataFrame.

    Parameters
    ----------
    X_data : array of shape (n_samples, NUM_FEATURES)
        Full feature matrix.
    y_data : array of shape (n_samples,)
        Target scores.
    train_ratio : float
        Training fraction forwarded to ``process_combination``.
    task_name : str
        Label used in the start message and as the progress-bar description.

    Returns
    -------
    pandas.DataFrame
        One row per feature pair, sorted by ``'PLCC'`` in descending order.
    """
    # Label -> two-column slice, labels use 1-based feature numbers.
    combos = {}
    for i, j in combinations(range(NUM_FEATURES), 2):
        combos[f'f{i+1}_f{j+1}'] = X_data[:, [i, j]]

    print(f"Starting parallel evaluation for: {task_name} ({len(combos)} combinations)...")

    pool = Parallel(n_jobs=-1)
    # The progress bar advances as jobs are handed to the pool.
    results = pool(
        delayed(process_combination)(pair_label, pair_columns, y_data, train_ratio, NUM_ITER)
        for pair_label, pair_columns in tqdm(combos.items(), desc=task_name)
    )

    ranking = pd.DataFrame(results)
    ranking = ranking.sort_values(by='PLCC', ascending=False)
    return ranking

if __name__ == "__main__":
    # Step 1: read features and MOS targets for both datasets.
    print("Loading datasets...")
    X_uid, y_uid = load_features_from_zip(zip_path=UID_FEATURES_ZIP, mos_file=UID_MOS_FILE)
    X_saud, y_saud = load_features_from_zip(zip_path=SAUD_FEATURES_ZIP, mos_file=SAUD_MOS_FILE)

    # Step 2: intra-dataset ranking of all feature pairs (train and test on
    # splits of the same dataset). Results stay in module scope for later cells.
    print("\nRunning evaluations (Optimized)...")

    # UID -> UID
    df_uid_uid = evaluate_combinations_optimized(
        X_data=X_uid,
        y_data=y_uid,
        train_ratio=TRAIN_RATIO_INTRA,
        task_name="UID (Intra)",
    )

    # SAUD -> SAUD
    df_saud_saud = evaluate_combinations_optimized(
        X_data=X_saud,
        y_data=y_saud,
        train_ratio=TRAIN_RATIO_INTRA,
        task_name="SAUD (Intra)",
    )

In [None]:
# Write both ranking tables to one workbook, one sheet per experiment.
with pd.ExcelWriter('Gabor_66Combo_Recovered.xlsx') as writer:
    for sheet_label, ranked_df in (('UID_UID', df_uid_uid), ('SAUD_SAUD', df_saud_saud)):
        ranked_df.to_excel(writer, sheet_name=sheet_label, index=False)

print(" Data recovered, saved to Gabor_66Combo_Recovered.xlsx")