In [2]:
from ultralytics import YOLO
import ultralytics
import os
import numpy as np
from pathlib import Path
#set visible cuda

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


# Load a pretrained YOLOv8n model
model = YOLO('/Data4/student_zhihan_data/source_code/yolo/ultralytics/runs/detect/GC10-DET_brightness_0 detect by yolov8n with dropout(p=0.1)/weights/best.pt')

# Define path to directory containing images and videos for inference
# source = '/Data4/student_zhihan_data/data/GC10-DET/test/images'

# # Run inference on the source
# results = model([os.path.join(source, i) for i in os.listdir(source)])# generator of Results objects

In [3]:
from ultralytics.utils.metrics import compute_ap
from ultralytics.engine.validator import BaseValidator
from ultralytics.utils.metrics import box_iou, Metric, DetMetrics
import numpy as np
import torch
import pandas as pd

def smooth(y, f=0.05):
    """Box filter of fraction f."""
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements (must be odd)
    p = np.ones(nf // 2)  # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
    return np.convolve(yp, np.ones(nf) / nf, mode="valid")  # y-smoothed

def ap_per_class(
    tp, conf, pred_cls, target_cls, plot=False, on_plot=None, save_dir=Path(), names=(), eps=1e-16, prefix=""
):
    """
    Computes the average precision per class for object detection evaluation.

    Args:
        tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False).
        conf (np.ndarray): Array of confidence scores of the detections.
        pred_cls (np.ndarray): Array of predicted classes of the detections.
        target_cls (np.ndarray): Array of true classes of the detections.
        plot (bool, optional): Whether to plot PR curves or not. Defaults to False.
        on_plot (func, optional): A callback to pass plots path and data when they are rendered. Defaults to None.
        save_dir (Path, optional): Directory to save the PR curves. Defaults to an empty path.
        names (tuple, optional): Tuple of class names to plot PR curves. Defaults to an empty tuple.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-16.
        prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string.

    Returns:
        (tuple): A tuple of six arrays and one array of unique classes, where:
            tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,).
            fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
            p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
            r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
            f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
            ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
            unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
            p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
            r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
            f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
            x (np.ndarray): X-axis values for the curves. Shape: (1000,).
            prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes, number of detections

    # Create Precision-Recall curve and compute AP for each class
    x, prec_values = np.linspace(0, 1, 1000), []

    # Average precision, precision and recall curves
    ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue

        # Accumulate FPs and TPs
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall
        recall = tpc / (n_l + eps)  # recall curve
        r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1)  # p at pr_score

        # AP from recall-precision curve
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                prec_values.append(np.interp(x, mrec, mpre))  # precision at mAP@0.5

    prec_values = np.array(prec_values)  # (nc, 1000)

    # Compute F1 (harmonic mean of precision and recall)
    f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
    f2_curve = (5 * p_curve * r_curve) / (4 * p_curve + r_curve + eps)
    # names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
    # names = dict(enumerate(names))  # to dict
    # if plot:
    #     plot_pr_curve(x, prec_values, ap, save_dir / f"{prefix}PR_curve.png", names, on_plot=on_plot)
    #     plot_mc_curve(x, f1_curve, save_dir / f"{prefix}F1_curve.png", names, ylabel="F1", on_plot=on_plot)
    #     plot_mc_curve(x, p_curve, save_dir / f"{prefix}P_curve.png", names, ylabel="Precision", on_plot=on_plot)
    #     plot_mc_curve(x, r_curve, save_dir / f"{prefix}R_curve.png", names, ylabel="Recall", on_plot=on_plot)

    i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
    p, r, f1, f2 = p_curve[:, i], r_curve[:, i], f1_curve[:, i], f2_curve[:, i]  # max-F1 precision, recall, F1 values
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    
    ap_50 = ap[:, 0].mean()
    ap_50_95 = ap.mean()
    
    return tp, fp, p, r, f1, f2, ap_50, ap_50_95, unique_classes.astype(int), p_curve, r_curve, f1_curve, f2_curve, x, prec_values


In [None]:
def compute_f2(source, model):
    images = os.listdir(source)
    results = []
    size = len(images) // 10
    for i in range(11):
        images_ = images[i* size: (i+1) * size] if i < 10 else images[i * size:]
        results.extend(model([os.path.join(source, i) for i in images_]))

    validator = BaseValidator()
    validator.iouv = torch.arange(0.5, 1, 0.05)
    
    df = pd.DataFrame(columns=['img_name', 'p', 'r', 'f1', 'f2', 'ap_50', 'ap_50_95', 'conf', 'pred_cls', 'target_cls'])

    for result in results:
        metric = DetMetrics()
        label_path = result.path.replace('images', 'labels')[:-4] + '.txt'
        label = torch.from_numpy(np.loadtxt(label_path))
        # turn label into (x1, y1, x2, y2) format
        
        if len(label) == 0:
            continue
        
        if label.dim() > 1:
            x1 = label[:, 1] - label[:, 3] / 2
            y1 = label[:, 2] - label[:, 4] / 2
            x2 = label[:, 1] + label[:, 3] / 2
            y2 = label[:, 2] + label[:, 4] / 2
            label = torch.stack((label[:, 0], x1, y1, x2, y2), 1)
            iou = box_iou(label[:, 1:].to("cuda:0"), result.boxes.xyxyn.to("cuda:0"))
            tp = validator.match_predictions(result.boxes.data[:,-1], label[:, 0].to("cuda:0"), iou)
            
            # update metric
            tp = tp.detach().cpu().numpy()
            conf = result.boxes.conf.detach().cpu().numpy()
            pre_cls = result.boxes.cls.detach().cpu().numpy()
            target_cls = label[:, 0].detach().cpu().numpy()
        
        elif label.dim() == 1:
            x1 = label[1] - label[3] / 2
            y1 = label[2] - label[4] / 2
            x2 = label[1] + label[3] / 2
            y2 = label[2] + label[4] / 2
            label = torch.tensor([label[0], x1, y1, x2, y2])
            iou = box_iou(label[1:].to("cuda:0").reshape(1, -1), result.boxes.xyxyn.to("cuda:0"))
            tp = validator.match_predictions(result.boxes.data[:,-1], label[0].to("cuda:0").unsqueeze(0), iou)
        
            # update metric
            tp = tp.detach().cpu().numpy()
            conf = result.boxes.conf.detach().cpu().numpy()
            pre_cls = result.boxes.cls.detach().cpu().numpy()
            target_cls = label[0].unsqueeze(0).detach().cpu().numpy()
            
        _, _, p, r, f1, f2, ap_50, ap_50_95, unique_classes, p_curve, r_curve, f1_curve, f2_curve, x, prec_values = ap_per_class(tp, conf, pre_cls, target_cls)
        print(f2.mean(), unique_classes)
        
        # update df
        df.loc[len(df)] = [result.path, p, r, f1, f2, ap_50, ap_50_95, conf, pre_cls, target_cls]

    df.to_csv(f'/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/F2_Record/{source.split("/")[-3]}_train.csv', index=False, header=True)
    return df

for i in os.listdir('/Data4/student_zhihan_data/data'):
    # if i[-3:] != 'csv' and i != 'NEU-DET' and i != 'data.zip' and 'Gaussian' in i:
    if i == 'GC10-DET':
        source = os.path.join('/Data4/student_zhihan_data/data', i, 'train/images')
        df = compute_f2(source, model)

In [7]:
# input: uncertainty * 3, proposed * score, NIQE, BRISQUE
# output: quality score 

F2_dir = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/F2_Record'
Proposed_dir = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Proposed_Score_Record'
Uncertainty_dir = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Uncertainty_Record'

# concat all csv
dfs = []
for idx, dir in enumerate([F2_dir, Proposed_dir, Uncertainty_dir]):
    files = os.listdir(dir)
    for file in files:
        if 'train' in file:
            files.remove(file)

    if idx == 0:
        # df.to_csv(f'{dir}_new.csv', index=False, header=True)
        df = None
        for file in files:
            if file[-7:] == 'new.csv':
                if df is None:
                    df = pd.read_csv(os.path.join(dir, file))
                else:
                    df = pd.concat([df, pd.read_csv(os.path.join(dir, file))], ignore_index=True)
        df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/F2_Record_new.csv')
    elif idx == 1:
        df = None
        # df = pd.DataFrame(columns=['dataset','img_name','p', 'r', 'f1', 'f2', 'ap_50', 'ap_50_95', 'conf', 'pred_cls', 'target_cls'])
        for file in files:
            if file[-3:] == 'csv':
                if df is None:
                    df = pd.read_csv(os.path.join(dir, file))
                else:
                    df = pd.concat([df, pd.read_csv(os.path.join(dir, file))], ignore_index=True)
        # df['img_name'] = df['dataset'] + '/images/' + df['img_name']
        # df = df.drop(columns=['dataset'])
        # df.to_csv(f'{dir}.csv', index=False, header=True)
        df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Proposed_Record_new.csv')
    else:
        df = None
        for file in files:
            if file[-3:] == 'csv':
                if df is None:
                    df = pd.read_csv(os.path.join(dir, file))
                else:
                    df = pd.concat([df, pd.read_csv(os.path.join(dir, file))], ignore_index=True)
        df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Uncertainty_Record_new.csv')
        # df['img_name'] = df['dataset'] + '/test/images/' + df['img_name']
        # df = df.drop(columns=['dataset'])
        # df.to_csv(f'{dir}.csv', index=False, header=True)


In [23]:
# #combine
# df = pd.read_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Uncertainty_Record.csv')
# df['img_name'] = df['dataset'] + '/test/images/' + df['img_name']
# df = df.drop(columns=['dataset'])
# df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Uncertainty_Record.csv', index=False, header=True)

# df = pd.read_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Proposed_Record.csv')
# df['img_name'] = df['dataset'] + '/images/' + df['img_name']
# df = df.drop(columns=['dataset'])
# df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Proposed_Record.csv', index=False, header=True)

In [19]:
# files = os.listdir(Uncertainty_dir)
# df = pd.DataFrame(columns=['dataset','img_name','objectness_uncertainty','objectness_entropy','weighted_variance_sum','weighted_entropy'])
# for file in files:
#     if file[-3:] == 'csv':
#         tmp = pd.read_csv(os.path.join(Uncertainty_dir, file))
#         #add header to tmp
#         tmp.columns = ['dataset','img_name','objectness_uncertainty','objectness_entropy','weighted_variance_sum','weighted_entropy']
#         df = pd.concat([df, tmp], ignore_index=True)

# df.to_csv(f'Uncertainty_Record.csv', index=False, header=True)

  df = pd.concat([df, tmp], ignore_index=True)


In [18]:
# files = os.listdir(Proposed_dir)
# df = pd.DataFrame(columns=['dataset', 'img_name', 'visibility', 'exposure'])
# for file in files:
#     if file[-3:] == 'csv':
#         tmp = pd.read_csv(os.path.join(Proposed_dir, file))
#         #add header to tmp
#         tmp.columns = ['dataset', 'img_name', 'visibility', 'exposure']
#         df = pd.concat([df, tmp], ignore_index=True)

# df.to_csv(f'Proposed_Record.csv', index=False, header=True)

  df = pd.concat([df, tmp], ignore_index=True)


In [97]:
# dfs = []
# img_names =[]
# for idx, dir in enumerate(['/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/F2_Record_new.csv',
#                           '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Proposed_Record_new.csv',
#                            '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Uncertainty_Record_new.csv']):
#     df = pd.read_csv(dir)
#     df.drop(columns=['Unnamed: 0'], inplace=True)
#     #cal the number of row with 'Gaussian' in img_name
#     print(f'{idx}: {len(df[df["img_name"].str.contains("GaussianBlur_13")])}')
#     img_names.append(df[df["img_name"].str.contains("GaussianBlur_13")]['img_name'])
#     dfs.append(df)
 
# merged_df = pd.merge(dfs[0], dfs[1], on=['img_name'])
# merged_df = pd.merge(merged_df, dfs[2], on=['img_name'])
# merged_df = pd.read_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Merged_final.csv')
merged_df = pd.read_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/train_score/Merged_Training.csv')
#set new column distortion equal to the first split of img_name column 
merged_df['distortion'] = merged_df['img_name'].apply(lambda x: x.split('/')[4])
# merged_df.drop(columns=['Unnamed: 0'], inplace=True)
unique_distortion = merged_df['distortion'].unique()
unique_distortion.sort()
for idx, distortion in enumerate(unique_distortion):
    print(f'{idx}: {distortion}')
    merged_df.loc[merged_df['distortion'] == distortion, 'label'] = idx
#set dtype of distortion column
# merged_df['distortion'] = (merged_df['distortion']).astype(int)

#get intersection of each list in img_names
# intersection = set.intersection(set(img_names[0]), set(img_names[1]), set(img_names[2]))
# print(f'intersection: {len(intersection)}')
merged_df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/train_score/Merged_Training.csv', index=False, header=True)

0: GC10-DET


In [98]:
merged_df['distortion'].value_counts()

distortion
GC10-DET    1593
Name: count, dtype: int64

In [99]:
merged_df.describe()

Unnamed: 0,visibility,exposure,objectness_uncertainty,weighted_variance_sum,weighted_entropy,f1,ap_50,ap_50_95,label
count,1593.0,1593.0,1593.0,1593.0,1593.0,0.0,1593.0,1593.0,1593.0
mean,0.237052,0.026693,0.213562,0.211834,1.875176,,0.627652,0.329721,0.0
std,0.165587,0.033949,0.404446,0.405391,0.611218,,0.448788,0.284848,0.0
min,0.000231,2.7e-05,0.0,0.0,0.22802,,0.0,0.0,0.0
25%,0.103034,0.003213,0.002766,0.000255,1.141025,,0.0,0.0,0.0
50%,0.206997,0.011682,0.006289,0.00114,2.280131,,0.995,0.34825,0.0
75%,0.345798,0.037336,0.015718,0.016527,2.285895,,0.995,0.58475,0.0
max,0.821447,0.197278,1.0,1.0,2.302585,,0.995,0.995,0.0


In [95]:
# merged_df['weighted_entropy'] = merged_df['weighted_variance_sum']
# merged_df['weighted_variance_sum'] = merged_df['objectness_entropy']
# merged_df.drop(columns=['objectness_entropy'], inplace=True)
merged_df.describe()

Unnamed: 0,visibility,exposure,objectness_uncertainty,weighted_variance_sum,weighted_entropy,ap_50,ap_50_95,label
count,11325.0,11450.0,11221.0,11221.0,11221.0,11326.0,11326.0,11450.0
mean,0.2651742,0.158247,0.418748,0.418508,2.034734,0.389393,0.195023,24.5
std,0.2006516,0.248522,0.489448,0.489724,0.516566,0.460851,0.264879,14.4315
min,3.22e-07,0.0,0.0,0.0,0.325677,0.0,0.0,0.0
25%,0.1058247,0.000897,0.005022,0.000811,2.271354,0.0,0.0,12.0
50%,0.2264281,0.02907,0.012798,0.013464,2.284308,0.0,0.0,24.5
75%,0.3793976,0.203864,1.0,1.0,2.302585,0.995,0.398,37.0
max,0.9889542,0.99999,1.0,1.0,2.302585,0.995,0.995,49.0


In [27]:
merged_df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/merged_new.csv', index=False, header=True)

In [7]:
from ultralytics.engine.validator import BaseValidator
from ultralytics.utils.metrics import box_iou, Metric, DetMetrics
import numpy as np
import torch
import pandas as pd

# load txt label


validator = BaseValidator()
validator.iouv = torch.arange(0.5, 1, 0.05)

for result in results:
    metric = DetMetrics()
    label_path = result.path.replace('images', 'labels')[:-4] + '.txt'
    label = torch.from_numpy(np.loadtxt(label_path))
    # turn label into (x1, y1, x2, y2) format
    
    if label.dim() > 1:
        x1 = label[:, 1] - label[:, 3] / 2
        y1 = label[:, 2] - label[:, 4] / 2
        x2 = label[:, 1] + label[:, 3] / 2
        y2 = label[:, 2] + label[:, 4] / 2
        label = torch.stack((label[:, 0], x1, y1, x2, y2), 1)
        iou = box_iou(label[:, 1:].to("cuda:0"), result.boxes.xyxyn.to("cuda:0"))
        tp = validator.match_predictions(result.boxes.data[:,-1], label[:, 0].to("cuda:0"), iou)
        
        # update metric
        tp = tp.detach().cpu().numpy()
        conf = result.boxes.conf.detach().cpu().numpy()
        pre_cls = result.boxes.cls.detach().cpu().numpy()
        target_cls = label[:, 0].detach().cpu().numpy()
    
    else:
        x1 = label[1] - label[3] / 2
        y1 = label[2] - label[4] / 2
        x2 = label[1] + label[3] / 2
        y2 = label[2] + label[4] / 2
        label = torch.tensor([label[0], x1, y1, x2, y2])
        iou = box_iou(label[1:].to("cuda:0").reshape(1, -1), result.boxes.xyxyn.to("cuda:0"))
        tp = validator.match_predictions(result.boxes.data[:,-1], label[0].to("cuda:0").unsqueeze(0), iou)
    
        # update metric
        tp = tp.detach().cpu().numpy()
        conf = result.boxes.conf.detach().cpu().numpy()
        pre_cls = result.boxes.cls.detach().cpu().numpy()
        target_cls = label[0].unsqueeze(0).detach().cpu().numpy()
    
    _, _, p, r, f1, f2, ap50, ap_50_95, unique_classes, p_curve, r_curve, f1_curve, f2_curve, x, prec_values = ap_per_class(tp, conf, pre_cls, target_cls)
    print(f2.mean(), unique_classes)
      

1.0 [1 9]
1.0 [8]
0.0 [2]
1.0 [8]
0.8125 [1 9]
1.0 [6]
0.0 [0]
0.0 [8]
1.0 [7]
0.0 [7]
1.0 [1 9]
0.8333333333333334 [1]
0.0 [2]
0.0 [6]
0.0 [6]
0.0 [5]
0.8861831775579442 [3]
0.0 [8]
0.5 [3]
0.9894106530883233 [1 9]
1.0 [1 9]
0.0 [3]
1.0 [4]
0.0 [9]
0.25 [1 4 8 9]
1.0 [6]
0.955061678002223 [4 9]
1.0 [8]
1.0 [8]
0.0 [2]
1.0 [6 8]
1.0 [8]
0.0 [6]
1.0 [4]
0.9698366958523534 [9]
0.8333333333333334 [9]
1.0 [8]
0.9166666666666667 [4 9]
1.0 [7]
1.0 [6]
0.9784231710625808 [6]
0.0 [6]
1.0 [4]
0.0 [6]
0.0 [6]
0.0 [2]
1.0 [4 9]
0.969202609172539 [4 9]
1.0 [8]
0.0 [2]
0.6682954202760385 [3]
0.0 [6]
0.9784407998647183 [7]
0.8620689655172415 [3]
1.0 [1 9]
1.0 [4 9]
0.0 [2]
1.0 [7]
0.0 [0]
0.5 [1 9]
0.9166666666666667 [6 8]
1.0 [8]
1.0 [6]
0.0 [6]
0.0 [7]
0.9704364934628894 [8]
0.0 [2]
0.0 [5]
0.5 [3]
0.0 [6]
0.0 [2 6]
0.0 [2]
0.0 [6]
1.0 [1]
1.0 [3]
1.0 [6]
0.0 [6 8]
0.0 [6]
0.0 [5]
0.0 [7]
0.8282790301542053 [3]
1.0 [9]
0.0 [2]
0.9166666666666667 [1 9]
1.0 [8]
1.0 [1 9]
1.0 [6]
0.0 [3]
0.0 [2 8]
1.

In [8]:
# Process results generator
for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs


In [214]:
import numpy as np
import ast
import pandas as pd

def safe_convert_to_list(s):
    try:
        # Attempt to directly evaluate the string
        return ast.literal_eval(s)
    except SyntaxError:
        # If direct evaluation fails, attempt to manually parse the string
        cleaned_str = s.strip('[]')
        if cleaned_str:  # Check if the string is not empty
            numbers = [float(num) for num in cleaned_str.split() if num not in ['[', ']']]
            return numbers
        else:
            return []

# Load the dataset
test_path = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/Merged_final.csv'
train_path = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/train_score/Merged_Training.csv'
for data_path in [test_path, train_path, ]:
    data = pd.read_csv(data_path)

    # Display the first few rows of the dataframe to understand its structure
    data.head()


    # Apply the conversion function to the 'f2' column and then compute the mean F2 score
    data['f2'] = data['f2'].fillna('[]')
    data['f2'] = data['f2'].apply(safe_convert_to_list)
    data['mean_f2'] = data['f2'].apply(lambda x: np.mean(x) if len(x) > 0 else 0)


    # data = data[data['img_name'].str.contains('/GC10-DET/')]

    # Prepare the dataset for modeling
    # features = ['visibility', 'exposure', 'objectness_uncertainty', 'weighted_variance_sum', 'weighted_entropy', 'label']
    # features = ['visibility', 'exposure', 'label']
    features = ['objectness_uncertainty', 'weighted_variance_sum', 'weighted_entropy', 'label']

    # features = ['visibility', 'exposure']
    # features = ['objectness_uncertainty', 'weighted_variance_sum', 'weighted_entropy']
    if data_path == train_path:
        X_train = data[features]
        y_train = data['mean_f2']
    else:
        tmp = data
        X_test = data[features]
        y_test = data['mean_f2']
        # fill nan
        # X_test['visibility'] = X_test['visibility'].fillna(0)
        X_test['objectness_uncertainty'] = X_test['objectness_uncertainty'].fillna(1)
        X_test['weighted_variance_sum'] = X_test['weighted_variance_sum'].fillna(1)
        X_test['weighted_entropy'] = X_test['weighted_entropy'].fillna(np.log(10))
        
    # y = data['ap_50_95']
    # y = data['ap_50']

# standard scale X
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# scaler = StandardScaler()
# scaler = MinMaxScaler()
# X = scaler.fit_transform(X)

# Display the first few rows of features and target to verify

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['objectness_uncertainty'] = X_test['objectness_uncertainty'].fillna(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['weighted_variance_sum'] = X_test['weighted_variance_sum'].fillna(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['weighted_entropy'] = X_test['weighted_entro

In [215]:
X_train.isna().any()
X_test.isna().any()
# y_test.isna().any()
X_test.describe()

Unnamed: 0,objectness_uncertainty,weighted_variance_sum,weighted_entropy,label
count,11450.0,11450.0,11450.0,11450.0
mean,0.430373,0.430138,2.040091,24.5
std,0.491315,0.49159,0.512747,14.4315
min,0.0,0.0,0.325677,0.0
25%,0.005143,0.000848,2.272682,12.0
50%,0.013622,0.01531,2.284593,24.5
75%,1.0,1.0,2.302585,37.0
max,1.0,1.0,2.302585,49.0


In [216]:
X_train.describe()

Unnamed: 0,objectness_uncertainty,weighted_variance_sum,weighted_entropy,label
count,1593.0,1593.0,1593.0,1593.0
mean,0.213562,0.211834,1.875176,0.0
std,0.404446,0.405391,0.611218,0.0
min,0.0,0.0,0.22802,0.0
25%,0.002766,0.000255,1.141025,0.0
50%,0.006289,0.00114,2.280131,0.0
75%,0.015718,0.016527,2.285895,0.0
max,1.0,1.0,2.302585,0.0


In [217]:
from scipy import stats
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

SRCC = []
PLCC = []
MSE = []
R2 = []

# kf = KFold(n_splits=10, shuffle=True, random_state=42)

# for train_index, test_index in kf.split(data.index):
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]
    
#     #add noise to X_train
#     X_train = X_train + np.random.normal(0, 0.01, X_train.shape)

# Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# X_train = X_train[:-1] + np.random.normal(0, 0.0001, X_train[:-1].shape)
# scaler = MinMaxScaler()
# X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
# X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# Initialize models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "MLP Regressor": MLPRegressor(random_state=42, max_iter=1000), # Increased max_iter for convergence
    "Random Forest Regressor": RandomForestRegressor(random_state=42),
}

features.remove('label')

# Train and evaluate each model
results = {}
for name, model in models.items():
    # Train
    model.fit(X_train.loc[:,features], y_train)
    # Predict
    y_pred = model.predict(X_test.loc[:,features])
    # Evaluate
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    srcc = stats.spearmanr(y_test, y_pred)[0]
    plcc = stats.pearsonr(y_test, y_pred)[0]
    
    if name == "Random Forest Regressor":                       
        SRCC.append(srcc)
        PLCC.append(plcc)
        MSE.append(mse)
        R2.append(r2)
    
    results[name] = {"MSE": mse, "R2": r2, "SRCC": srcc, "PLCC": plcc}
    
    if name == 'Decision Tree Regressor':
        # save test resulst to csv
        df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        df.to_csv(f'/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/test_result.csv', index=False)
    
    if name == 'Linear Regression':
        print(model.intercept_, model.coef_)
        
    #print_result

results_df = pd.DataFrame(results).T  # Convert results to a DataFrame for better readability
results_df

0.6648476871601723 [      10.58     -11.226    0.039008]


Unnamed: 0,MSE,R2,SRCC,PLCC
Linear Regression,0.142849,0.314198,0.58948,0.603036
Decision Tree Regressor,0.204128,0.020002,0.414991,0.491255
MLP Regressor,0.151094,0.274612,0.568346,0.582809
Random Forest Regressor,0.142922,0.313848,0.611164,0.605066


In [218]:
model.feature_importances_

array([    0.20476,     0.56638,     0.22886])

In [129]:
combine = 1/2 * X_test['visibility'] + 1/2 * (1 - X_test['exposure'])
mse = mean_squared_error(y_test, combine)
r2 = r2_score(y_test, combine)
srcc = stats.spearmanr(y_test, combine)[0]
plcc = stats.pearsonr(y_test, combine)[0]
print(mse, r2, srcc, plcc)

0.24698101096602526 -0.1857307058313724 0.02512071480735943 0.07130610239294317


In [162]:
dirs = X_test['label'].unique()

In [183]:
tmp[tmp['label'] == 0]['distortion'].iloc[0]

'GC10-DET'

In [212]:
X_test['predict'] = y_pred
X_train['predict'] = model.predict(X_train.loc[:,features])
# X_combine = pd.concat([X_train, X_test]) 
# y_combine = pd.concat([y_train, y_test])

results = pd.DataFrame(columns=['dataset', 'predict', 'number'])
# X_test.to_csv(f'/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/random_forest_result.csv', index=False)
for label in dirs:
    #cal average predict group by label
    results.loc[len(results)] = [tmp[tmp['label'] == label]['distortion'].iloc[0], X_test.loc[X_test["label"] == label, "predict"].mean(), (X_test['label'] == label).sum()]
    print(tmp[tmp['label'] == label]['distortion'].iloc[0], (X_test['label'] == label).sum())
# results.to_excel(f'/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/regression_result.xlsx', index=False) 

# Excel文件路径
file_path = '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/regression_result.xlsx'

# 使用ExcelWriter并设置mode为'a'，即追加模式
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a') as writer:
    # 将DataFrame写入新的sheet，命名为'NewSheet'
    results.to_excel(writer, sheet_name='Uncertainty')   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['predict'] = y_pred
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['predict'] = model.predict(X_train.loc[:,features])


GC10-DET_brightness_-15 229
GC10-DET_Transform_Scale_0.0:0.05 229
GC10-DET_Transform_Scale_0.15:0.2 229
GC10-DET_MedianBlur_29 229
GC10-DET_brightness_50 229
GC10-DET_brightness_110 229
GC10-DET_brightness_-150 229
GC10-DET_GaussianBlur_3 229
GC10-DET_brightness_-20 229
GC10-DET_Transform_Scale_0.05:0.1 229
GC10-DET_GaussianBlur_7 229
GC10-DET_BilateralBlur_300 229
GC10-DET_Transform_Scale_0.1:0.15 229
GC10-DET_Sharpen_5 229
GC10-DET_BilateralBlur_60 229
GC10-DET_brightness_70 229
GC10-DET_GaussianBlur_11 229
GC10-DET_GaussianBlur_17 229
GC10-DET_Sharpening_1.5 229
GC10-DET_BilateralBlur_120 229
GC10-DET_Transform_Scale_0.25:0.3 229
GC10-DET_Sharpen_10 229
GC10-DET_GaussianBlur_15 229
GC10-DET_Sharpening_2.0 229
GC10-DET_MedianBlur_43 229
GC10-DET_Sharpening_3 229
GC10-DET_MedianBlur_71 229
GC10-DET 229
GC10-DET_GaussianBlur_9 229
GC10-DET_MedianBlur_57 229
GC10-DET_BilateralBlur_240 229
GC10-DET_brightness_30 229
GC10-DET_MedianBlur_15 229
GC10-DET_GaussianBlur_5 229
GC10-DET_brightne

In [53]:
X_test.describe()

AttributeError: 'numpy.ndarray' object has no attribute 'describe'

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
      <th>SRCC</th>
      <th>PLCC</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.193364</td>
      <td>0.095085</td>
      <td>0.307203</td>
      <td>0.309837</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.271613</td>
      <td>-0.271107</td>
      <td>0.370684</td>
      <td>0.366573</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.164739</td>
      <td>0.229049</td>
      <td>0.490648</td>
      <td>0.482423</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.148216</td>
      <td>0.306374</td>
      <td>0.550785</td>
      <td>0.554713</td>
    </tr>
  </tbody>
</table>
</div>

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
      <th>SRCC</th>
      <th>PLCC</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.065377</td>
      <td>0.131179</td>
      <td>0.348499</td>
      <td>0.364317</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.096486</td>
      <td>-0.282236</td>
      <td>0.385156</td>
      <td>0.375813</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.068393</td>
      <td>0.091110</td>
      <td>0.340770</td>
      <td>0.308201</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.047503</td>
      <td>0.368724</td>
      <td>0.598867</td>
      <td>0.609294</td>
    </tr>
  </tbody>
</table>
</div>

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
      <th>SRCC</th>
      <th>PLCC</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.193365</td>
      <td>0.095085</td>
      <td>0.307206</td>
      <td>0.309837</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.272660</td>
      <td>-0.276006</td>
      <td>0.349884</td>
      <td>0.361031</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.190556</td>
      <td>0.108228</td>
      <td>0.324465</td>
      <td>0.333133</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.147864</td>
      <td>0.308021</td>
      <td>0.550822</td>
      <td>0.556283</td>
    </tr>
  </tbody>
</table>
</div>

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.197254</td>
      <td>0.064327</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.380059</td>
      <td>-0.802814</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.193648</td>
      <td>0.081430</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.213226</td>
      <td>-0.011437</td>
    </tr>
  </tbody>
</table>
</div>

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.199501</td>
      <td>0.053668</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.283149</td>
      <td>-0.343119</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.200023</td>
      <td>0.051190</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.161334</td>
      <td>0.234711</td>
    </tr>
  </tbody>
</table>
</div>

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>MSE</th>
      <th>R2</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Linear Regression</th>
      <td>0.188070</td>
      <td>0.107889</td>
    </tr>
    <tr>
      <th>Decision Tree Regressor</th>
      <td>0.275943</td>
      <td>-0.308935</td>
    </tr>
    <tr>
      <th>MLP Regressor</th>
      <td>0.185389</td>
      <td>0.120607</td>
    </tr>
    <tr>
      <th>Random Forest Regressor</th>
      <td>0.140655</td>
      <td>0.332804</td>
    </tr>
  </tbody>
</table>
</div>

In [35]:
model.feature_importances_

array([    0.16596,     0.16575,     0.17716,     0.26334,     0.22779])

In [113]:
X_test.describe()

Unnamed: 0,visibility,exposure,objectness_uncertainty,weighted_variance_sum,weighted_entropy
count,1217.0,1217.0,1217.0,1217.0,1217.0
mean,0.270493,0.230253,0.006240577,0.149462,0.001661
std,0.20981,0.272485,0.004841622,0.07308,0.004813
min,0.00042,0.0,3.902979e-08,0.05516,0.0
25%,0.093177,0.010945,0.002279311,0.097146,3.7e-05
50%,0.230777,0.109753,0.005438593,0.107024,0.000173
75%,0.402377,0.393562,0.008849673,0.226471,0.000954
max,0.958951,0.999026,0.02272954,0.413786,0.04097


In [15]:
y_train.describe()

count    5651.000000
mean        0.240280
std         0.275259
min         0.000000
25%         0.000000
50%         0.099500
75%         0.466655
max         0.995000
Name: ap_50_95, dtype: float64

In [19]:
y_test.describe()

count    1413.000000
mean        0.471483
std         0.462422
min         0.000000
25%         0.000000
50%         0.500000
75%         1.000000
max         1.000000
Name: mean_f2, dtype: float64

In [7]:
from pyiqa.archs.niqe_arch import *
from pyiqa.utils import load_file_from_url

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
default_model_urls = {
    'url': 'https://github.com/chaofengc/IQA-PyTorch/releases/download/v0.1-weights/niqe_modelparameters.mat',
    'niqe': 'https://github.com/chaofengc/IQA-PyTorch/releases/download/v0.1-weights/niqe_modelparameters.mat',
    'ilniqe': 'https://github.com/chaofengc/IQA-PyTorch/releases/download/v0.1-weights/ILNIQE_templateModel.mat',
    'pretrained': '/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/model.mat'
}

In [9]:
import cv2
path = '/Data4/student_zhihan_data/data/GC10-DET_brightness_110/test/images/img_01_425000300_00630_jpg.rf.12001adc8b86faf88a47b6aa6f321b91.jpg'
img = cv2.imread(path)
img = torch.from_numpy(img)
img = img.permute(2,0,1).unsqueeze(0)
# calculate_niqe(img, color_space='gray', pretrained_model_path=load_file_from_url(default_model_urls['niqe']))
# calculate_ilniqe(img, color_space='gray', pretrained_model_path=default_model_urls['pretrained'])
calculate_niqe(img, color_space='gray', pretrained_model_path='model.mat')

tensor(1259.6615, dtype=torch.float64)

In [85]:
# load matlab model
from scipy.io import loadmat
covar = loadmat('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/covar.mat')
mean = loadmat('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/mean.mat')
model = loadmat(load_file_from_url(default_model_urls['niqe']))
model['mu_prisparam'] = np.array(mean['mean'])
model['cov_prisparam'] = np.array(covar['covariance'])
# save mat model
import scipy.io
scipy.io.savemat('model.mat', model)

In [81]:
model = loadmat('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/model.mat')
# change model key name 'None' to 'templateModel'
model['templateModel'] = model.pop('None')
scipy.io.savemat('model.mat', model)

In [86]:
loadmat('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/model.mat')

{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Wed Feb 14 16:14:01 2024',
 '__version__': '1.0',
 '__globals__': [],
 'mu_prisparam': array([[     2.8918,      1.1237,     0.94071,  -0.0074117,     0.30461,     0.29457,     0.92068,     0.16218,     0.21715,     0.36395,     0.93394,   -0.093931,     0.33724,     0.25074,       0.934,   -0.094519,     0.33749,     0.25046,      2.7161,       1.036,     0.91893,    0.018017,     0.24276,
             0.25803,     0.90293,     0.12705,     0.18806,     0.29925,     0.89492,   -0.053007,     0.26442,     0.21893,     0.89485,   -0.055262,     0.26535,     0.21794]]),
 'cov_prisparam': array([[    0.11128,     0.04245,    0.031441, ...,   -0.003672,    0.022724,    0.018208],
        [    0.04245,    0.022494,    0.013837, ...,  -0.0016413,    0.010588,   0.0086856],
        [   0.031441,    0.013837,   0.0098078, ...,  -0.0011065,   0.0070665,   0.0057277],
        ...,
        [  -0.003672,  -0.0016413,  -0.0011065, ..

In [75]:
loadmat(load_file_from_url(default_model_urls['niqe']))

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Fri Aug 24 17:52:00 2012',
 '__version__': '1.0',
 '__globals__': [],
 'mu_prisparam': array([[     2.6013,      0.9057,     0.81205,    0.090427,     0.13873,     0.20603,     0.81897,    0.062462,     0.15333,     0.19591,     0.82647,   -0.025526,     0.18857,     0.16578,     0.82429,   -0.025361,     0.18724,     0.16505,      2.9695,     0.96123,     0.84935,    0.082383,     0.16132,
             0.22492,     0.85895,    0.055084,     0.17531,     0.21713,     0.87208,   -0.032221,     0.21549,     0.18821,      0.8694,   -0.032326,     0.21474,     0.18678]]),
 'cov_prisparam': array([[    0.45348,    0.096101,    0.082763, ...,  -0.0068539,    0.041395,    0.031916],
        [   0.096101,    0.037112,    0.021553, ...,  -0.0032338,    0.012877,   0.0095948],
        [   0.082763,    0.021553,    0.017707, ...,  -0.0016373,   0.0089932,   0.0069435],
        ...,
        [ -0.0068539,  -0.0032338,  -0.0016373,

In [45]:
# read csv
import pandas as pd
import cv2
df = pd.read_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/merged_new.csv')
# calculate niqe for each row and add to new column
for idx, row in df.iterrows():
    img = cv2.imread(row['img_name'])
    img = torch.from_numpy(img)
    img = img.permute(2,0,1).unsqueeze(0)
    try:
        niqe = calculate_niqe(img, color_space='gray', pretrained_model_path='/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/model.mat')
        df.loc[idx, 'niqe'] = niqe.item()
    except:
        #set NAN
        niqe = np.nan
        df.loc[idx, 'niqe'] = niqe


  return torch.cov(tensor, correction=correction)


In [47]:
df.to_csv('/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/merged_new.csv', index=False, header=True)

In [74]:
data = df.dropna()

# scale niqe to [0,1]
# data['niqe'] = (data['niqe'] - data['niqe'].min()) / (data['niqe'].max() - data['niqe'].min())
data['niqe'] = data['niqe'] / data['niqe'].max()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['niqe'] = data['niqe'] / data['niqe'].max()


In [75]:
import ast
# Apply the conversion function to the 'f2' column and then compute the mean F2 score
def safe_convert_to_list(s):
    try:
        # Attempt to directly evaluate the string
        return ast.literal_eval(s)
    except SyntaxError:
        # If direct evaluation fails, attempt to manually parse the string
        cleaned_str = s.strip('[]')
        if cleaned_str:  # Check if the string is not empty
            numbers = [float(num) for num in cleaned_str.split() if num not in ['[', ']']]
            return numbers
        else:
            return []
        
data['f2'] = data['f2'].apply(safe_convert_to_list)
data['mean_f2'] = data['f2'].apply(lambda x: np.mean(x) if len(x) > 0 else np.nan)

# Prepare the dataset for modeling
features = ['visibility', 'exposure', 'objectness_uncertainty', 'weighted_variance_sum', 'weighted_entropy', 'niqe']
# features = ['visibility', 'exposure']
# features = ['objectness_uncertainty', 'weighted_variance_sum', 'weighted_entropy']
X = data[features]
y = data['mean_f2']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['f2'] = data['f2'].apply(safe_convert_to_list)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['mean_f2'] = data['f2'].apply(lambda x: np.mean(x) if len(x) > 0 else np.nan)


In [76]:
X.dropna().describe()

Unnamed: 0,visibility,exposure,objectness_uncertainty,weighted_variance_sum,weighted_entropy,niqe
count,6083.0,6083.0,6083.0,6083.0,6083.0,6083.0
mean,0.2777211,0.231871,0.006378834,0.145793,0.001423,0.001273
std,0.2134308,0.269062,0.004907808,0.072428,0.004295,0.024679
min,3.222425e-07,0.0,3.902979e-08,0.050507,0.0,0.0
25%,0.1020507,0.014917,0.002376295,0.09663,3.7e-05,3.3e-05
50%,0.2346791,0.117202,0.00548942,0.105499,0.000158,6.8e-05
75%,0.4125931,0.386221,0.009104459,0.209953,0.000815,0.000137
max,0.9695124,0.999207,0.02989884,0.479202,0.056565,1.0


In [80]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.svm import SVR

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "MLP Regressor": MLPRegressor(random_state=42, max_iter=1000), # Increased max_iter for convergence
    "Random Forest Regressor": RandomForestRegressor(random_state=42),
    # SVM
    "SVM Regressor": SVR(kernel='linear', C=1.0, epsilon=0.1),
}

# Train and evaluate each model
results = {}
for name, model in models.items():
    # Train
    model.fit(X_train, y_train)
    # Predict
    y_pred = model.predict(X_test)
    # Evaluate
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[name] = {"MSE": mse, "R2": r2}
    
    # if name == 'Decision Tree Regressor':
    #     # save test resulst to csv
    #     temp = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    #     .to_csv(f'/Data4/student_zhihan_data/source_code/IQA_A-STAR/source_code/Mydemo/test_result.csv', index=False)

results_df = pd.DataFrame(results).T  # Convert results to a DataFrame for better readability
results_df

Unnamed: 0,MSE,R2
Linear Regression,0.191435,0.100082
Decision Tree Regressor,0.277962,-0.306672
MLP Regressor,0.18757,0.11825
Random Forest Regressor,0.143621,0.324849
SVM Regressor,0.207569,0.024235


In [78]:
model.feature_importances_

array([    0.13651,     0.14236,      0.1451,     0.20377,     0.26094,     0.11132])