### 05 - Random predictions, 20231120

In this notebook we generate random predictions over the validation set.

In [2]:
import torch
import pandas as pd
import numpy as np

from PIL import Image
from tqdm import tqdm
from torchvision.ops import box_iou

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
BASE_PATH = "../code/rec/"

In [36]:
def get_iou_column(df, bbox_pred_colname):
    df.loc[:, "bbox_raw"] = df.bbox_raw.apply(lambda v: np.resize(v, (1, 4)))
    df.loc[:, bbox_pred_colname] = df[bbox_pred_colname].apply(lambda v: np.resize(v, (1, 4)))
    df.loc[:, "iou"] = df.apply(lambda row: box_iou(torch.from_numpy(row[bbox_pred_colname]),
                                                    torch.from_numpy(row["bbox_raw"])), axis=1)
    df = df.astype({"iou": float})
    return df


def get_rec_counts(df_results, hits_col):
    counts = {
        hits_col: [],
        "counts": [],
        "rec_cls": []
    }
    for rec_cls in ["spatial", "ordinal", "relational", "intrinsic"]:
        counts["rec_cls"].append(rec_cls)
        mask_rec_cls = (df_results[rec_cls] == 1)
        counts[hits_col].append(df_results.loc[mask_rec_cls, hits_col].sum())
        counts["counts"].append(df_results.loc[mask_rec_cls, hits_col].shape[0])
    df_c = pd.DataFrame().from_dict(counts)
    df_c.loc[:, "exp_number"] = df_results.exp_number.unique()[0]
    df_c.loc[:, "exp_description"] = df_results.exp_description.unique()[0]
    df_c.loc[:, "hits_perc"] = round(df_c[hits_col] / df_c.counts * 100, 2)
    return df_c


def random_rectangle(width, height):
    a = torch.rand(2)
    x1 = a[0] * width
    y1 = a[1] * height
    b = torch.rand(2)
    x2 = x1 + b[0] * width
    y2 = y1 + b[1] * height
    return [x1, y1, x2, y2]


def generate_random_preds(df_results):
    random_bboxes = {"random_bbox": []}
    for _, row in df_results.loc[:, ["bbox_raw", "bbox_pred", "img_filename", "expr"]].iterrows():
        im = Image.open(f"{BASE_PATH}{row['img_filename']}")
        random_bboxes["random_bbox"].append(np.array(random_rectangle(im.width, im.height)))
    df_results = df_results.join(pd.DataFrame().from_dict(random_bboxes))
    return df_results

### Read validation data from all available experiments results

Following dict's explanation:


exp_d:
```
key -> number of base experiment.

values -> list of lists[
    exp_number,
    validation_predictions_path(str),
    use_visual_pos_embeddings(boolean),
    use_visual_embeddings(boolean)
    ]
```

exp_descriptions:
```
key -> number of base experiment.
values -> description of the experiment.
```

In [5]:
exp_d = {
    "exp_003": [
        ["exp_003", "predictions_val.parquet", True, True],
    ],
}
exp_descriptions = {
    "exp_003": "Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.",
}

In [46]:
df_metrics_lst_25 = []
df_metrics_lst_50 = []
for exp_base, lst_exps in exp_d.items():
    exp_base_description = exp_descriptions[exp_base]
    for exp_number, exp_filename, use_visual_pos_emb, use_visual_emb in lst_exps:
        df_results = pd.read_parquet(f"../models/{exp_number}/{exp_filename}")
        df_results.loc[:, "exp_number"] = exp_number
        df_results.loc[:, "exp_description"] = exp_base_description
        df_m_25_big = []
        df_m_50_big = []
        for i in tqdm(range(1000)):
            df = generate_random_preds(df_results)
            df = get_iou_column(df, bbox_pred_colname="random_bbox")
            df.loc[:, "hits_25"] = (df.iou > 0.25).astype(int)
            df.loc[:, "hits_50"] = (df.iou > 0.50).astype(int)
            df_m_25_big.append(get_rec_counts(df, "hits_25"))
            df_m_50_big.append(get_rec_counts(df, "hits_50"))
        df_m_25_big = pd.concat(df_m_25_big, ignore_index=True)
        df_m_50_big = pd.concat(df_m_50_big, ignore_index=True)
        df_m_25 = (
            df_m_25_big
            .groupby(["counts", "rec_cls", "exp_number", "exp_description"])
            ["hits_perc"].agg(["mean", "std"])
        ).reset_index().rename(columns={"mean": "hits_perc_mean", "std": "hits_perc_std"})
        df_m_50 = (
            df_m_50_big
            .groupby(["counts", "rec_cls", "exp_number", "exp_description"])
            ["hits_perc"].agg(["mean", "std"])
        ).reset_index().rename(columns={"mean": "hits_perc_mean", "std": "hits_perc_std"}) 
        df_metrics_lst_25.append(df_m_25)
        df_metrics_lst_50.append(df_m_50)
df_metrics_val_25 = pd.concat(df_metrics_lst_25, ignore_index=True)
df_metrics_val_50 = pd.concat(df_metrics_lst_50, ignore_index=True)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [1:07:12<00:00,  4.03s/it]


In [49]:
df_metrics_val_25

Unnamed: 0,counts,rec_cls,exp_number,exp_description,hits_perc_mean,hits_perc_std
0,90,ordinal,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,2.2194,1.511348
1,1205,relational,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,3.44688,0.519874
2,2290,intrinsic,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,6.74542,0.51795
3,4028,spatial,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,4.43749,0.309767


In [50]:
df_metrics_val_50

Unnamed: 0,counts,rec_cls,exp_number,exp_description,hits_perc_mean,hits_perc_std
0,90,ordinal,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,0.15873,0.416349
1,1205,relational,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,0.30814,0.153587
2,2290,intrinsic,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,0.7474,0.181895
3,4028,spatial,exp_003,Exp from the section 3.1 of the paper. But changing the predictions to random coordinates.,0.40384,0.096147
