In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import os
import ast
from analyze_masks import analyze_masks_and_list_exceptions

def categorize_data(data, column):
    """Label every row "small", "medium" or "large" by tercile of ``column``.

    The 33rd and 66th percentiles of ``data[column]`` are used as cut points:
    values up to the 33rd percentile are "small", values above the 66th are
    "large", and everything in between is "medium". Rows that match none of
    the ranges (NaN values) are labelled "unknown".

    Args:
        data: DataFrame holding a numeric ``column``. It is modified in place.
        column: Name of the column to categorize.

    Returns:
        The same DataFrame object, with the labels stored in a new
        ``f"{column}_category"`` column.
    """
    values = data[column]
    q33 = values.quantile(0.33)
    q66 = values.quantile(0.66)

    category_labels = ["small", "medium", "large"]
    conditions = [
        values <= q33,
        (values > q33) & (values <= q66),
        values > q66,
    ]

    # np.select falls back to the integer 0 for unmatched rows. That default
    # cannot be promoted to a common dtype with string choices on recent NumPy
    # releases (TypeError), so an explicit string default is supplied.
    data[f'{column}_category'] = np.select(
        conditions, category_labels, default="unknown"
    )
    return data

In [2]:
def load_and_process_data(file_path, base_dir, masks_dir):
    """Load a confidence-score CSV and enrich it with image and mask statistics.

    Every column whose name contains ``'confidence'`` is parsed from its
    string form with ``ast.literal_eval``. For each row the source image is
    opened to record its ``width`` and ``height``, and the matching mask is
    opened to compute ``object_percentage``: the share of mask pixels equal to
    the value that ``analyze_masks_and_list_exceptions`` reports for the row's
    class. Finally ``object_percentage`` is bucketed by ``categorize_data``.

    Args:
        file_path: Path of the CSV; it must provide ``id`` and
            ``picture_name`` columns.
        base_dir: Directory containing ``train/<class_id>/<picture_name>``.
        masks_dir: Directory containing ``<class_id>/<picture base name>.png``.

    Returns:
        The DataFrame with ``width``, ``height``, ``object_percentage`` and
        ``object_percentage_category`` columns added. Rows whose image or
        mask is missing keep the initial value 0 / 0.0 in the affected
        columns; a message is printed for each missing file.
    """
    data = pd.read_csv(file_path)
    confidence_columns = [col for col in data.columns if 'confidence' in col]
    for column in confidence_columns:
        data[column] = data[column].apply(ast.literal_eval)

    data['width'] = 0
    data['height'] = 0
    data['object_percentage'] = 0.0

    # Used below as a mapping from class id to the mask pixel value that
    # marks the object of that class.
    class_to_grayscale_map = analyze_masks_and_list_exceptions(masks_dir)

    for index, row in data.iterrows():
        # The class id is the part of the row id before the first underscore.
        class_id = row['id'].split('_')[0]
        picture_name = row['picture_name']
        picture_base_name = os.path.splitext(picture_name)[0]

        img_path = os.path.join(base_dir, 'train', class_id, picture_name)
        try:
            with Image.open(img_path) as img:
                width, height = img.size
                data.at[index, 'width'] = width
                data.at[index, 'height'] = height
        except FileNotFoundError:
            print(f"Image not found: {img_path}")

        mask_name = picture_base_name + '.png'
        mask_path = os.path.join(masks_dir, class_id, mask_name)
        try:
            with Image.open(mask_path) as mask:
                mask_width, mask_height = mask.size
                mask_array = np.array(mask)
        except FileNotFoundError:
            print(f"Mask not found for image: {mask_path}")
            continue

        if class_id not in class_to_grayscale_map:
            continue

        # The denominator comes from the mask itself rather than from the
        # image: the object pixels are counted in the mask, and the image
        # dimensions are undefined (or left over from a previous row) when
        # the image file is missing.
        total_pixels = mask_width * mask_height
        if total_pixels == 0:
            continue

        relevant_value = class_to_grayscale_map[class_id]
        object_pixels = np.sum(mask_array == relevant_value)
        data.at[index, 'object_percentage'] = (object_pixels / total_pixels) * 100

    data = categorize_data(data, 'object_percentage')

    return data

In [3]:
def parse_synset_mapping(filepath):
    """Parse a synset mapping file into a dictionary keyed by class id.

    Each non-blank line is expected to look like ``<class_id> <description>``;
    the text after the first space is kept verbatim as the description.

    Args:
        filepath: Path of the mapping text file.

    Returns:
        dict mapping each class id to
        ``{"description": <str>, "value": <int>}``, where ``value`` is the
        zero-based position of the entry among the non-blank lines of the
        file. A line holding only a class id gets an empty description.
    """
    class_dict = {}
    with open(filepath, 'r') as file:
        entry_index = 0
        for line in file:
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no entry and must not break the parse.
                continue
            # partition never fails to unpack, even without a description.
            class_id, _, description = entry.partition(' ')
            class_dict[class_id] = {
                "description": description,
                "value": entry_index
            }
            entry_index += 1
    return class_dict

In [4]:
# Input locations, all relative to the notebook's working directory.
base_dir = '../data'
masks_dir = '../data/masks'
file_path_resnet = '../image_confidence_scores_resnet.csv'
file_path_convnext = '../image_confidence_scores_convnext.csv'
synset_path = '../data/LOC_synset_mapping.txt'

# Build the enriched frame for each model's score file, ResNet first.
data_resnet, data_convnext = [
    load_and_process_data(scores_csv, base_dir, masks_dir)
    for scores_csv in (file_path_resnet, file_path_convnext)
]
class_dict = parse_synset_mapping(synset_path)

# Number of ResNet rows in each object-size category.
size_categories_resnet = data_resnet['object_percentage_category']
category_distribution_resnet = size_categories_resnet.value_counts()
del size_categories_resnet

Class: n02412080, Most Common Nonzero Grayscale Value (by presence): 17, Presence Count: 982
    Images without the most common grayscale value (17): ['n02412080_13145.png', 'n02412080_1976.png', 'n02412080_6399.png', 'n02412080_16811.png', 'n02412080_2188.png', 'n02412080_19324.png', 'n02412080_18733.png', 'n02412080_2270.png', 'n02412080_16830.png', 'n02412080_16254.png', 'n02412080_26458.png', 'n02412080_3944.png', 'n02412080_1040.png', 'n02412080_10804.png', 'n02412080_13818.png', 'n02412080_11852.png', 'n02412080_791.png', 'n02412080_17063.png']
Class: n02107574, Most Common Nonzero Grayscale Value (by presence): 12, Presence Count: 997
    Images without the most common grayscale value (12): ['n02107574_3660.png', 'n02107574_142.png', 'n02107574_690.png']
Class: n01833805, Most Common Nonzero Grayscale Value (by presence): 3, Presence Count: 987
    Images without the most common grayscale value (3): ['n01833805_4117.png', 'n01833805_166.png', 'n01833805_8510.png', 'n01833805_885

In [5]:
import pickle

def save_df(df, path):
    """Pickle ``df`` to ``path``, creating the parent directory if needed.

    Args:
        df: Object to serialize (a DataFrame in this notebook).
        path: Destination file path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:
        # open() does not create missing directories, so a fresh checkout
        # without the output folder would otherwise fail here.
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(df, f)

def load_df(path):
    """Read back and return the object pickled in the file at ``path``.

    Unpickling can execute arbitrary code, so only use this on files that
    come from a trusted source.
    """
    with open(path, 'rb') as source:
        restored = pickle.load(source)
    return restored

In [6]:
# Shell escape: print the kernel's current working directory.
!pwd

/home/paul/Coding/Studies/Magisterka/BackgroundImpactAnalysis/analysis


In [7]:
import os

# Pickle targets for the two enriched frames, inside the 'results' folder.
resnet_results_path, convnext_results_path = (
    os.path.join('results', file_name)
    for file_name in ('resnet_df.pkl', 'convnext_df.pkl')
)

save_df(data_resnet, resnet_results_path)
save_df(data_convnext, convnext_results_path)

In [11]:
# Reload both frames from their pickle files, ResNet first.
resnet_df, convnext_df = (
    load_df(pickle_path)
    for pickle_path in (resnet_results_path, convnext_results_path)
)



In [12]:
# Preview the first five rows of the reloaded ResNet frame.
resnet_df.head(n=5)

Unnamed: 0,id,picture_name,original_confidence,desert_confidence,low_contrast_confidence,city_confidence,sky_confidence,jungle_confidence,no_bg_confidence,high_contrast_confidence,no_foreground_confidence,water_confidence,snow_confidence,indoor_confidence,mountain_confidence,width,height,object_percentage,object_percentage_category
0,n02412080_5030,n02412080_5030.JPEG,"[{349: 58.83927917480469}, {348: 41.1606674194...","[{348: 80.97709655761719}, {349: 19.0206565856...","[{349: 50.55152130126953}, {348: 49.4327011108...","[{348: 69.93463897705078}, {349: 29.0129756927...","[{348: 82.9874496459961}, {349: 17.00742912292...","[{348: 72.2722396850586}, {349: 27.72670745849...","[{349: 59.538448333740234}, {348: 40.453044891...","[{348: 87.86559295654297}, {349: 12.1336231231...","[{240: 29.558765411376953}, {206: 26.022109985...","[{348: 79.90464782714844}, {349: 20.0350189208...","[{349: 67.0143051147461}, {348: 32.98325347900...","[{453: 26.773107528686523}, {548: 26.111438751...","[{349: 77.03804016113281}, {348: 22.8792839050...",375,500,8.978133,small
1,n02412080_17477,n02412080_17477.JPEG,"[{348: 75.89208984375}, {258: 3.76292085647583...","[{348: 22.87215805053711}, {87: 10.36894226074...","[{348: 69.40206146240234}, {87: 16.82721900939...","[{348: 88.52837371826172}, {850: 2.23613858222...","[{348: 69.02066802978516}, {222: 6.11425399780...","[{348: 65.1850814819336}, {222: 3.221411705017...","[{348: 88.84029388427734}, {850: 1.85134780406...","[{348: 86.04642486572266}, {87: 3.177434206008...","[{295: 34.19534683227539}, {223: 21.9509468078...","[{348: 51.148841857910156}, {232: 8.2428951263...","[{348: 80.98275756835938}, {222: 9.67160511016...","[{87: 16.747709274291992}, {229: 16.6465797424...","[{348: 92.80342864990234}, {349: 1.86104285717...",500,375,31.309867,medium
2,n02412080_16034,n02412080_16034.JPEG,"[{348: 81.07646942138672}, {346: 15.2576045989...","[{348: 93.87834167480469}, {349: 5.38352727890...","[{348: 62.50176239013672}, {346: 31.4832382202...","[{348: 58.9777717590332}, {346: 35.92766189575...","[{348: 94.13262939453125}, {349: 3.80416369438...","[{348: 95.7903823852539}, {349: 3.000267505645...","[{348: 88.51309204101562}, {349: 10.9447765350...","[{348: 87.12995910644531}, {346: 8.88073921203...","[{295: 56.9707145690918}, {367: 10.55847072601...","[{348: 95.26390075683594}, {349: 4.33112287521...","[{348: 75.68328094482422}, {349: 15.6096086502...","[{348: 89.99002838134766}, {346: 8.81335258483...","[{348: 93.17617797851562}, {349: 6.81207561492...",500,333,44.206607,large
3,n02412080_10463,n02412080_10463.JPEG,"[{348: 53.75434112548828}, {349: 46.2355804443...","[{349: 70.88371276855469}, {348: 29.1098766326...","[{349: 62.867820739746094}, {348: 37.111083984...","[{348: 50.26985549926758}, {349: 49.7178497314...","[{349: 60.222434997558594}, {348: 39.769466400...","[{348: 52.65109634399414}, {349: 47.3390312194...","[{349: 69.63265991210938}, {348: 30.3539543151...","[{349: 58.87968063354492}, {348: 41.1069107055...","[{649: 11.73153305053711}, {783: 8.55524921417...","[{349: 50.064979553222656}, {348: 49.915950775...","[{349: 70.59010314941406}, {348: 29.3000431060...","[{349: 59.05855941772461}, {348: 40.9116325378...","[{349: 68.4893569946289}, {348: 31.45873260498...",500,355,53.412394,large
4,n02412080_1630,n02412080_1630.JPEG,"[{349: 89.70816802978516}, {348: 7.71903610229...","[{349: 87.25608825683594}, {348: 11.7992248535...","[{349: 83.90713500976562}, {348: 11.8507184982...","[{349: 83.45658111572266}, {348: 15.3807487487...","[{349: 82.66082763671875}, {348: 15.7830181121...","[{349: 65.84980773925781}, {348: 28.5984630584...","[{349: 83.1801528930664}, {348: 12.02587223052...","[{349: 77.55449676513672}, {348: 19.3704204559...","[{349: 91.06561279296875}, {350: 6.40204620361...","[{349: 67.72295379638672}, {348: 31.1795883178...","[{349: 88.16963195800781}, {348: 9.47043609619...","[{349: 74.47144317626953}, {348: 17.9814033508...","[{349: 88.70124053955078}, {348: 8.62641143798...",500,400,19.371,medium


In [13]:
# Preview the first five rows of the reloaded ConvNeXt frame.
convnext_df.head(n=5)

Unnamed: 0,id,picture_name,original_confidence,desert_confidence,low_contrast_confidence,city_confidence,sky_confidence,jungle_confidence,no_bg_confidence,high_contrast_confidence,no_foreground_confidence,water_confidence,snow_confidence,indoor_confidence,mountain_confidence,width,height,object_percentage,object_percentage_category
0,n02412080_5030,n02412080_5030.JPEG,"[{349: 46.37338638305664}, {348: 22.3368320465...","[{349: 39.178245544433594}, {348: 28.748394012...","[{349: 35.91504669189453}, {348: 22.0010223388...","[{349: 36.26168441772461}, {348: 25.0445537567...","[{348: 32.841434478759766}, {349: 32.712715148...","[{349: 34.654090881347656}, {348: 28.669761657...","[{349: 37.570411682128906}, {348: 22.975315093...","[{349: 35.65518569946289}, {348: 22.9324207305...","[{348: 32.19147872924805}, {349: 7.23252868652...","[{349: 33.616207122802734}, {348: 33.501949310...","[{349: 52.29624557495117}, {348: 17.1594505310...","[{349: 34.62197494506836}, {348: 18.7580699920...","[{349: 49.499183654785156}, {348: 23.585716247...",375,500,8.978133,small
1,n02412080_17477,n02412080_17477.JPEG,"[{348: 78.17762756347656}, {825: 1.48364901542...","[{348: 78.90536499023438}, {232: 1.66065859794...","[{348: 81.8041000366211}, {911: 0.411276996135...","[{348: 85.49967956542969}, {349: 0.71888679265...","[{348: 77.87525939941406}, {232: 0.57592558860...","[{348: 84.75343322753906}, {911: 0.24644431471...","[{348: 79.17779541015625}, {911: 0.40356552600...","[{348: 80.49481201171875}, {911: 0.41382187604...","[{825: 30.929214477539062}, {223: 19.813499450...","[{348: 81.6128158569336}, {349: 0.437838435173...","[{348: 81.91619873046875}, {349: 2.49655222892...","[{348: 81.7082290649414}, {257: 0.671802699565...","[{348: 85.5592041015625}, {349: 0.925480008125...",500,375,31.309867,medium
2,n02412080_16034,n02412080_16034.JPEG,"[{348: 79.15498352050781}, {349: 1.11444795131...","[{348: 75.2369613647461}, {349: 3.122596263885...","[{348: 77.33294677734375}, {349: 1.20119619369...","[{348: 73.82963562011719}, {349: 3.74569892883...","[{348: 77.58546447753906}, {349: 2.35443902015...","[{348: 79.86063385009766}, {349: 0.94149577617...","[{348: 73.18643188476562}, {349: 3.37986016273...","[{348: 73.70994567871094}, {349: 1.97658681869...","[{8: 52.488834381103516}, {7: 7.37766408920288...","[{348: 76.11611938476562}, {349: 2.58980941772...","[{348: 58.09589767456055}, {349: 9.19317531585...","[{348: 73.71327209472656}, {349: 5.06534957885...","[{348: 64.670166015625}, {349: 9.8425140380859...",500,333,44.206607,large
3,n02412080_10463,n02412080_10463.JPEG,"[{348: 40.40224075317383}, {349: 34.6419982910...","[{349: 41.73984146118164}, {348: 34.3252258300...","[{348: 37.63783264160156}, {349: 36.9719772338...","[{349: 39.782447814941406}, {348: 33.380031585...","[{348: 36.8717041015625}, {349: 33.99232101440...","[{348: 33.92774963378906}, {349: 31.1646614074...","[{349: 38.27436065673828}, {348: 37.9743652343...","[{348: 37.928951263427734}, {349: 37.408428192...","[{295: 30.554973602294922}, {286: 15.844132423...","[{349: 39.00667190551758}, {348: 37.3483467102...","[{349: 43.786773681640625}, {348: 32.056297302...","[{349: 41.51083755493164}, {348: 34.5482330322...","[{349: 39.187191009521484}, {348: 37.625530242...",500,355,53.412394,large
4,n02412080_1630,n02412080_1630.JPEG,"[{349: 49.98129653930664}, {348: 23.4843139648...","[{349: 44.77097702026367}, {348: 23.3637504577...","[{348: 32.67023468017578}, {349: 31.9590415954...","[{349: 42.25478744506836}, {348: 28.8193016052...","[{349: 34.50698471069336}, {348: 28.8213005065...","[{349: 33.63731384277344}, {348: 33.5293846130...","[{349: 37.412513732910156}, {348: 30.247661590...","[{349: 33.93567657470703}, {348: 27.8054199218...","[{349: 64.15687561035156}, {348: 11.3354635238...","[{349: 35.36492156982422}, {348: 34.1064186096...","[{349: 44.9776725769043}, {348: 26.35713577270...","[{349: 47.26266098022461}, {348: 26.0018882751...","[{349: 44.0875244140625}, {348: 26.94127273559...",500,400,19.371,medium
