In [None]:
import json
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


In [None]:
# Load the merged annotation file. The encoding is given explicitly because
# open() otherwise falls back to the platform's locale encoding, which can
# mis-decode non-ASCII text in a UTF-8 JSON file.
with open('../data/processed/qt-coyotes-merged.json', encoding='utf-8') as f:
    data = json.load(f)


In [None]:
# Placeholders: nothing in the visible cells fills these in. They stay defined
# because a later cell displays `categories_per_dataset`.
categories_per_location = {}
categories_per_dataset = {}

# Lookup tables keyed by the ids used inside the annotation records.
image_id_to_image = {image['id']: image for image in data['images']}
category_id_to_category = {category['id']: category['name'] for category in data['categories']}
# Raw category name -> label shown in tables and plots.
category_to_string = {
    "no_mange": "Mange Not Detected",
    "mange": "Mange Detected",
}
# Dataset folder (first component of an image's file_name) -> city prefix.
dataset_to_city = {
    "coyote-dens": "Edmonton",
    "mange_Toronto": "Toronto",
    "mange_images": "Chicago",
    "CHIL": "Chicago",
    "CHIL-earlier": "Chicago",
}

# Collect one plain record per annotation and build the frame once at the end.
# Creating a one-row DataFrame per annotation and concatenating them is slow,
# gives every row the same index label (0), and fails on an empty list.
rows = []
for annotation in data['annotations']:
    image = image_id_to_image[annotation['image_id']]
    location = image['location']
    dataset = image['file_name'].split('/')[0]
    category = category_id_to_category[annotation['category_id']]
    category = category_to_string[category]
    # Prefix the location with its city, e.g. "<city>: <location>".
    location = dataset_to_city[dataset] + ": " + location
    rows.append({'Location': location, 'Category': category, 'Dataset': dataset})

# `columns` fixes the column order and keeps the columns present when there
# are no annotations at all.
df = pd.DataFrame(rows, columns=['Location', 'Category', 'Dataset'])


In [None]:
# Number of mange-positive annotations per dataset. The Category column holds
# the display labels ('Mange Detected' / 'Mange Not Detected'), not the raw
# 'mange' / 'no_mange' names, so the filter must use the display label.
df.loc[df['Category'] == 'Mange Detected', 'Dataset'].value_counts()


In [None]:
# Number of annotations contributed by each dataset.
dataset_counts = df['Dataset'].value_counts()
dataset_counts


In [None]:
# Number of annotations carrying each category label.
category_counts = df['Category'].value_counts()
category_counts


In [None]:
# Annotation counts per dataset (rows) and category (columns). The
# `categories_per_dataset` dict is created empty and never filled in by the
# visible cells, so displaying it would only ever show `{}`.
df.groupby(['Dataset', 'Category']).size().unstack(fill_value=0)


In [None]:
# Order the rows by location first, then by category within each location.
sort_keys = ['Location', 'Category']
df = df.sort_values(by=sort_keys)


In [None]:
sns.set_theme(style="whitegrid")

# Horizontal grouped bars: one row per dataset, with the categories drawn
# side by side ("dodge") and slightly narrowed (shrink).
g = sns.histplot(
    data=df,
    y="Dataset",
    hue="Category",
    multiple="dodge",
    shrink=.8,
)
# Rotate the count-axis tick labels through tick_params. Feeding
# get_xticklabels() back into set_xticklabels() would freeze the labels of an
# automatically located numeric axis and makes matplotlib warn about fixed
# tick labels.
g.tick_params(axis='x', labelrotation=90)
plt.show()


In [None]:
# Split the annotations by category label.
is_mange = df['Category'].eq('Mange Detected')
is_no_mange = df['Category'].eq('Mange Not Detected')

df_mange = df.loc[is_mange]

# Keep only the mange-free rows whose location also has at least one
# mange-positive annotation.
mange_locations = df_mange['Location']
df_no_mange = df.loc[is_no_mange]
df_no_mange = df_no_mange.loc[df_no_mange['Location'].isin(mange_locations)]


In [None]:
# Figure styling: white grid, Times New Roman, black text and ticks.
paper_style = {
    "font.family": "Times New Roman",
    "text.color": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "xtick.labelsize": 16,
}
sns.set_style("whitegrid", paper_style)
sns.set_context("paper", font_scale=2)


def _draw_location_panel(ax, frame, legend_label, **hist_kwargs):
    """Draw per-location counts of `frame` on `ax` and return the histplot result.

    The panel gets no extra margin along y, a log-scaled x axis, and a
    single-entry legend labelled `legend_label` in the upper-right corner.
    Extra keyword arguments are forwarded to `sns.histplot`.
    """
    drawn = sns.histplot(
        data=frame,
        y="Location",
        multiple="dodge",
        ax=ax,
        common_bins=True,
        **hist_kwargs,
    )
    ax.margins(y=0)
    ax.set_xscale('log')
    ax.legend(labels=[legend_label], loc='upper right')
    return drawn


# Two side-by-side panels sharing both axes.
fig, axs = plt.subplots(1, 2, figsize=(16, 48), sharex=True, sharey=True)

# The right-hand (mange) panel is drawn before the left-hand one, as before.
# NOTE(review): with sharey=True the draw order may influence the category
# order on the shared axis - confirm before reordering these calls.
_draw_location_panel(axs[1], df_mange, 'Mange Detected', color='orange')
g = _draw_location_panel(axs[0], df_no_mange, 'Mange Not Detected')
plt.show()
