In [4]:
import os
import requests
from tqdm import tqdm

# Download the IconArt archive once; later runs reuse the local copy.
download_url = "https://zenodo.org/records/4737435/files/IconArt_v2.zip"
local_file_name = "IconArt.zip"

# The archive is streamed into a temporary sibling file and only renamed to its
# final name once the transfer has finished. Otherwise an interrupted download
# would leave a truncated 'IconArt.zip' behind, and the existence check below
# would treat it as complete on every later run.
partial_file_name = local_file_name + ".part"

# Seconds to wait for the connection and for each read, so that a stalled
# server cannot hang the cell forever.
request_timeout = 30

if os.path.exists(local_file_name):
  print(f"The file '{local_file_name}' already exists. Skipping download.")

else:
  print(f"Downloading the dataset from {download_url}...")

  # A single GET is enough: its headers already carry the size. (A separate
  # HEAD request does not follow redirects by default, so it can report the
  # size of a redirect response instead of the size of the file.)
  with requests.get(download_url, stream=True, timeout=request_timeout) as response:
    response.raise_for_status()
    file_size = int(response.headers.get("content-length", 0))

    try:
      # total=None makes tqdm show a plain counter when the size is unknown.
      with tqdm(total=file_size or None, unit="B", unit_scale=True, desc=local_file_name) as pbar:
        with open(partial_file_name, "wb") as file:
          for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)
            pbar.update(len(chunk))  # Update progress bar
      # Reached only when the whole body was written without an error.
      os.replace(partial_file_name, local_file_name)
    except BaseException:
      # Also covers a kernel interrupt: never leave a partial file around.
      if os.path.exists(partial_file_name):
        os.remove(partial_file_name)
      raise

  print(f"Dataset downloaded and saved as '{local_file_name}'")

Downloading the dataset from https://zenodo.org/records/4737435/files/IconArt_v2.zip...


IconArt.zip: 100%|██████████| 1.38G/1.38G [02:08<00:00, 10.8MB/s]

Dataset downloaded and saved as 'IconArt.zip'





In [6]:
import zipfile
import os

# Unpack the downloaded archive and give the extracted folder its final name.
zip_file = "IconArt.zip"

extract_dir = "."

# The archive unpacks into 'IconArt_v2'; the rest of the notebook expects 'IconArt'.
old_folder_name = "IconArt_v2"
new_folder_name = "IconArt"

# Resolve both folders relative to the extraction directory so the rename
# still targets the right place if 'extract_dir' is changed.
old_folder_path = os.path.join(extract_dir, old_folder_name)
new_folder_path = os.path.join(extract_dir, new_folder_name)

if os.path.exists(new_folder_path):
    # Re-running the cell must be harmless: extracting again and renaming onto
    # an existing, non-empty folder would raise an OSError.
    print(f"The folder '{new_folder_name}' already exists. Skipping extraction.")
elif not os.path.exists(zip_file):
    print(f"The file '{zip_file}' does not exist. Please download it first.")
else:
    print(f"Extracting '{zip_file}' to '{extract_dir}'...")
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"Extraction complete. Files are saved in '{extract_dir}'")

    if os.path.exists(old_folder_path):
        os.rename(old_folder_path, new_folder_path)
        print(f"Renamed '{old_folder_name}' to '{new_folder_name}'")
    else:
        print(f"The folder '{old_folder_name}' does not exist.")

Extracting 'IconArt.zip' to '.'...
Extraction complete. Files are saved in '.'
Renamed 'IconArt_v2' to 'IconArt'


In [4]:
import os
import json
import pandas as pd

# Build the class lookup table and copy the test split into the data folder.
dataset_folder = "IconArt"
dataset_data_folder = "IconArt-data"

# Create the output folder before anything is written into it; on a fresh
# checkout it does not exist yet and the first write would fail.
os.makedirs(dataset_data_folder, exist_ok=True)

# Read the CSV file
csv_file_path = os.path.join(dataset_folder, "ImageSets", "Main", "IconArt_v2.csv")
df = pd.read_csv(csv_file_path)

# Class columns: every column except the bookkeeping ones ('item', 'set', 'Anno').
columns_to_keep = [col for col in df.columns if col not in ['item', 'set', 'Anno']]

# Class name -> [ID, human-readable description].
# NOTE(review): the IDs look like Iconclass notations - confirm the source.
data_dict = {
  'Saint_Sebastien': ['11H(SEBASTIAN)', 'The Martyr Sebastian; Possible Attributes: Arrow(s), Bow, Tree-Trunk'],
  'turban': ['41D221(TURBAN)', 'Head-Gear: Turban'],
  'crucifixion_of_Jesus': ['11D356', 'Christ Crucified On A \'Living\' Cross'],
  'angel': ['11G', 'Angels'],
  'capital': ['48C1612', 'Capital (~ Column, Pillar)'],
  'Mary': ['11F', 'The Virgin Mary'],
  'beard': ['31A534', 'Beard'],
  'Child_Jesus': ['11D2', 'Christ As Child Or Youth (In General) ~ Christian Religion'],
  'nudity': ['31A', 'The (Nude) Human Figure; \'Corpo Humano\' (Ripa)'],
  'ruins': ['48C149', 'Ruin Of A Building ~ Architecture']
}

# One row per class; the dictionary keys become the 'Label' column.
df_new = (
  pd.DataFrame.from_dict(data_dict, orient='index', columns=['ID', 'Description'])
  .reset_index()
  .rename(columns={'index': 'Label'})
)
# Display-friendly labels: 'Child_Jesus' -> 'Child Jesus', 'turban' -> 'Turban'.
df_new['Label'] = df_new['Label'].str.replace('_', ' ').str.title()

# Save Classes
classes_csv_path = os.path.join(dataset_data_folder, "classes.csv")
df_new.to_csv(classes_csv_path, index=False)
print(f"DataFrame saved as '{classes_csv_path}'")

# Copy test file to data folder
output_csv_path = os.path.join(dataset_data_folder, "2_test.txt")
test_file = os.path.join(dataset_folder, 'ImageSets', 'Main', "test.txt")

# Copy line by line so surrounding whitespace is stripped and every line
# ends with a single '\n'.
with open(test_file, 'r') as src, open(output_csv_path, 'w') as dst:
  for line in src:
    dst.write(line.strip() + '\n')
print(f"Test file copied to '{output_csv_path}'")

df_new


DataFrame saved as 'IconArt-data/classes.csv'
Test file copied to 'IconArt-data/2_test.txt'


Unnamed: 0,Label,ID,Description
0,Saint Sebastien,11H(SEBASTIAN),The Martyr Sebastian; Possible Attributes: Arr...
1,Turban,41D221(TURBAN),Head-Gear: Turban
2,Crucifixion Of Jesus,11D356,Christ Crucified On A 'Living' Cross
3,Angel,11G,Angels
4,Capital,48C1612,"Capital (~ Column, Pillar)"
5,Mary,11F,The Virgin Mary
6,Beard,31A534,Beard
7,Child Jesus,11D2,Christ As Child Or Youth (In General) ~ Christ...
8,Nudity,31A,The (Nude) Human Figure; 'Corpo Humano' (Ripa)
9,Ruins,48C149,Ruin Of A Building ~ Architecture


In [6]:
import os
import json
import pandas as pd

# Build the ground-truth file: one {"item", "class"} record per test image.
dataset_folder = "IconArt"
dataset_data_folder = "IconArt-data"

# Define paths
image_dir = "IconArt/JPEGImages"
json_file_path = "IconArt-data/2_ground_truth.json"
csv_file_path = os.path.join(dataset_folder, "ImageSets", "Main", "IconArt_v2.csv")
test_file = os.path.join(dataset_data_folder, "2_test.txt")

# Read the CSV file
df = pd.read_csv(csv_file_path)

# Derive the class columns from the frame loaded above instead of relying on a
# variable left behind by another cell, so this cell also runs on a fresh kernel.
columns_to_keep = [col for col in df.columns if col not in ['item', 'set', 'Anno']]

# Read the test file, ignoring blank lines so they do not trigger
# "No matching row" warnings below.
with open(test_file, 'r') as file:
  test_items = [line for line in file.read().splitlines() if line.strip()]

ground_truth_data = []

# Process each image in the test file
for item in test_items:
  row = df[df['item'] == item]
  if row.empty:
    print(f"Warning: No matching row found in CSV for item '{item}'. Skipping...")
    continue

  # If the CSV lists an item more than once, the first occurrence is used.
  row = row.iloc[0]

  # Record the first class column whose value is 1.
  # NOTE(review): because of the 'break', an image flagged for several classes
  # only keeps the first one (in CSV column order), and an image with no
  # positive class is left out entirely - confirm this is intended.
  for col in columns_to_keep:
    if row[col] == 1:
      ground_truth_data.append({
        "item": item,
        "class": col
      })
      break

# Save ground truth data to JSON file
with open(json_file_path, 'w') as json_file:
  json.dump(ground_truth_data, json_file, indent=4)

print(f"Ground truth data has been saved to {json_file_path}")

Ground truth data has been saved to IconArt-data/2_ground_truth.json
