<a href="https://colab.research.google.com/github/ktichola/Image-matching/blob/main/IMC_Data_Description.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call while downloading.
CHUNK_SIZE = 40960
# Comma-separated list of '<directory>:<URL-encoded download URL>' entries; the download
# loop further down splits each entry on ':' and URL-decodes the second part.
# NOTE(review): the URL is a signed link carrying X-Goog-Date=20240514T112754Z and
# X-Goog-Expires=259200, so it has presumably expired - confirm and regenerate if needed.
DATA_SOURCE_MAPPING = 'image-matching-challenge-2024:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F71885%2F8143495%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240514%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240514T112754Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Db19caa8764194503bd82c419db5b2bf1b8fb11896c183b943e59f04d752d655a70d7e6a0c3ff6ab10d11c47103798f681cae3918cff5656dd563dd4180c1aa8e9df7b888772a6ed7e6e3359be752c2a40567183323050d6253f793dea33c0f5de7f6403354743614e1579ba0d8320acc10ad54a19ed91efa2c86f099a18668645118ae1dd618407d46fecc71b2646c4edfbf518c9fd1481eedc7236a1334ed8e439e2668949b22d1998d1beb7405d35ed2178124acaedd374ac21032555fd29b502a49308cca7be1cb7eaee9cfd22f4a22703f7eb50ed8e2774f172479244ce963fd26d7c076d1dc13fd57d35bb3803fb4da0d97ac3aa3a1b92af879a88cacf3'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A source that fails to download is reported
# and skipped so the remaining sources are still attempted.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Entry format is '<directory>:<URL-encoded URL>'. Split only on the first
    # ':' so a colon inside the URL part cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length may be missing; treat that (or 0) as "unknown"
            # instead of crashing in int() or dividing by zero below.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # Stream the body in CHUNK_SIZE pieces until read() returns b''.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    done = int(50 * dl / total_length)
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    # Size unknown: show the running byte count without a bar.
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # The tar branch reopens the temp file by path, so buffered writes
            # must reach the file before it is read back.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here
                # would overwrite the imported module.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is handled first because it is a subclass of OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


# <div style="padding: 25px;color:white;margin:10;font-size:60%;text-align:left;display:fill;border-radius:10px;background-color:#FFFFFF;overflow:hidden;background-color:#87CEEB"><b><span style='color:#FFFFFF'></span></b> <b>1. Domain Explanation</b></div>

#### <b><span style='color:#87CEEB'> |</span> The Previous Competition</b>

📌 **[2023 Edition of the competition](https://www.kaggle.com/competitions/image-matching-challenge-2023)**: Reconstruct 3D scenes from 2D images

📌 **[2022 Edition of the competition](https://www.kaggle.com/competitions/image-matching-challenge-2022)**: Register two images from different viewpoints

#### <b><span style='color:#87CEEB'> |</span> The Goal of this Competition</b>

<b>*Construct precise 3D maps using sets of images in diverse scenarios and environments*</b>

#### <b><span style='color:#87CEEB'> |</span> Six Different Domains</b>

- <b><span style='color:#1E90FF'> Phototourism and historical preservation</span></b>: different viewpoints, sensor types, time of day/year, and occlusions. Ancient historical sites add a unique set of challenges

- <b><span style='color:#1E90FF'>Night vs day and temporal changes</span></b>: combination of day and night photographs, including poor lighting, or photographs taken months or years apart, in
different weather

- <b><span style='color:#1E90FF'>Aerial and mixed aerial-ground</span></b>:  images from drones, featuring arbitrary in-plane rotations, matched against similar images and also images taken from the ground

- <b><span style='color:#1E90FF'>Repeated structures</span></b>: symmetrical objects require details to disambiguate perspective

- <b><span style='color:#1E90FF'>Natural environments</span></b>: highly non-regular structures such as trees and foliage

- <b><span style='color:#1E90FF'>Transparencies and reflections</span></b>: objects like glassware are lacking in texture and create reflections and specularities which pose a different set of problems

# <div style="padding: 25px;color:white;margin:10;font-size:60%;text-align:left;display:fill;border-radius:10px;background-color:#FFFFFF;overflow:hidden;background-color:#87CEEB"><b><span style='color:#FFFFFF'></span></b> <b>2. Import and Read Data</b></div>

In [None]:
import os
import gc
import ctypes
import random
import sys

from tqdm import tqdm
from time import time, sleep

from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn

import warnings
# Install a blanket 'ignore' filter so warnings are not shown.
warnings.filterwarnings('ignore')

# Seed value used by seed_everything() for the Python, NumPy and PyTorch RNGs.
SEED = 2024

*Clean Memory*

In [None]:
def clean_memory():
    """Run a garbage collection pass, then ask glibc to release freed heap memory.

    The collector runs first so that whatever it frees is available for
    ``malloc_trim`` to release. On platforms where ``libc.so.6`` or
    ``malloc_trim`` is unavailable, the trim step is skipped and only the
    garbage collection happens.
    """
    gc.collect()
    try:
        ctypes.CDLL('libc.so.6').malloc_trim(0)
    except (OSError, AttributeError):
        # OSError: the library cannot be loaded; AttributeError: the loaded
        # library has no malloc_trim symbol. Trimming is best-effort.
        pass

clean_memory()

*Seed Everything*

In [None]:
def seed_everything(seed=None):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed to apply. When omitted, the module-level ``SEED``
            is used, which matches the original zero-argument behaviour.
    """
    if seed is None:
        seed = SEED
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not only the current one.
    torch.cuda.manual_seed_all(seed)
    # Correctly spelled flag: the previous misspelling only created an unused
    # attribute and left cuDNN non-deterministic.
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms between runs, so it
    # is turned off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything()

## Dataset Description

#### <b><span style='color:#87CEEB'> |</span> [train/test]images</b>

📌 A batch of images all taken near the same location. Some of the training datasets may also contain a folder named images_full with additional images. The published test folder comprises a subset of the church scene from train and is provided solely for example purposes. The training data usually has a sequential capture ordering and significant image-to-image content overlap, while the test set has limited image-to-image overlap and the image ordering is randomized.

#### <b><span style='color:#87CEEB'> |</span> train/sfm</b>

📌 A 3-D reconstruction for this batch of images, which can be opened with colmap, the 3-D structure-from-motion library bundled with this competition

#### <b><span style='color:#87CEEB'> |</span> train/LICENSE.txt</b>

📌 The license for this dataset

#### <b><span style='color:#87CEEB'> |</span> train_labels.csv</b>

- `dataset`: The unique identifier for the dataset

- `scene`: The unique identifier for the scene

- `image_name`:  The image filename

- `rotation_matrix`: The first target column. A 3X3 matrix, flattened into a vector in row-major convention with values separated by ;

- `translation_vector`: The second target column. A 3-D vector, with values separated by ;

In [None]:
# Load the training labels and show their shape and contents.
train_labels = pd.read_csv('/kaggle/input/image-matching-challenge-2024/train/train_labels.csv')
print('Shape of train_labels:', train_labels.shape)
# display() renders the frame itself and returns None, so it is called
# directly; passing its result to print() would emit a stray "None".
display(train_labels)

In [None]:
# Side-by-side count plots of train_labels: one for the 'dataset' column and
# one for the 'scene' column, each with x tick labels rotated by 90 degrees.
plt.figure(figsize=(12, 6))

count_panels = (
    ('dataset', "Distribution of Dataset"),
    ('scene', 'Distribution of Scene'),
)
for panel_index, (column_name, panel_title) in enumerate(count_panels, start=1):
    plt.subplot(1, 2, panel_index)
    sns.countplot(x=train_labels[column_name])
    plt.xticks(rotation=90)
    plt.title(panel_title)

plt.show()

In [None]:
# Rows whose scene identifier is not the same as their dataset identifier.
scene_differs_from_dataset = train_labels['scene'] != train_labels['dataset']
train_labels.loc[scene_differs_from_dataset]

#### <b><span style='color:#87CEEB'> |</span> categories.csv</b>

- **`church`**: <b><span style='color:#1E90FF'>symmetries-and-repeats</span></b>

- **`dioscuri`**: <b><span style='color:#1E90FF'>historical_preservation;air-to-ground</span></b>

- **`lizard`**: <b><span style='color:#1E90FF'>day-night;temporal</span></b>

- **`multi-temporal-temple-baalshamin`**: <b><span style='color:#1E90FF'>historical_preservation;temporal</span></b>

- **`pond`**: <b><span style='color:#1E90FF'>day-night;temporal;nature</span></b>

- **`transp_obj_glass_cup`**: <b><span style='color:#1E90FF'>symmetries-and-repeats;transparent</span></b>

- **`transp_obj_glass_cylinder`**: <b><span style='color:#1E90FF'>symmetries-and-repeats;transparent</span></b>

In [None]:
# Pie chart of how often each value of the 'categories' column occurs.
category = pd.read_csv('/kaggle/input/image-matching-challenge-2024/train/categories.csv')

# Wedge sizes and wedge labels must come from the same value_counts() result:
# value_counts() orders by frequency while unique() keeps order of appearance,
# so mixing the two attaches labels to the wrong wedges.
category_counts = category['categories'].value_counts()
labels = category_counts.index
# One small offset per wedge, sized to the data instead of a fixed length.
explode = (0.01,) * len(category_counts)

plt.figure(figsize=(12,6))
plt.title("Distribution of Category")
plt.pie(category_counts, labels=labels, explode=explode, autopct='%.1f%%')
plt.show()

#### <b><span style='color:#87CEEB'> |</span> sample_submission.csv</b>

- `image_path`: The image filename, including the path

- `dataset`: The unique identifier for the dataset

- `scene`:  The unique identifier for the scene

- `rotation_matrix`: The first target column. A 3X3 matrix, flattened into a vector in row-major convention with values separated by ;

- `translation_vector`: The second target column. A 3-D vector, with values separated by ;

In [None]:
# Load the sample submission and show its first rows.
sample_submission = pd.read_csv('/kaggle/input/image-matching-challenge-2024/sample_submission.csv')
# display() returns None, so it is called directly; passing its result to
# print() would emit a stray "None".
display(sample_submission.head())