# Targets used to train WEOW models
## (notebook still under construction)

In [3]:
# Imports
from os.path import join, isdir, isfile
from os import listdir as ls
import os
import pickle
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import numpy as np
import torch
import networkx as nx

In [None]:
# To get models and images
! pip install huggingface_hub
from huggingface_hub import hf_hub_download

# Comparisons with respect to horizontal visibility

In [None]:
import os
import zipfile
from huggingface_hub import hf_hub_download

# Fetch the pickled paired-comparison graphs from the Hugging Face Hub.
# Each call yields the path that is opened further down to load the graphs.

# Strict order:
file_path = "weow_semi_supervision_training_DG.pkl"
dgs_path = hf_hub_download(
    repo_type="dataset",
    repo_id="nanopiero/weow_training_graphs",
    filename=file_path,
)

# Incomparabilities:
file_path = "weow_semi_supervision_training_UG.pkl"
ugs_path = hf_hub_download(
    repo_type="dataset",
    repo_id="nanopiero/weow_training_graphs",
    filename=file_path,
)

In [None]:
# If a timeout occurs: enable the "High-RAM" option (Runtime > Change runtime type)
# - Loading should take less than 10 seconds
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files whose source you trust.
with open(dgs_path, 'rb') as file:
    dgs = pickle.load(file)

In [None]:
# The variable 'dgs' contains three sets of targets: 'train', 'vali' (for Validation Intra,
# i.e., images from the training cameras but with independent pairings),
# and 'vale' (for Validation Extra, i.e., images from independent cameras).
# Kept as the last expression of the cell so that the notebook displays the keys.
dgs.keys()

In [None]:
# The first element of the list contains handcrafted, strictly ordered training pairs:
dg_handcrafted = dgs['train'][0]

# This first element is a graph with nodes representing images:
print(len(dg_handcrafted.nodes)) # 34726

# The image names are structured as cameraID_YYYYMMDD_HHMMSS. For example:
print(list(dg_handcrafted.nodes)[1000]) # 13853_20110220_220111.jpg

# Most of the cameraIDs refer to the AMOS directory, where images can be found.
# However, some images (3,769) come from other webcam archives. In these cases,
# the cameraID is not a number.
# The count is done with a generator so that no intermediate list is built:
print(sum(1 for n in dg_handcrafted.nodes if not n.split('_')[0].isdigit()))

# Some of the images/edges have been fully labeled.
# Since labeling involved multiple steps, with varying levels of image-wise annotation,
# the images are annotated in different ways:
print(dg_handcrafted.nodes['13853_20110220_220111.jpg'])

# In this case, it gives a dense annotation:
# 'cam': ID of the AMOS repository (with or without the 000 prefix)
# 'snowfall': indicates if snow is falling
# 'rd_features': specific road features (e.g. drifts on the road)
# 'atmo': describes the weather conditions in the image
# 'noise': indicates if there are artifacts, conspicuous overlaid text, etc.
# 'mask': if there are meteorological masks (snowflake or droplets)
# 'time': lighting conditions
# 'ground': level of the snow cover
# 'visi': comparison with the previous image wrt visibility
# 'old snow_traces': presence of remaining névés
# 'compa': comparison with the previous image wrt snow cover
# 'sequence': ID of the homogeneous sequence
# 'vv': measured visibility (if a colocated visibilimeter is available; -1 if not)

# Second example:
print(dg_handcrafted.nodes['00010103_4_20170103_093352.jpg'])
# It provides:
# 'sequence': ID of the homogeneous sequence
# 'levelvv': rank of the image in a manually sorted batch of five images


# For some edges of this directed graph, a weight has been assigned, e.g.:
print(dg_handcrafted.edges[('1002_20080117_140759.jpg', '1002_20080117_153753.jpg')])
print(dg_handcrafted.edges[('00010722_3_20170430_184302.jpg', '00010722_3_20150917_144251.jpg')])
# These edges have been manually labeled with a high degree of certainty
# (weight = 0 and weight = 1 correspond to two different annotation methods).
# The other edges may have been obtained either by transitivity or through a
# lower-quality comparison process.

# The other sets ('vali' and 'vale') are structured similarly to 'dg_handcrafted'.

In [None]:
# Other strictly ordered pairs have been obtained automatically
# using an auxiliary classifier:
automatic_edges = dgs['train'][1]
# Since there are no supplementary labels, these edges are simply listed:
print(len(automatic_edges))
# Here, the image names are formatted as "amosRepoId_sequenceId_YYYYMMDD_HHMMSS".
# The sequenceId refers to a homogeneous sequence of images in the AMOS repository.
# For example (first element of the first listed edge):
print(automatic_edges[0][0])

In [None]:
# The second HF dataset represents incomparable pairs of images.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files whose source you trust.
with open(ugs_path, 'rb') as file:
    ugs = pickle.load(file)

# The structure of 'ugs' is the same as that of 'dgs'.
print(ugs.keys())

# However, there are two handcrafted sets:
ug_handcrafted = ugs['train'][0]
print(len(ug_handcrafted.nodes))

ug_with_noise_only = ugs['train'][1]
print(len(ug_with_noise_only.nodes))

# The second graph contains incomparabilities with images
# that do not provide any direct information about snow cover or haze.

# Moreover, the supplementary image-wise labels are lighter:
print(ug_handcrafted.nodes['32841_20170427_104304.jpg'])

# However, the edges are annotated with the type of incomparability ("toi"):
print(ug_handcrafted.edges[('2030_20121219_211528.jpg', '2030_20121220_164531.jpg')])

# It specifies, if possible, the relative positions of the output intervals Ix = [xinf, xsup] and
# Iy = [yinf, ysup] that should be associated with the first and second images.
# For example: -1, 1, -1, -1 means xinf > yinf; xinf < ysup; xsup > yinf; xsup > ysup

# The second graph only contains inclusions.
# The edge list is materialized once, outside the loop, instead of being
# rebuilt on every access:
noise_edges = list(ug_with_noise_only.edges)
for i in range(10000, 10005):
    edge = noise_edges[i]
    print(edge, ug_with_noise_only.edges[edge])
# The weight "2" helps distinguish these edges from those of 'ug_handcrafted' in case of a union.

# Please note that despite the name 'ug', which suggests that the graphs
# are undirected, the underlying structure is actually a directed graph.
print(ug_with_noise_only, ug_handcrafted)

# This is because the interpretation of "toi" depends on the order of the image pair.
# For example, [1, 1, -1, -1] for (Image x, Image y) means Ix should include Iy,
# but it means Ix is included in Iy for (Image y, Image x).

In [None]:
# Finally, other incomparable pairs have also been obtained automatically
# using the previously mentioned auxiliary classifier:
automatic_edges = ugs['train'][2]
# Number of automatically obtained pairs, then the first one:
print(len(automatic_edges))
print(automatic_edges[0])

# Here, the image names are formatted as "amosRepoId_sequenceId_YYYYMMDD_HHMMSS".
# The sequenceId refers to a homogeneous sequence of images in the AMOS repository.
# For these pairs, "toi" is not defined.

# Comparisons with respect to snow cover (to be completed)

In [None]:
# Fetch the pickled paired-comparison graphs for snow cover
# (automatic and handcrafted labels) from the Hugging Face Hub.
# Note: dgs_path and ugs_path are reused here, so they now point to the
# snow-cover files.

# Strict order:
file_path = "weow_semi_supervision_training_DG_snow.pkl"
dgs_path = hf_hub_download(
    repo_type="dataset",
    repo_id="nanopiero/weow_training_graphs",
    filename=file_path,
)

# Incomparabilities:
file_path = "weow_semi_supervision_training_UG_snow.pkl"
ugs_path = hf_hub_download(
    repo_type="dataset",
    repo_id="nanopiero/weow_training_graphs",
    filename=file_path,
)

In [7]:
# Load the strict-order targets from the file located at dgs_path.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files whose source you trust.
with open(dgs_path, 'rb') as file:
    dgs = pickle.load(file)

In [None]:
# The variable 'dgs' contains two sets of targets: 'train', 'val'
# (here 'val' only contains images from independent cameras).
# Kept as the last expression of the cell so that the notebook displays the keys.
dgs.keys()

In [32]:
# Load the incomparability targets from the file located at ugs_path.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files whose source you trust.
with open(ugs_path, 'rb') as file:
    ugs = pickle.load(file)

In [None]:
# The variable 'ugs' contains two sets of targets: 'train', 'val'
# (here 'val' only contains images from independent cameras).
# Kept as the last expression of the cell so that the notebook displays the keys.
ugs.keys()