## Targets used to train WEOW models
# (notebook still under construction)

In [1]:
# Imports
from os.path import join, isdir, isfile
from os import listdir as ls
import os
import pickle
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import numpy as np
import torch
import networkx as nx

In [None]:
# To get models and images
! pip install huggingface_hub
from huggingface_hub import hf_hub_download

In [7]:
import os
import zipfile
from huggingface_hub import hf_hub_download

# Both pickles are hosted in the same Hugging Face dataset repository.
hub_source = {"repo_id": "nanopiero/weow_training_graphs", "repo_type": "dataset"}

# Download the directed graphs (DG)...
file_path = 'weow_semi_supervision_training_DG.pkl'
dgs_path = hf_hub_download(filename=file_path, **hub_source)

# ...and the non-transitive undirected graphs (UG).
file_path = 'weow_semi_supervision_training_UG.pkl'
ugs_path = hf_hub_download(filename=file_path, **hub_source)

In [85]:
# in case of timeout: turn on the option High-RAM (Runtime/change runtime type)
# - the loading should take less than 10 sec
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only run this on a file downloaded from a repository you trust.
with open(dgs_path, 'rb') as file:
  dgs = pickle.load(file)

In [None]:
# dgs contains three sets of targets: 'train', 'vali' (Validation Intra, i.e.
# images coming from the train cameras, but unseen pairs) and 'vale' (Validation
# Extra, i.e. images coming from independent cameras)
dgs.keys()

In [None]:
# The first element of the 'train' list holds the handcrafted, strictly ordered
# training pairs:
dg_handcrafted = dgs['train'][0]

# It is a graph whose nodes are images
print(len(dg_handcrafted.nodes)) # 34726

# Image names are structured as cameraID_YYYYMMDD_HHMMSS. For example:
print(list(dg_handcrafted.nodes)[1000]) # 13853_20110220_220111.jpg

# Most cameraIDs refer to the AMOS directory, where the images can be found,
# but some images (3769) come from other webcam archives. In this case,
# the cameraID is not a number:
print(sum(1 for n in dg_handcrafted.nodes if not n.split('_')[0].isdigit()))

# Some of the images/edges have been fully labelled.
# As labelling involved several steps with more or less detailed image-wise
# annotation, the images are annotated to varying degrees:
print(dg_handcrafted.nodes['13853_20110220_220111.jpg'])

# In this case, it gives a dense annotation:
# 'cam': id of the AMOS repository (with or without the 000 prefix)
# 'snowfall': if snow is falling
# 'rd_features': specific road features (e.g. drifts on the road)
# 'atmo': how the weather looks in the image
# 'noise': 'no', if there are artefacts, hardprinted letters etc
# 'mask': 'droplets', if there are meteorological masks (snowflakes or droplets)
# 'time': lighting conditions
# 'ground': level of the snow cover
# 'visi': comparison with the previous image wrt visibility
# 'old snow_traces': presence of remaining neves
# 'plus_snow': '',
# 'compa': comparison with the previous image wrt snow-cover
# 'sequence': 287, id of the homogeneous sequence
# 'vv' : measured visibility (if a colocated visibilimeter is available. -1 if not.)

# second example:
print(dg_handcrafted.nodes['00010103_4_20170103_093352.jpg'])
# It gives:
# 'sequence': id of the homogeneous sequence
# 'levelvv': rank of the image in a manually sorted batch of five images

# For some edges of this directed graph, a weight has been given, e.g.:
print(dg_handcrafted.edges[('1002_20080117_140759.jpg', '1002_20080117_153753.jpg')])
print(dg_handcrafted.edges[('00010722_3_20170430_184302.jpg', '00010722_3_20150917_144251.jpg')])
# These edges have been labelled manually with a high degree of certainty
# (weight = 0 and weight = 1 correspond to two different annotation methods)
# The other edges could have been obtained either by transitivity or by a
# lower-quality process of comparison.

# The other sets ('vali' and 'vale') are structured like dg_handcrafted

In [None]:
# Other strictly ordered pairs have been obtained automatically
# thanks to an accessory classifier:
automatic_edges = dgs['train'][1]
# as there are no supplementary labels, these edges were simply listed:
print(len(automatic_edges))
# here, the image names are formatted as "amosRepoId_sequenceId_YYYYMMDD_HHMMSS"
# sequenceId refers to a homogeneous sequence of images of the AMOS Repo.
# For example:
print(automatic_edges[0][0])

In [None]:
# The second hf dataset represents incomparable pairs of images.
# It must be loaded here: ugs is not defined anywhere else in this notebook.
# (As for dgs, switch to a High-RAM runtime if the loading times out.)
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load files coming from a repository you trust.
from itertools import islice

with open(ugs_path, 'rb') as file:
  ugs = pickle.load(file)

# The structure of ugs is the same as the structure of dgs:
print(ugs.keys())

# However, there are two handcrafted sets:
ug_handcrafted = ugs['train'][0]
print(len(ug_handcrafted.nodes))

ug_with_noise_only = ugs['train'][1]
print(len(ug_with_noise_only.nodes))

# the second graph contains incomparabilities with images
# that tell nothing (directly) about snow-cover or haze

# Moreover, the supplementary image-wise labels are lighter:
print(ug_handcrafted.nodes['32841_20170427_104304.jpg'])
# But the edges are annotated with a type of incomparability ("toi"):
print(ug_handcrafted.edges[('2030_20121219_211528.jpg', '2030_20121220_164531.jpg')])
# It specifies, if possible, the relative positions of the output intervals Ix = [xinf, xsup] and
# Iy = [yinf, ysup] that should be associated with the first and second image.
# For example: -1, 1, -1, -1 means xinf > yinf ; xinf < ysup ; xsup > yinf ; xsup > ysup
# the second graph only contains inclusions:
# (islice walks the edge view directly instead of rebuilding the full edge
# list twice per printed edge)
for edge in islice(ug_with_noise_only.edges, 10000, 10005):
  print(edge, ug_with_noise_only.edges[edge])
# the weight "2" helps to distinguish these edges from those of ug_handcrafted in case of union

# Please note that, despite the name ug, which suggests that the graphs
# are undirected, the underlying structure is a directed graph.
print(ug_with_noise_only, ug_handcrafted)

# This is because the interpretation of the toi depends on how the image pair is ordered.
# For example, [1, 1, -1, -1] for (Image x, Image y) means Ix should include Iy
# but it means Ix is included in Iy for (Image y, Image x).

In [16]:
# Finally, other incomparable pairs have also been obtained automatically
# thanks to the previously mentioned accessory classifier.
# NOTE: this rebinds automatic_edges, which previously held the directed
# automatic edges taken from dgs['train'][1].
automatic_edges = ugs['train'][2]
print(len(automatic_edges))
# here, the image names are formatted as "amosRepoId_sequenceId_YYYYMMDD_HHMMSS"
# sequenceId refers to a homogeneous sequence of images of the AMOS Repo.
# For these pairs, toi are not defined:
print(automatic_edges[0])

23165147
('00010101_0_20100317_170906.jpg', '00010101_0_20101031_142130.jpg')
