# Coloured Semantic 3D points
## Prerequisites:
Text-files from Sparse reconstruction:
- points3D.txt
- images.txt


ADEK20K segmentation of every frame:
- segmentation_offset10.npy

## Premade funtions (COLMAP)

In [30]:
import os
import collections
import numpy as np
import struct
import argparse

BaseImage = collections.namedtuple(
    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
Point3D = collections.namedtuple(
    "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])

class Image(BaseImage):
    def qvec2rotmat(self):
        return qvec2rotmat(self.qvec)

def qvec2rotmat(qvec):
    return np.array([
        [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
         2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
         2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
        [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
         1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
         2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
        [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
         2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
         1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])    

    
def read_points3D_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                point3D_id = int(elems[0])
                xyz = np.array(tuple(map(float, elems[1:4])))
                rgb = np.array(tuple(map(int, elems[4:7])))
                error = float(elems[7])
                image_ids = np.array(tuple(map(int, elems[8::2])))
                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
                points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
                                               error=error, image_ids=image_ids,
                                               point2D_idxs=point2D_idxs)
    return points3D

def read_images_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                elems = fid.readline().split()
                xys = np.column_stack([tuple(map(float, elems[0::3])),
                                       tuple(map(float, elems[1::3]))])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id, qvec=qvec, tvec=tvec,
                    camera_id=camera_id, name=image_name,
                    xys=xys, point3D_ids=point3D_ids)
    return images

def write_points3D_text(points3D, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    if len(points3D) == 0:
        mean_track_length = 0
    else:
        mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
    HEADER = "# 3D point list with one line of data per point:\n" + \
             "#   POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \
             "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length)

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, pt in points3D.items():
            point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
            fid.write(" ".join(map(str, point_header)) + " ")
            track_strings = []
            for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
                track_strings.append(" ".join(map(str, [image_id, point2D])))
            fid.write(" ".join(track_strings) + "\n")
            
def write_images_text(images, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    if len(images) == 0:
        mean_observations = 0
    else:
        mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
    HEADER = "# Image list with two lines of data per image:\n" + \
             "#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \
             "#   POINTS2D[] as (X, Y, POINT3D_ID)\n" + \
             "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations)

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, img in images.items():
            image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
            first_line = " ".join(map(str, image_header))
            fid.write(first_line + "\n")

            points_strings = []
            for xy, point3D_id in zip(img.xys, img.point3D_ids):
                points_strings.append(" ".join(map(str, [*xy, point3D_id])))
            fid.write(" ".join(points_strings) + "\n")

## Load txt files

In [31]:
pts = read_points3D_text("_COLMAP/Model/Text/points3D.txt")

In [32]:
ims = read_images_text("_COLMAP/Model/Text/images.txt")

# Gettin Mapping from 3D to ImageID, X, Y

Create dict going from 3D image IDs to all 2D image IDs that contrivute to point,
in second step get mapping from 3D IDs to 2D IDs, 2D name, and XY-coordinates

In [35]:
dict_3D_to_imageIDs = {}

dict_3D_to_imageName_XY = {}

for k in pts.keys():
    dict_3D_to_imageIDs[k] = np.column_stack((pts[k].image_ids,pts[k].point2D_idxs))
    #Left: imageId, right: index of x,y
    
for k in dict_3D_to_imageIDs.keys():
    #Make list for all corresponding images
    dict_3D_to_imageName_XY[k] = []
    for tup in dict_3D_to_imageIDs[k]:
        #For all images mentioend in 3D point, get tuple of name, imageID and X,Y
        dict_3D_to_imageName_XY[k].append((ims[tup[0]].name, tup[1], list(np.rint(ims[tup[0]].xys[tup[1]]).astype(int))))

Load segmentation numpy 3D array (shape: #frames, width, height)

In [37]:
segmentation_labels = np.load("segmentation_offset10.npy")
print(segmentation_labels.shape)

(1050, 1080, 1920)


Get mapping from 3D ID to all segmentation labels for this point

In [38]:
import re

dict_3D_to_labels = {}

for k in dict_3D_to_imageName_XY:
    label_list = []
    for trip in dict_3D_to_imageName_XY[k]:
        #Extract frame number from image name (e.g., "out23.png" -> 23)
        frame_number = int(re.search(r'\d+', trip[0]).group())
        if (frame_number % 10 == 0):
            frame_number /= 10
            #Maybe x and y should be switched xD
            x = dict_3D_to_imageName_XY[k][1][2][0]
            y = dict_3D_to_imageName_XY[k][1][2][1]
            label_list.append(segmentation_labels[int(frame_number)][y][x])
    dict_3D_to_labels[k] = label_list

Vote on labels for each 3D point, to have only one

In [39]:
#Currently voting is just using most common (can be changed tho)

from collections import Counter
def Most_Common(lst):
    if (lst):        
        data = Counter(lst)
        return int(data.most_common(1)[0][0])


dict_3D_to_SINGLE_LABEL = {}

for k in dict_3D_to_labels.keys():
    # Use voting strategy on each label list
    dict_3D_to_SINGLE_LABEL[k] = Most_Common(dict_3D_to_labels[k])

Hand made dict for colour labels (ugly!)

In [40]:
colours = {
    0 : [120 , 120 , 120],
    1 : [180 , 120 , 120],
    2 : [6 , 230 , 230],
    3 : [80 , 50 , 50],
    4 : [4 , 200 , 3],
    5 : [120 , 120 , 80],
    6 : [140 , 140 , 140],
    7 : [204 , 5 , 255],
    8 : [230 , 230 , 230],
    9 : [4 , 250 , 7],
    10 : [224 , 5 , 255],
    11 : [235 , 255 , 7],
    12 : [150 , 5 , 61],
    13 : [120 , 120 , 70],
    14 : [8 , 255 , 51],
    15 : [255 , 6 , 82],
    16 : [143 , 255 , 140],
    17 : [204 , 255 , 4],
    18 : [255 , 51 , 7],
    19 : [204 , 70 , 3],
    20 : [0 , 102 , 200],
    21 : [61 , 230 , 250],
    22 : [255 , 6 , 51],
    23 : [11 , 102 , 255],
    24 : [255 , 7 , 71],
    25 : [255 , 9 , 224],
    26 : [9 , 7 , 230],
    27 : [220 , 220 , 220],
    28 : [255 , 9 , 92],
    29 : [112 , 9 , 255],
    30 : [8 , 255 , 214],
    31 : [7 , 255 , 224],
    32 : [255 , 184 , 6],
    33 : [10 , 255 , 71],
    34 : [255 , 41 , 10],
    35 : [7 , 255 , 255],
    36 : [224 , 255 , 8],
    37 : [102 , 8 , 255],
    38 : [255 , 61 , 6],
    39 : [255 , 194 , 7],
    40 : [255 , 122 , 8],
    41 : [0 , 255 , 20],
    42 : [255 , 8 , 41],
    43 : [255 , 5 , 153],
    44 : [6 , 51 , 255],
    45 : [235 , 12 , 255],
    46 : [160 , 150 , 20],
    47 : [0 , 163 , 255],
    48 : [140 , 140 , 140],
    49 : [250 , 10 , 15],
    50 : [20 , 255 , 0],
    51 : [31 , 255 , 0],
    52 : [255 , 31 , 0],
    53 : [255 , 224 , 0],
    54 : [153 , 255 , 0],
    55 : [0 , 0 , 255],
    56 : [255 , 71 , 0],
    57 : [0 , 235 , 255],
    58 : [0 , 173 , 255],
    59 : [31 , 0 , 255],
    60 : [11 , 200 , 200],
    61 : [255 , 82 , 0],
    62 : [0 , 255 , 245],
    63 : [0 , 61 , 255],
    64 : [0 , 255 , 112],
    65 : [0 , 255 , 133],
    66 : [255 , 0 , 0],
    67 : [255 , 163 , 0],
    68 : [255 , 102 , 0],
    69 : [194 , 255 , 0],
    70 : [0 , 143 , 255],
    71 : [51 , 255 , 0],
    72 : [0 , 82 , 255],
    73 : [0 , 255 , 41],
    74 : [0 , 255 , 173],
    75 : [10 , 0 , 255],
    76 : [173 , 255 , 0],
    77 : [0 , 255 , 153],
    78 : [255 , 92 , 0],
    79 : [255 , 0 , 255],
    80 : [255 , 0 , 245],
    81 : [255 , 0 , 102],
    82 : [255 , 173 , 0],
    83 : [255 , 0 , 20],
    84 : [255 , 184 , 184],
    85 : [0 , 31 , 255],
    86 : [0 , 255 , 61],
    87 : [0 , 71 , 255],
    88 : [255 , 0 , 204],
    89 : [0 , 255 , 194],
    90 : [0 , 255 , 82],
    91 : [0 , 10 , 255],
    92 : [0 , 112 , 255],
    93 : [51 , 0 , 255],
    94 : [0 , 194 , 255],
    95 : [0 , 122 , 255],
    96 : [0 , 255 , 163],
    97 : [255 , 153 , 0],
    98 : [0 , 255 , 10],
    99 : [255 , 112 , 0],
    100 : [143 , 255 , 0],
    101 : [82 , 0 , 255],
    102 : [163 , 255 , 0],
    103 : [255 , 235 , 0],
    104 : [8 , 184 , 170],
    105 : [133 , 0 , 255],
    106 : [0 , 255 , 92],
    107 : [184 , 0 , 255],
    108 : [255 , 0 , 31],
    109 : [0 , 184 , 255],
    110 : [0 , 214 , 255],
    111 : [255 , 0 , 112],
    112 : [92 , 255 , 0],
    113 : [0 , 224 , 255],
    114 : [112 , 224 , 255],
    115 : [70 , 184 , 160],
    116 : [163 , 0 , 255],
    117 : [153 , 0 , 255],
    118 : [71 , 255 , 0],
    119 : [255 , 0 , 163],
    120 : [255 , 204 , 0],
    121 : [255 , 0 , 143],
    122 : [0 , 255 , 235],
    123 : [133 , 255 , 0],
    124 : [255 , 0 , 235],
    125 : [245 , 0 , 255],
    126 : [255 , 0 , 122],
    127 : [255 , 245 , 0],
    128 : [10 , 190 , 212],
    129 : [214 , 255 , 0],
    130 : [0 , 204 , 255],
    131 : [20 , 0 , 255],
    132 : [255 , 255 , 0],
    133 : [0 , 153 , 255],
    134 : [0 , 41 , 255],
    135 : [0 , 255 , 204],
    136 : [41 , 0 , 255],
    137 : [41 , 255 , 0],
    138 : [173 , 0 , 255],
    139 : [0 , 245 , 255],
    140 : [71 , 0 , 255],
    141 : [122 , 0 , 255],
    142 : [0 , 255 , 184],
    143 : [0 , 92 , 255],
    144 : [184 , 255 , 0],
    145 : [0 , 133 , 255],
    146 : [255 , 214 , 0],
    147 : [25 , 194 , 194],
    148 : [102 , 255 , 0],
    149 : [92 , 0 , 255]
}

## Create mapping from 3D ID to final segmentation colour

In [41]:
dict_3D_to_RGB = {}

for k in dict_3D_to_SINGLE_LABEL.keys():
    #Only change colour if label list has voted for one label
    if (dict_3D_to_SINGLE_LABEL[k]):
        dict_3D_to_RGB[k] = colours[dict_3D_to_SINGLE_LABEL[k]]

Sanity check:

In [42]:
tmp = 10

for k in dict_3D_to_RGB.keys():
    if(tmp == 0):
        break
    print(k)
    print(dict_3D_to_RGB[k])
    #print(dict_3D_to_imageName_XY[k][1][1][1])
    tmp-=1
    

449222
[0, 235, 255]
258968
[80, 50, 50]
293614
[120, 120, 80]
424636
[80, 50, 50]
105764
[80, 50, 50]
104390
[120, 120, 80]
608756
[80, 50, 50]
476562
[224, 5, 255]
99441
[0, 255, 194]
304635
[224, 255, 8]


## Change colour of all segmentated points in txt file

In [43]:
keys = dict_3D_to_RGB.keys()

for k in keys:
    pts[k] = pts[k]._replace(rgb=dict_3D_to_RGB[k])

In [44]:
write_points3D_text(pts, "_COLMAP/Model/Text/points3D.txt")
#write_images_text(ims, "_COLMAP/Model/Text/images.txt")

In [45]:
print(len(dict_3D_to_RGB))

126902
