In [36]:
%load_ext autoreload
%autoreload 2

from itertools import combinations
import numpy as np
import pandas as pd
from tqdm import tqdm

from generate.symmetry_config import *
from generate.generate_color_patterns import *

from utils.compressions import compress_for_color

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# assign_symmetry_classes('data', 'color_patterns_unassigned.npy', middle_idx_nr=0)

In [644]:
df = pd.read_csv('data/color_patterns/color_pattern_dataset3.csv')
df.colors = df.colors.map(eval)
df.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,3387468,"[34, 38, 11, 12, 13, 15, 48, 17, 25]",70745
1,1,3387469,"[33, 35, 3, 37, 12, 47, 28, 29, 31]",70745
2,2,3387470,"[32, 1, 2, 4, 6, 8, 43, 18, 52]",70745
3,3,3387471,"[3, 43, 12, 18, 20, 53, 22, 25, 26]",70745
4,4,3387472,"[3, 47, 48, 49, 15, 51, 53, 23, 30]",70745


In [645]:
len(df.index)

1129152

In [646]:
df.symmetry_class.unique().shape

(23524,)

In [647]:
small_df = df[df.symmetry_class < 10000]

In [648]:
len(small_df.index)

0

In [649]:
custom_df = pd.DataFrame({
    'index': [0,1],
    'colors': [[0,1,2,3,4,5,9,10,11],[0,1,2,3,4,5,15,16,17]],
    'symmetry_class': [0,1]
})

# Math Operations

In [60]:
def indices_to_position(indices):
    pos_array = np.array(pos_list)
    return pos_array[indices]

In [163]:
# direction = 'L'
# for i in range(54):
#     print(f'{i:2}', indices_to_position(i))
#     print(f'{rotation_dict[direction][i]:2}', indices_to_position(rotation_dict[direction][i]))
#     print('=============')

## 1. Volume

In [356]:
import functools
from scipy.spatial import ConvexHull

def calc_volume(vertices):
    vertices = indices_to_position(vertices)
#     print(vertices)
    if (vertices[:,0] == vertices[0,0]).all() or (vertices[:,1] == vertices[0,1]).all() or (vertices[:,2] == vertices[0,2]).all():
        volume = 0
    else:
        volume = ConvexHull(vertices).volume
    volume = np.rint(volume*10e4).astype(int)
#     print(volume)
#     print()
    return volume

In [357]:
compress_for_color(df, calc_volume)

  0%|                                                                                | 2/23697 [00:00<35:33, 11.10it/s]

ERROR: Same hashes (400000) for symmetry class 2 and 1





In [358]:
compress_for_color(custom_df, calc_volume)

 50%|█████████████████████████████████████████▌                                         | 1/2 [00:00<00:00, 124.84it/s]

ERROR: Same hashes (300000) for symmetry class 1 and 0





In [393]:
compress_for_color(distances_problems_df, calc_volume)

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  7.27it/s]


## 2a. Distances from middle

In [359]:
def find_middle(vertices):
    for vertex in vertices:
        if (vertex[0] == vertex[1] == 1.5) or (vertex[0] == vertex[2] == 1.5) or (vertex[1] == vertex[2] == 1.5):
            return vertex
    return None

def calc_distances(vertices):
    vertices = indices_to_position(vertices)
    middle = find_middle(vertices)
    distances = np.linalg.norm(vertices - middle.reshape(1, -1), axis=1)
    distances = distances[distances != 0]
    distances = np.sort(distances)
    distances = np.rint(distances*10e4).astype(int)
    return distances

In [360]:
compress_for_color(df, calc_distances)

  0%|                                                                                | 2/23697 [00:00<06:35, 59.85it/s]

ERROR: Same hashes (-7827538487561452301) for symmetry class 2 and 1





In [361]:
compress_for_color(custom_df, calc_distances)

 50%|█████████████████████████████████████████                                         | 1/2 [00:00<00:00, 1577.40it/s]

ERROR: Same hashes (-4928664618097705469) for symmetry class 1 and 0





## 2b. Distances from each other

In [658]:
np.sort(np.array([[2,1,3],[3,4,0],[6,7,5]]), axis=1)

array([[1, 2, 3],
       [0, 3, 4],
       [5, 6, 7]])

In [89]:
from scipy.spatial import distance_matrix
from copy import deepcopy

In [98]:
def calc_distances_b(vertices, return_argsort=False):
    vertices = indices_to_position(vertices)
    distances = distance_matrix(vertices, vertices)
    for message_pass in range(2):
        new_distances = deepcopy(distances)
        for i in range(len(vertices)):
            new_distances += distances[:, i].reshape(1, -1) * distances[i].reshape(-1, 1)
        distances = deepcopy(new_distances)
    distances = np.sort(distances, axis=1)  # .sum(axis=1))
    distances = np.rint(distances*10e4).astype(np.int64)
    return distances

In [670]:
compress_for_color(df, calc_distances_b)

100%|████████████████████████████████████████████████████████████████████████████| 23524/23524 [09:23<00:00, 41.75it/s]


In [667]:
compress_for_color(custom_df, calc_distances_b)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 146.28it/s]


In [668]:
compress_for_color(distances_problems_df, calc_distances_b)

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 62.32it/s]


## 2c. Distances from each other - reordered

In [398]:
from scipy.spatial import distance_matrix

In [407]:
def calc_distances_c(vertices):
    vertices = indices_to_position(vertices)
    distances = distance_matrix(vertices, vertices)
    distances = np.sort(distances, axis=1)
#     distance_indices = np.lexsort(np.rot90(distances))
#     distances = distances[distance_indices].flatten()
#     distances = np.rint(distances*10e4).astype(int)
    return distances

In [400]:
compress_for_color(df, calc_distances_c)

 93%|██████████████████████████████████████████████████████████████████████▊     | 22088/23697 [07:21<00:32, 48.96it/s]

ERROR: Same hashes (-6303895141591738171) for symmetry class 22084 and 22000


 94%|███████████████████████████████████████████████████████████████████████▎    | 22224/23697 [07:23<00:29, 49.47it/s]

ERROR: Same hashes (-6440304866058163833) for symmetry class 22216 and 22030


100%|████████████████████████████████████████████████████████████████████████████| 23697/23697 [07:52<00:00, 50.12it/s]


In [410]:
print(indices_to_position(df[df.symmetry_class == 22084].iloc[0].colors))
print(calc_distances_c(df[df.symmetry_class == 22084].iloc[0].colors))

[[0.5 0.5 0. ]
 [2.5 0.5 0. ]
 [1.5 1.5 0. ]
 [3.  2.5 1.5]
 [2.5 2.5 0. ]
 [0.  0.5 1.5]
 [0.5 2.5 3. ]
 [2.5 0.  1.5]
 [0.  2.5 1.5]]
[[0.         1.41421356 1.58113883 2.         2.54950976 2.54950976
  2.82842712 3.53553391 3.60555128]
 [0.         1.41421356 1.58113883 2.         2.         2.54950976
  2.91547595 3.53553391 4.12310563]
 [0.         1.41421356 1.41421356 1.41421356 2.34520788 2.34520788
  2.34520788 2.34520788 3.31662479]
 [0.         1.58113883 2.34520788 2.54950976 2.54950976 2.91547595
  3.         3.53553391 3.60555128]
 [0.         1.41421356 1.58113883 2.         2.82842712 2.91547595
  2.91547595 3.53553391 3.60555128]
 [0.         1.58113883 2.         2.34520788 2.54950976 2.54950976
  2.91547595 3.53553391 3.60555128]
 [0.         1.58113883 2.54950976 2.91547595 3.31662479 3.53553391
  3.60555128 3.60555128 4.12310563]
 [0.         1.58113883 2.34520788 2.54950976 2.54950976 2.54950976
  2.91547595 3.53553391 3.53553391]
 [0.         1.58113883 2.      

In [411]:
print(indices_to_position(df[df.symmetry_class == 22000].iloc[0].colors))
print(calc_distances_c(df[df.symmetry_class == 22000].iloc[0].colors))

[[2.5 3.  2.5]
 [2.5 3.  0.5]
 [0.5 1.5 0. ]
 [2.5 1.5 0. ]
 [2.5 1.5 3. ]
 [0.  1.5 2.5]
 [0.5 0.  2.5]
 [0.5 3.  0.5]
 [1.5 3.  1.5]]
[[0.         1.41421356 1.58113883 2.         2.82842712 2.91547595
  2.91547595 3.53553391 3.60555128]
 [0.         1.41421356 1.58113883 2.         2.         2.54950976
  2.91547595 3.53553391 4.12310563]
 [0.         1.58113883 2.         2.34520788 2.54950976 2.54950976
  2.91547595 3.53553391 3.60555128]
 [0.         1.58113883 2.         2.34520788 2.54950976 2.91547595
  3.         3.53553391 3.53553391]
 [0.         1.58113883 2.34520788 2.54950976 2.54950976 2.91547595
  3.         3.53553391 3.60555128]
 [0.         1.58113883 2.34520788 2.54950976 2.54950976 2.54950976
  2.91547595 3.53553391 3.53553391]
 [0.         1.58113883 2.54950976 2.91547595 3.31662479 3.53553391
  3.60555128 3.60555128 4.12310563]
 [0.         1.41421356 1.58113883 2.         2.54950976 2.54950976
  2.82842712 3.53553391 3.60555128]
 [0.         1.41421356 1.414213

In [366]:
distances_problems_df = df[(df.symmetry_class == 22084) | (df.symmetry_class == 22000)]

## 2d. Differences from each other

In [583]:
np.repeat(np.array([[0,0,1], [0,1,0], [1,0,0]]), 4, axis=0).reshape(4, 3, -1) - np.array([[0,0,1], [0,1,0], [1,0,0]]).reshape(1, 3, -1)

array([[[ 0,  0,  0],
        [ 0, -1,  1],
        [-1,  0,  1]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [-1,  1,  0]],

       [[ 0,  1, -1],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 1,  0, -1],
        [ 1, -1,  0],
        [ 0,  0,  0]]])

In [610]:
def calc_differences(vertices, return_argsort=False):
    vertices = indices_to_position(vertices)
    vertices_count = len(vertices)
    
    distances = np.repeat(vertices, vertices_count, axis=0).reshape(vertices_count, vertices_count, -1) - vertices.reshape(1, vertices_count, -1)
    distances = np.sum(distances, axis=1)
    print(distances)
    distances = np.abs(distances)
    distances = np.sum(distances, axis=1)
#     distances = np.abs(np.sum(distances, axis=(1,2)))
    distances = np.sort(distances)
    distances = np.rint(distances*10e4).astype(int)
    return distances

In [585]:
compress_for_color(df, calc_differences)

  0%|                                                                                | 1/23697 [00:00<10:22, 38.05it/s]

ERROR: Multiple hashes in a symmetry class 1
6     [18, 19, 20, 21, 22, 24, 25, 26, 30]
7       [1, 9, 10, 11, 12, 13, 14, 15, 17]
8     [39, 45, 46, 47, 48, 49, 51, 52, 53]
9     [36, 37, 38, 40, 41, 42, 43, 44, 50]
10      [3, 9, 10, 11, 13, 14, 15, 16, 17]
11            [0, 1, 2, 3, 4, 6, 7, 8, 14]
12    [36, 37, 38, 39, 40, 42, 43, 44, 48]
13    [36, 37, 38, 39, 40, 41, 42, 44, 52]
14      [7, 9, 11, 12, 13, 14, 15, 16, 17]
15    [33, 34, 35, 21, 27, 28, 29, 30, 31]
16            [0, 1, 2, 3, 4, 5, 6, 8, 10]
17            [0, 2, 3, 4, 5, 6, 7, 8, 16]
18            [0, 1, 2, 4, 5, 6, 7, 8, 12]
19    [32, 33, 35, 25, 27, 28, 29, 30, 31]
20    [36, 38, 39, 40, 41, 42, 43, 44, 46]
21    [32, 33, 34, 35, 23, 27, 28, 29, 31]
22    [37, 45, 47, 48, 49, 50, 51, 52, 53]
23    [32, 18, 19, 20, 22, 23, 24, 25, 26]
24    [41, 45, 46, 47, 49, 50, 51, 52, 53]
25    [43, 45, 46, 47, 48, 49, 50, 51, 53]
26    [18, 20, 21, 22, 23, 24, 25, 26, 28]
27    [32, 33, 34, 35, 19, 27, 29, 30, 31]
28    [34




In [None]:
compress_for_color(custom_df, calc_differences)

In [611]:
compress_for_color(df[df.symmetry_class == 1].iloc[4:6], calc_differences)
# compress_for_color(df[df.symmetry_class == 22000].iloc[0:1], calc_differences)

100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 167.05it/s]

[[ -7.   0. -24.]
 [ 11.  -9.   3.]
 [  2.  -9.   3.]
 [ -7.  -9.   3.]
 [  2.   0.   3.]
 [ -7.   0.   3.]
 [ 11.   9.   3.]
 [  2.   9.   3.]
 [ -7.   9.   3.]]
[[-7. -9. -3.]
 [ 2. -9. -3.]
 [11. -9. -3.]
 [-7.  0. -3.]
 [ 2.  0. -3.]
 [-7.  9. -3.]
 [ 2.  9. -3.]
 [11.  9. -3.]
 [-7.  0. 24.]]





In [612]:
compress_for_color(distances_problems_df, calc_differences)

 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00,  7.68it/s]

[[  9.5   9.    9.5]
 [  9.5   9.   -8.5]
 [ -8.5  -4.5 -13. ]
 [  9.5  -4.5 -13. ]
 [  9.5  -4.5  14. ]
 [-13.   -4.5   9.5]
 [ -8.5 -18.    9.5]
 [ -8.5   9.   -8.5]
 [  0.5   9.    0.5]]
[[  8.5   9.    8.5]
 [ -9.5  -4.5 -14. ]
 [ 13.   -4.5  -9.5]
 [  8.5  -4.5  13. ]
 [ -9.5  -4.5  13. ]
 [  8.5 -18.   -9.5]
 [ -9.5   9.    8.5]
 [ -9.5   9.   -9.5]
 [ -0.5   9.   -0.5]]
[[  9.5  -8.5 -18. ]
 [ 14.    9.5  -4.5]
 [ -8.5  -8.5   9. ]
 [  0.5   0.5   9. ]
 [-13.   -8.5  -4.5]
 [  9.5   9.5   9. ]
 [ -8.5   9.5   9. ]
 [  9.5 -13.   -4.5]
 [-13.    9.5  -4.5]]
[[  8.5   9.    8.5]
 [  8.5   9.   -9.5]
 [ 13.   -4.5   8.5]
 [ 13.   -4.5  -9.5]
 [ -9.5  -4.5  13. ]
 [-14.   -4.5  -9.5]
 [ -9.5 -18.    8.5]
 [ -9.5   9.   -9.5]
 [ -0.5   9.   -0.5]]
[[ -8.5  -9.5 -18. ]
 [  9.5  13.   -4.5]
 [  9.5  -9.5   9. ]
 [  0.5  -0.5   9. ]
 [-13.   -9.5  -4.5]
 [  9.5   8.5   9. ]
 [ -8.5   8.5   9. ]
 [  9.5 -14.   -4.5]
 [ -8.5  13.   -4.5]]
[[  8.5  -8.5  -9. ]
 [ -0.5   0.5  -9. ]
 [ 13.  




## 3a. Angles to middle

In [420]:
def unit_vector(vector):
    """ Returns the unit vector of the vector. """
    return vector / np.linalg.norm(vector)

def angle_between(v1, v2):
    """ Returns the angle in radians between vectors 'v1' and 'v2' """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    return np.abs(np.arccos(np.clip(np.dot(v1_u, v2_u.T), -1.0, 1.0)))

def calc_angles(vertices):
    vertices = indices_to_position(vertices)
    middle = find_middle(vertices)
    distances = np.linalg.norm(vertices - middle.reshape(1, -1), axis=1)
    vertices = vertices[distances != 0]  # - middle,  np.zeros(3)
    vertices -= np.array([1.5, 1.5, 1.5])
    middle -= np.array([1.5, 1.5, 1.5])
    angles = np.array([angle_between(v, middle) for v in vertices])
    angles = np.sort(angles)
    angles = np.rint(angles*10e4).astype(int)
#     print(angles)
    return angles

In [None]:
compress_for_color(df, calc_angles)

In [321]:
compress_for_color(custom_df, calc_angles)

 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00, 59.46it/s]

[ 58800  58800  58800  75597  75597 238562 238562 255359]
[ 58800  58800  58800  75597  75597 238562 238562 255359]
ERROR: Same hashes (-2672999848650892673) for symmetry class 1 and 0





## 3b. Angles between each other

In [426]:
def calc_angles_b(vertices):
    vertices = indices_to_position(vertices)
    vertices -= np.array([1.5, 1.5, 1.5])
    angles = []
    for i, vertex1 in enumerate(vertices):
        for j, vertex2 in enumerate(vertices[i+1:]):
            angles.append(angle_between(vertex1, vertex2))
    angles = np.array(angles)
    angles = np.sort(angles)
    angles = np.rint(angles*10e4).astype(int)
    return angles

In [329]:
compress_for_color(df, calc_angles_b)

  0%|                                                                               | 37/23697 [00:05<58:33,  6.73it/s]

ERROR: Same hashes (-283690757613734283) for symmetry class 37 and 34





In [331]:
compress_for_color(custom_df, calc_angles_b)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 185.24it/s]


## 3c. Angles between each other BETTER

In [427]:
def calc_angles_c(vertices):
    vertices = indices_to_position(vertices)
    vertices -= np.array([1.5, 1.5, 1.5])    
    angles = angle_between(vertices, vertices)
    angles = np.abs(np.sum(angles, axis=1))
    angles = np.sort(angles)
    angles = np.rint(angles*10e4).astype(int)
    return angles

In [428]:
compress_for_color(df, calc_angles_c)

  0%|▍                                                                             | 115/23697 [00:02<07:58, 49.29it/s]

ERROR: Same hashes (-3945103403229245425) for symmetry class 115 and 33





In [429]:
compress_for_color(custom_df, calc_angles_c)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 186.17it/s]


## 3d. Distances B + Angles between each other BETTER

In [521]:
def calc_distances_with_indices(vertices):
    vertices = indices_to_position(vertices)
    distances = distance_matrix(vertices, vertices).sum(axis=1)
    distances_indices = np.argsort(distances)
    distances = distances[distances_indices]
    distances = np.rint(distances*10e4).astype(int)
    return distances, distances_indices

def calc_distances_with_indices_b(vertices):
    vertices = indices_to_position(vertices)
    distances = distance_matrix(vertices, vertices)
    distances_inner_ind = np.argsort(distances, axis=1)
    distances = distances[np.arange(distances.shape[0])[:,None], distances_inner_ind]
#     distances_outer_ind = np.argsort(distances, axis=0)
    distances_outer_ind = np.lexsort(np.rot90(distances))
    distances = distances[distances_outer_ind].flatten()
    distances = np.rint(distances*10e4).astype(int)
    return distances, distances_inner_ind, distances_outer_ind

def calc_unsorted_distances(vertices):
    vertices = indices_to_position(vertices)
    distances = distance_matrix(vertices, vertices)
    distances = np.rint(distances*10e4).astype(int)
    return distances

def calc_unsorted_angles(vertices):
    vertices = indices_to_position(vertices)
    vertices -= np.array([1.5, 1.5, 1.5])
    angles = angle_between(vertices, vertices)
    angles = np.rint(angles*10e4).astype(int)
    return angles

In [534]:
def calc_distances_b_angles_c(vertices):
    distances, inner_ind, outer_ind = calc_distances_with_indices_b(vertices)
    angles = calc_unsorted_angles(vertices)
    angles = angles[np.arange(angles.shape[0])[:,None], inner_ind]
    angles = angles[outer_ind].flatten()
    print(angles)
#     print(angles.shape)
    return np.concatenate((distances, angles))

In [536]:
compress_for_color(df, calc_distances_b_angles_c)

  0%|                                                                                        | 0/23697 [00:00<?, ?it/s]

[150097 150097 150097 150097 150097 150097 150097 150097 150097 146985
 146985 146985 150097 150097 150097 153203 153203 153203 146985 146985
 150097 146985 150097 150097 153203 153203 153203 146985 146985 150097
 146985 150097 150097 153203 153203 153203 146985 150097 146985 146985
 150097 150097 153203 153203 153203 143863 146985 146985 150097 150097
 150097 153203 153203 156304 143863 146985 146985 150097 150097 150097
 153203 153203 156304 143863 146985 146985 150097 150097 150097 153203
 153203 156304 143863 146985 146985 150097 150097 150097 153203 153203
 156304]
[150097 150097 150097 150097 150097 150097 150097 150097 150097 146985
 146985 146985 150097 150097 150097 153203 153203 153203 146985 146985
 150097 146985 150097 150097 153203 153203 153203 146985 146985 150097
 146985 150097 150097 153203 153203 153203 146985 150097 146985 146985
 150097 150097 153203 153203 153203 143863 146985 146985 150097 150097
 150097 153203 153203 156304 143863 146985 146985 150097 150097 1500




In [None]:
compress_for_color(custom_df, calc_distances_b_angles_c)

In [453]:
compress_for_color(distances_problems_df, calc_distances_b_angles_c)

 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00, 15.90it/s]

ERROR: Same hashes (-3533391555178090019) for symmetry class 22084 and 22000





## 4. Volume + Distances B (+ Angles B)

In [396]:
def calc_volume_distances_angles(vertices):
#     volume = calc_volume(vertices)
    distances = calc_distances_b(vertices)
    angles = calc_angles_b(vertices)
    return np.concatenate((distances, angles))
#     return np.concatenate((np.array([volume]), distances))

In [375]:
compress_for_color(df, calc_volume_distances)

100%|████████████████████████████████████████████████████████████████████████████| 23697/23697 [58:30<00:00,  6.75it/s]


In [397]:
compress_for_color(distances_problems_df, calc_volume_distances_angles)

 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00,  2.57it/s]

ERROR: Same hashes (1339496312954611348) for symmetry class 22084 and 22000





In [343]:
indices_to_position(df[df.symmetry_class == 92].iloc[0].colors)

array([[1.5, 3. , 0.5],
       [0. , 0.5, 2.5],
       [0. , 0.5, 0.5],
       [0. , 1.5, 1.5],
       [0. , 1.5, 0.5],
       [0. , 2.5, 2.5],
       [0. , 2.5, 1.5],
       [0. , 2.5, 0.5],
       [1.5, 0. , 2.5]])

In [344]:
indices_to_position(df[df.symmetry_class == 53].iloc[0].colors)

array([[0.5, 1.5, 0. ],
       [2.5, 1.5, 3. ],
       [2.5, 0. , 2.5],
       [2.5, 0. , 0.5],
       [1.5, 0. , 2.5],
       [1.5, 0. , 1.5],
       [0.5, 0. , 2.5],
       [0.5, 0. , 1.5],
       [0.5, 0. , 0.5]])

In [373]:
compress_for_color(custom_df, calc_volume_distances)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 141.46it/s]


## Vector Products

In [183]:
def calc_cross_product(vertices):
    vertices = indices_to_position(vertices)
    middle = find_middle(vertices)
    vertices -= np.array([1.5, 1.5, 1.5])
    cross_products = np.cross(vertices, middle.reshape(1, -1), axis=1)
    cross_product = np.sum(cross_products, axis=0)
    cross_product = np.rint(cross_product*10e4).astype(int)
    return cross_product

In [229]:
compress_for_color(df, calc_cross_product)

  0%|                                                                                | 1/23697 [00:00<11:30, 34.33it/s]

ERROR: Multiple hashes in a symmetry class 1
6     [18, 19, 20, 21, 22, 24, 25, 26, 30]
7       [1, 9, 10, 11, 12, 13, 14, 15, 17]
8     [39, 45, 46, 47, 48, 49, 51, 52, 53]
9     [36, 37, 38, 40, 41, 42, 43, 44, 50]
10      [3, 9, 10, 11, 13, 14, 15, 16, 17]
11            [0, 1, 2, 3, 4, 6, 7, 8, 14]
12    [36, 37, 38, 39, 40, 42, 43, 44, 48]
13    [36, 37, 38, 39, 40, 41, 42, 44, 52]
14      [7, 9, 11, 12, 13, 14, 15, 16, 17]
15    [33, 34, 35, 21, 27, 28, 29, 30, 31]
16            [0, 1, 2, 3, 4, 5, 6, 8, 10]
17            [0, 2, 3, 4, 5, 6, 7, 8, 16]
18            [0, 1, 2, 4, 5, 6, 7, 8, 12]
19    [32, 33, 35, 25, 27, 28, 29, 30, 31]
20    [36, 38, 39, 40, 41, 42, 43, 44, 46]
21    [32, 33, 34, 35, 23, 27, 28, 29, 31]
22    [37, 45, 47, 48, 49, 50, 51, 52, 53]
23    [32, 18, 19, 20, 22, 23, 24, 25, 26]
24    [41, 45, 46, 47, 49, 50, 51, 52, 53]
25    [43, 45, 46, 47, 48, 49, 50, 51, 53]
26    [18, 20, 21, 22, 23, 24, 25, 26, 28]
27    [32, 33, 34, 35, 19, 27, 29, 30, 31]
28    [34




## Determinant

In [387]:
def calc_determinant(vertices):
    vertices = indices_to_position(vertices)
    vertices -= np.array([1.5, 1.5, 1.5])
    square_mat = np.dot(vertices.T, vertices)
    determinant = np.linalg.det(square_mat)
    determinant = np.rint(determinant*10e2).astype(int)
    return determinant

In [388]:
compress_for_color(df, calc_determinant)

  0%|                                                                                | 1/23697 [00:00<06:25, 61.54it/s]

ERROR: Same hashes (729000) for symmetry class 1 and 0





In [389]:
compress_for_color(distances_problems_df, calc_determinant)

 50%|██████████████████████████████████████████                                          | 1/2 [00:00<00:00, 42.32it/s]

ERROR: Same hashes (1033734) for symmetry class 22084 and 22000





In [390]:
compress_for_color(custom_df, calc_determinant)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 351.69it/s]


## Solid Angle

## ChatGPT suggestion

In [261]:
def calc_something(vertices):
    vertices = indices_to_position(vertices)
    mean_vector = np.mean(vertices, axis=0)
    vertices -= mean_vector.reshape(1, -1)
    cov_mat = np.cov(vertices.T)
    eigenvalues, eigenvectors = np.linalg.eig(cov_mat)
    eigenvec = eigenvectors[np.argmax(eigenvalues)]
    print(eigenvec)
    somethings = eigenvec.reshape(1,-1) * (np.dot(vertices, eigenvec.reshape(-1, 1)) / np.dot(eigenvec, eigenvec)).reshape(-1, 1)
#     print(somethings)
    something = np.mean(somethings, axis=0)
    something = np.rint(something*10e4).astype(int)
    return something

In [263]:
compress_for_color(df, calc_something)

  0%|                                                                                | 1/23697 [00:00<19:12, 20.55it/s]

[1. 0. 0.]
[0. 1. 0.]
[1. 0. 0.]
[0. 1. 0.]
[1. 0. 0.]
[1. 0. 0.]
[ 0.          0.85557203 -0.51768379]
[0. 0. 1.]
[0.85557203 0.51768379 0.        ]
[0.85557203 0.51768379 0.        ]
[0. 0. 1.]
[0. 0. 1.]
[0.85557203 0.51768379 0.        ]
[0.85557203 0.51768379 0.        ]
[0. 0. 1.]
[0.         0.85557203 0.51768379]
[0. 0. 1.]
[0. 0. 1.]
[ 0.  0. -1.]
[ 0.51768379 -0.85557203  0.        ]
[ 0.85557203 -0.51768379  0.        ]
[ 0.          0.85557203 -0.51768379]
[ 8.55572029e-01 -5.17683786e-01  4.09029535e-17]
[ 0.         -0.85557203  0.51768379]
[0.85557203 0.51768379 0.        ]
[0.85557203 0.51768379 0.        ]
[-0.51768379 -0.85557203  0.        ]
[-0.51768379 -0.85557203  0.        ]
[ 0.51768379 -0.85557203  0.        ]
[0. 0. 1.]
ERROR: Same hashes (-5249590279670061114) for symmetry class 1 and 0





In [248]:
compress_for_color(custom_df, calc_something)

 50%|█████████████████████████████████████████▌                                         | 1/2 [00:00<00:00, 143.06it/s]

ERROR: Same hashes (-5249590279670061114) for symmetry class 1 and 0





# Debug Problems

In [20]:
df_509 = pd.read_csv('data/color_patterns/color_pattern_dataset509.csv')
df_509.colors = df_509.colors.map(eval)
df_509.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,574738008,"[0, 39, 40, 8, 14, 15, 17, 52, 23]",11978565
1,1,574738009,"[0, 8, 41, 11, 16, 17, 21, 22, 28]",11978565
2,2,574738010,"[34, 35, 41, 10, 49, 18, 52, 26, 29]",11978565
3,3,574738011,"[32, 37, 38, 44, 14, 47, 19, 51, 22]",11978565
4,4,574738012,"[4, 7, 43, 14, 18, 26, 27, 29, 30]",11978565


In [3]:
df_511 = pd.read_csv('data/color_patterns/color_pattern_dataset511.csv')
df_511.colors = df_511.colors.map(eval)
df_511.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,576996312,"[35, 43, 13, 46, 45, 16, 23, 24, 26]",12025613
1,1,576996313,"[12, 13, 44, 50, 51, 53, 25, 27, 28]",12025613
2,2,576996314,"[33, 4, 7, 43, 46, 47, 21, 24, 26]",12025613
3,3,576996315,"[34, 3, 44, 45, 46, 14, 17, 51, 31]",12025613
4,4,576996316,"[1, 2, 38, 8, 41, 11, 50, 30, 31]",12025613


In [134]:
df_512 = pd.read_csv('data/color_patterns/color_pattern_dataset512.csv')
df_512.colors = df_512.colors.map(eval)
df_512.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,578125464,"[32, 6, 40, 8, 11, 46, 48, 20, 21]",12049137
1,1,578125465,"[2, 8, 41, 47, 48, 17, 22, 28, 30]",12049137
2,2,578125466,"[35, 4, 42, 44, 14, 16, 51, 19, 28]",12049137
3,3,578125467,"[35, 5, 37, 7, 42, 13, 46, 24, 26]",12049137
4,4,578125468,"[1, 36, 5, 43, 13, 20, 52, 27, 29]",12049137


In [112]:
df_514 = pd.read_csv('data/color_patterns/color_pattern_dataset514.csv')
df_514.colors = df_514.colors.map(eval)
df_514.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,580383768,"[34, 6, 7, 9, 11, 48, 49, 20, 21]",12096185
1,1,580383769,"[1, 8, 12, 46, 49, 20, 26, 27, 30]",12096185
2,2,580383770,"[0, 35, 37, 7, 40, 14, 18, 21, 29]",12096185
3,3,580383771,"[32, 2, 8, 41, 42, 14, 46, 17, 31]",12096185
4,4,580383772,"[4, 7, 39, 45, 19, 20, 52, 27, 29]",12096185


In [113]:
df_515 = pd.read_csv('data/color_patterns/color_pattern_dataset515.csv')
df_515.colors = df_515.colors.map(eval)
df_515.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,581512920,"[35, 5, 6, 8, 43, 11, 49, 52, 21]",12119709
1,1,581512921,"[0, 34, 6, 7, 39, 9, 53, 22, 25]",12119709
2,2,581512922,"[32, 0, 36, 12, 46, 47, 53, 21, 31]",12119709
3,3,581512923,"[0, 2, 3, 40, 43, 15, 52, 26, 30]",12119709
4,4,581512924,"[32, 5, 44, 45, 17, 51, 52, 21, 22]",12119709


In [106]:
df_516 = pd.read_csv('data/color_patterns/color_pattern_dataset516.csv')
df_516.colors = df_516.colors.map(eval)
df_516.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,582642072,"[3, 36, 5, 41, 45, 13, 47, 28, 29]",12143233
1,1,582642073,"[34, 38, 44, 45, 12, 15, 52, 22, 28]",12143233
2,2,582642074,"[38, 9, 44, 12, 46, 19, 51, 25, 31]",12143233
3,3,582642075,"[33, 35, 4, 10, 16, 51, 52, 21, 26]",12143233
4,4,582642076,"[32, 1, 33, 7, 43, 44, 13, 24, 26]",12143233


In [172]:
df_527 = pd.read_csv('data/color_patterns/color_pattern_dataset527.csv')
df_527.colors = df_527.colors.map(eval)
df_527.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,595062744,"[0, 3, 12, 15, 48, 49, 18, 24, 30]",12402158
1,1,595062745,"[34, 35, 7, 15, 49, 17, 52, 24, 25]",12402158
2,2,595062746,"[32, 1, 2, 41, 47, 16, 17, 53, 31]",12402158
3,3,595062747,"[3, 42, 11, 43, 17, 52, 53, 22, 25]",12402158
4,4,595062748,"[32, 0, 1, 38, 44, 15, 16, 50, 31]",12402158


In [147]:
df_529 = pd.read_csv('data/color_patterns/color_pattern_dataset529.csv')
df_529.colors = df_529.colors.map(eval)
df_529.head()

Unnamed: 0.1,Unnamed: 0,index,colors,symmetry_class
0,0,597321048,"[7, 40, 9, 15, 18, 21, 24, 28, 30]",12449206
1,1,597321049,"[3, 7, 41, 11, 45, 14, 47, 17, 22]",12449206
2,2,597321050,"[5, 7, 15, 16, 49, 17, 23, 27, 29]",12449206
3,3,597321051,"[33, 1, 40, 9, 15, 21, 25, 27, 30]",12449206
4,4,597321052,"[5, 7, 9, 42, 12, 44, 15, 50, 22]",12449206


In [28]:
distances_problems_df_509 = df_509[(df_509.symmetry_class == 12039903) | (df_509.symmetry_class == 11997399)]
distances_problems_df_509_ex = distances_problems_df_509.iloc[0].colors

In [29]:
distances_problems_df_511 = df_511[(df_511.symmetry_class == 12039903) | (df_511.symmetry_class == 11997399)]
distances_problems_df_511_ex = distances_problems_df_511.iloc[0].colors

In [57]:
print(visualize_color_indices(distances_problems_df_511_ex))

   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      


In [104]:
counter_i = 0
for ex in distances_problems_df_509.colors:
    if 22 not in ex:
        continue
    counter_i += 1
    print(visualize_color_indices(ex))
    print("\n")

   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      


   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      


   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m

In [71]:
print(visualize_color_indices(distances_problems_df_509_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [99]:
problem1 = calc_distances_b(distances_problems_df_511_ex)
print('\n'.join([' '.join(map(str, row)) for row in problem1]))

2426544357 2561404872 2575653077 2660080535 2664212781 2723934738 2806593180 2854918928 2960869223
2559460714 2703979405 2714993574 2807027897 2808994569 2876442392 2960869223 3009923204 3124550551
2489461743 2509398578 2550728594 2660443724 2664212781 2703488120 2724125961 2808994569 2814490006
2567072445 2635176018 2677416302 2797868908 2799347530 2814490006 2854918928 2968829883 3009923204
2276735437 2291284819 2326065060 2426544357 2429897152 2472764925 2489461743 2559460714 2567072445
2472764925 2504457573 2534677877 2657909787 2660080535 2689926550 2703488120 2799347530 2807027897
2291284819 2388384984 2390618256 2504457573 2509398578 2537835855 2561404872 2635176018 2703979405
2429897152 2536642009 2537835855 2657909787 2660443724 2697740634 2723934738 2797868908 2876442392
2326065060 2390618256 2423100711 2534677877 2536642009 2550728594 2575653077 2677416302 2714993574


In [100]:
problem2 = calc_distances_b(distances_problems_df_509_ex)
print('\n'.join([' '.join(map(str, row)) for row in problem2]))

2429897152 2536642009 2537835855 2657909787 2660443724 2697740634 2723934738 2797868908 2876442392
2291284819 2388384984 2390618256 2504457573 2509398578 2537835855 2561404872 2635176018 2703979405
2489461743 2509398578 2550728594 2660443724 2664212781 2703488120 2724125961 2808994569 2814490006
2276735437 2291284819 2326065060 2426544357 2429897152 2472764925 2489461743 2559460714 2567072445
2472764925 2504457573 2534677877 2657909787 2660080535 2689926550 2703488120 2799347530 2807027897
2559460714 2703979405 2714993574 2807027897 2808994569 2876442392 2960869223 3009923204 3124550551
2567072445 2635176018 2677416302 2797868908 2799347530 2814490006 2854918928 2968829883 3009923204
2426544357 2561404872 2575653077 2660080535 2664212781 2723934738 2806593180 2854918928 2960869223
2326065060 2390618256 2423100711 2534677877 2536642009 2550728594 2575653077 2677416302 2714993574


In [135]:
distances_problems_df_512 = df_512[(df_512.symmetry_class == 12050529) | (df_512.symmetry_class == 12008025)]
distances_problems_df_512_ex = distances_problems_df_512.iloc[7].colors
print(visualize_color_indices(distances_problems_df_512_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[90m-
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [146]:
distances_problems_df_512 = df_510[(df_510.symmetry_class == 12050529) | (df_510.symmetry_class == 12008025)]
distances_problems_df_512_ex = distances_problems_df_512.iloc[9].colors
print(visualize_color_indices(distances_problems_df_512_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[91mX
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [121]:
distances_problems_df_516_1 = df_516[(df_516.symmetry_class == 12146163) | (df_516.symmetry_class == 12103659)]
distances_problems_df_516_1_ex = distances_problems_df_516_1.iloc[7].colors
print(visualize_color_indices(distances_problems_df_516_1_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[91mX[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [122]:
distances_problems_df_516_1 = df_514[(df_514.symmetry_class == 12146163) | (df_514.symmetry_class == 12103659)]
distances_problems_df_516_1_ex = distances_problems_df_516_1.iloc[0].colors
print(visualize_color_indices(distances_problems_df_516_1_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [132]:
distances_problems_df_516_2 = df_516[(df_516.symmetry_class == 12156789) | (df_516.symmetry_class == 12114285)]
distances_problems_df_516_2_ex = distances_problems_df_516_2.iloc[2].colors
print(visualize_color_indices(distances_problems_df_516_2_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[91mX[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [133]:
distances_problems_df_516_2 = df_514[(df_514.symmetry_class == 12156789) | (df_514.symmetry_class == 12114285)]
distances_problems_df_516_2_ex = distances_problems_df_516_2.iloc[5].colors
print(visualize_color_indices(distances_problems_df_516_2_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [111]:
len(distances_problems_df_516_2)

48

In [166]:
distances_problems_df_529 = df_529[(df_529.symmetry_class == 12459791) | (df_529.symmetry_class == 12406661)]
distances_problems_df_529_ex = distances_problems_df_529.iloc[11].colors
print(visualize_color_indices(distances_problems_df_529_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-[90m-[90m-
[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      


In [174]:
distances_problems_df_529 = df_527[(df_527.symmetry_class == 12459791) | (df_527.symmetry_class == 12406661)]
distances_problems_df_529_ex = distances_problems_df_529.iloc[0].colors
print(visualize_color_indices(distances_problems_df_529_ex))

   [90m-[90m-[90m-      
   [90m-[91mX[90m-      
   [90m-[90m-[90m-      
[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[90m-
[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX[90m-[90m-[91mX
[90m-[90m-[90m-[90m-[90m-[90m-[90m-[90m-[91mX[90m-[90m-[91mX
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
   [90m-[90m-[90m-      
