In [1]:
import json
import sys

import numpy as np
from scipy.sparse import *

## Examples
SDR Matrix

    { 0: array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      1: array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      2: array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      3: array([1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      4: array([1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      5: array([1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      6: array([1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      7: array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      8: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      9: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     10: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     11: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     12: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     13: array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     14: array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=int8),
     15: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int8),
     16: array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int8),
     17: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], dtype=int8),
     18: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int8),
     19: array([1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype=int8),
     20: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int8)}

Features & Leaves (a feature is a row containing a single 1; a leaf is a column containing a single 1)

    features = [0, 20]
    leaves = [2, 3, 6, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20]


Feature to Delete

    [20,:] = 0

Feature to Change

    [0,0] = 0
    [0,[1,2]] = 1
    [Any row with 1 in column 0, [1,2]] = 1
    [:,0] = 0
    
Modified SDR Matrix

    { 0: array([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      1: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      2: array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      3: array([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      4: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      5: array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      6: array([0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      7: array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      8: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
      9: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     10: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     11: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     12: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     13: array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
     14: array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=int8),
     15: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int8),
     16: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int8),
     17: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], dtype=int8),
     18: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int8),
     19: array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype=int8),
     20: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)}
     
Features & Leaves

    features = [1, 2]
    leaves = [3, 6, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19]
    
Features to Change

    nearest neighbors 1: [...]
    nearest neighbors 2: [...]

    [1,1] = 0
    [1,[...]] = 1
    [Any row with 1 in column 1, [...]] = 1
    [:,1] = 0
    
    Repeat the same four steps for feature 2
    


In [2]:
def load_original():
    """
    Build the original SDR matrix from the './rows' and './cols' index files
    :return: 117791 x 117791 COO sparse matrix (int8) holding a 1 for every
             (row, col - 1) pair read from the files
    """
    row_idx = np.fromfile('./rows', dtype=np.uint32)
    # Column indices are shifted down by one before being used
    col_idx = np.fromfile('./cols', dtype=np.uint32) - 1
    ones = np.ones(len(row_idx), dtype=np.int8)
    return coo_matrix((ones, (row_idx, col_idx)), shape=(117791, 117791))

def get_features_leaves(sdrs):
    """
    Find features and leaves of SDR matrix
    A feature is a row whose entries sum to 1; a leaf is a column whose
    entries sum to 1.
    :param sdrs: sparse matrix (See scipy.sparse) or dense 2-D numpy array
    :return: (tuple) indices of features, indices of leaves
    """
    # np.asarray(...).ravel() flattens both the np.matrix returned by sparse
    # matrices and the plain ndarray returned by dense input, so a dense
    # array no longer has to be converted back to a sparse matrix first.
    lengths = np.asarray(sdrs.sum(axis=1)).ravel()
    counts = np.asarray(sdrs.sum(axis=0)).ravel()
    features = np.flatnonzero(lengths == 1)
    leaves = np.flatnonzero(counts == 1)
    return features, leaves

def load_distances(shape):
    """
    Read the float64 values stored in './distances.bin' and reshape them
    :param shape: target shape handed to ndarray.reshape
    :return: ndarray of distances with the requested shape
    """
    return np.fromfile('./distances.bin', dtype=np.float64).reshape(shape)

In [3]:
# Load the original SDR matrix, then locate its features (rows summing to 1)
# and leaves (columns summing to 1)
sdrs = load_original()
features,leaves = get_features_leaves(sdrs)

In [7]:
# Indices that are both a feature and a leaf are the ones slated for deletion
shared = set(features).intersection(leaves)
todelete = np.array(list(shared))
print(len(todelete), "features to delete")

0 features to delete


In [5]:
# Densify so that individual cells can be assigned
sdrs = sdrs.toarray()
# col_map records what each column index currently maps to; start with the
# identity. It is sized from the matrix itself rather than a hard-coded 117791.
col_map = {x: x for x in range(sdrs.shape[0])}
for index in todelete:
    # Clear the diagonal entry of the deleted feature and mark its column as removed
    sdrs[index,index] = 0
    col_map[index] = None
# Back to sparse form and refresh the feature / leaf lists
sdrs = coo_matrix(sdrs)
features,leaves = get_features_leaves(sdrs)

In [9]:
# Every remaining feature that is not being deleted has to be remapped
remaining = set(features).difference(todelete)
tochange = np.array(list(remaining))
print(len(tochange), "features to change")

# Write the indices to change and the current matrix coordinates to disk
tochange.astype(np.uint32).tofile('./tochange.bin')
sdrs.row.tofile('./rows.bin')
sdrs.col.tofile('./cols.bin')

109 features to change


In [18]:
# Load the distances as rows of 117791 values (row count inferred from the file size)
dmatrix = load_distances((-1,117791))
# Replace exact-zero distances with 10.0 so they are not picked by the
# minimum search in the next cell (presumably self / identical matches — confirm)
dmatrix[dmatrix == 0.0] = 10.0

In [34]:
sdrs = sdrs.toarray()
# Row i of dmatrix holds the distances for feature tochange[i]
for i in range(dmatrix.shape[0]):
    index = tochange[i]
    # Find nearest neighbors (every column tied for the minimum distance)
    distances = dmatrix[i,:]
    minimum = np.min(distances)
    mapping = np.argwhere(distances == minimum).flatten()
    # Update SDR: replace the feature's own bit with its nearest neighbors
    sdrs[index,index] = 0
    sdrs[index,mapping] = 1
    col_map[index] = mapping
    # Update SDRs using index as feature: one vectorized assignment sets the
    # neighbor columns on all of those rows instead of looping row by row
    r = np.argwhere(sdrs[:,index] == 1).flatten()
    sdrs[np.ix_(r, mapping)] = 1
    # Remove index as feature
    sdrs[:,index] = 0

sdrs = coo_matrix(sdrs)
features,leaves = get_features_leaves(sdrs)

In [49]:
# Split the current features into those that are also leaves (to delete)
# and the remainder (to change), then report both
todelete = np.array(list(set(features).intersection(leaves)))
tochange = np.array(list(set(features).difference(todelete)))
print(len(todelete), "features to delete")
print(len(tochange), "features to change")
print(tochange)

0 features to delete
2 features to change
[95269 95270]


In [50]:
"""
Manually change; currently 95269 maps to 95270
Remap 95270 and map 95269 to 95270's mapping
"""
def computeDistance(a, b):
    """
    Compute distance between two numpy arrays
    distance = (sum |a - b|) ** 2 / (count(a OR b) * max(sum(a), sum(b)))
    :param a: numpy array
    :param b: numpy array
    :return: (float) distance; 0.0 when the denominator is zero
             (e.g. both arrays are all zeros)
    """
    assert len(a) == len(b), 'Arrays must be same length'

    a = np.asarray(a)
    b = np.asarray(b)
    # Promote bool / integer inputs to int64 so that a - b cannot wrap around
    # (uint8: 0 - 1 == 255) and bool subtraction does not raise
    if a.dtype.kind in 'bui':
        a = a.astype(np.int64)
    if b.dtype.kind in 'bui':
        b = b.astype(np.int64)

    top = np.sum(np.abs(a-b)) ** 2
    bottom = np.sum(np.logical_or(a,b)) * max(a.sum(),b.sum())
    if bottom == 0:
        # Avoid 0/0 -> NaN, which would poison any later np.min over distances
        return 0.0
    return float(top) / bottom

# Feature being remapped by hand. The original cell used the stale global
# `index` left over from an earlier loop for the last two steps, so column
# 95270 was never actually propagated or cleared.
target = 95270
sdrs = sdrs.toarray()
# Compute dmatrix: distance from the target SDR to every SDR
a = sdrs[target,:]
distances = np.array([computeDistance(a, sdrs[i,:]) for i in range(sdrs.shape[0])])
# Find nearest neighbors (zero distances are excluded by overwriting them with 10.0)
distances[distances == 0] = 10.0
minimum = np.min(distances)
mapping = np.argwhere(distances == minimum).flatten()
# Update 95270
sdrs[target,target] = 0
sdrs[target,mapping] = 1
col_map[95270] = mapping
col_map[95269] = mapping
# Update SDRs using 95270 as feature (this includes 95269)
r = np.argwhere(sdrs[:,target] == 1).flatten()
sdrs[np.ix_(r, mapping)] = 1
# Remove 95270 as feature
sdrs[:,target] = 0
sdrs = coo_matrix(sdrs)
features,leaves = get_features_leaves(sdrs)

In [51]:
# Re-derive the delete / change lists after the manual remap and report them
todelete = np.array(list(set(features).intersection(leaves)))
tochange = np.array(list(set(features).difference(todelete)))
print(len(todelete), "features to delete")
print(len(tochange), "features to change")
print(tochange)

0 features to delete
2 features to change
[95269 95270]


In [None]:
# Manually update rows 95269 and 95270: blank out every row listed in tochange
sdrs = sdrs.toarray()
sdrs[tochange] = 0
sdrs = coo_matrix(sdrs)

# Recompute features / leaves and report what is still left to handle
features, leaves = get_features_leaves(sdrs)
todelete = np.array(list(set(features).intersection(leaves)))
tochange = np.array(list(set(features).difference(todelete)))
print(len(todelete), "features to delete")
print(len(tochange), "features to change")
print(tochange)

# Checkpoint

In [None]:
# Zero features to delete
# Zero features to change
def load_new():
    """
    Build the SDR matrix from the './new_rows.bin' and './new_cols.bin' files
    :return: 117791 x 117791 COO sparse matrix (int8) holding a 1 for every
             (row, col) pair read from the files
    """
    row_idx = np.fromfile('./new_rows.bin', dtype=np.uint32)
    col_idx = np.fromfile('./new_cols.bin', dtype=np.uint32)
    ones = np.ones(len(row_idx), dtype=np.int8)
    return coo_matrix((ones, (row_idx, col_idx)), shape=(117791, 117791))

def get_features_leaves(sdrs):
    """
    Find features and leaves of SDR matrix
    A feature is a row whose entries sum to 1; a leaf is a column whose
    entries sum to 1.
    NOTE(review): this repeats a definition from an earlier cell, presumably
    so the notebook can be resumed from the checkpoint — confirm before removing.
    :param sdrs: sparse matrix (See scipy.sparse) or dense 2-D numpy array
    :return: (tuple) indices of features, indices of leaves
    """
    # np.asarray(...).ravel() flattens both the np.matrix returned by sparse
    # matrices and the plain ndarray returned by dense input
    lengths = np.asarray(sdrs.sum(axis=1)).ravel()
    counts = np.asarray(sdrs.sum(axis=0)).ravel()
    features = np.flatnonzero(lengths == 1)
    leaves = np.flatnonzero(counts == 1)
    return features, leaves

# Reload the matrix from the checkpoint files and recompute features / leaves
sdrs = load_new()
features,leaves = get_features_leaves(sdrs)

In [58]:
"""TRANSFER IPYNB, ROWS, COLS, LEAVES, DISTANCE.CPP TO AWS EC2 AND COMPUTE THERE"""
# Number of leaves still present (displayed as the cell output)
len(leaves)
# Plan for eliminating the remaining leaves:
# Columns still represent SDRs so...
# Take ids, get SDR rows, compute distance matrix
# Take min distances by id
# Find min of min distances
# Remap SDR with nearest neighbors
# Repeat steps until 0 leaves

56427

In [None]:
# Keep every (row, col) entry whose row is not a feature. np.isin builds the
# mask in one pass instead of scanning `features` once per matrix entry, and
# it also copes with the case where nothing is kept.
keep = ~np.isin(sdrs.row, features)
rows = sdrs.row[keep].tolist()
cols = sdrs.col[keep].tolist()
# Re-add each changed feature row using the columns recorded in col_map
for sdr in tochange:
    new_ON = col_map[sdr]
    if new_ON is None:
        continue
    # col_map holds a plain int for entries that were never remapped and an
    # index array for remapped ones; normalise both to a list
    new_ON = np.atleast_1d(new_ON).tolist()
    rows.extend([sdr]*len(new_ON))
    cols.extend(new_ON)

In [None]:
# Reassemble the sparse matrix from the edited coordinate lists
sdrs = coo_matrix(
    (np.ones(len(rows), dtype=np.int8), (rows, cols)),
    shape=(117791, 117791),
)
# Row sums ("lengths") and column sums ("counts") as flat arrays
lengths = sdrs.sum(axis=1).A1
counts = sdrs.sum(axis=0).A1
# Features: rows summing to 1; leaves: columns summing to 1
features = np.argwhere(lengths == 1).flatten()
leaves = np.argwhere(counts == 1).flatten()

In [None]:
# Number of rows whose length is exactly 1, i.e. how many features remain
np.sum(lengths==1)

In [None]:
# Works on either the sparse matrix or a dense array (no toarray() needed).
# The row sums are flattened to 1-D first: for a sparse matrix the sum is an
# (n, 1) np.matrix, and argwhere on that yields (row, 0) pairs which
# flatten() would interleave with the real row indices.
l = np.asarray(sdrs.sum(axis=1)).ravel()
np.argwhere(l==1).flatten()

# Break

In [None]:
# Indices of the SDRs to relocate
torelocate = np.fromfile('./torelocate.bin', dtype=np.uint32)
# One row of distances per relocated SDR; the column count is inferred
dmatrix = np.fromfile('./distances.bin', dtype=np.float64).reshape((len(torelocate), -1))

In [None]:
# Per row: how many distances fall below 1.0, minus one
sims = [np.sum(row < 1.0) - 1 for row in dmatrix]
# Per row: positions 1..256 of the ascending distance order (position 0 is skipped)
sorts = [np.argsort(row)[1:257] for row in dmatrix]
# Distances of those neighbours, then drop the first remaining column
# NOTE(review): this skips a second neighbour on top of the [1:257] slice — confirm intended
d = np.array([dmatrix[x, order] for x, order in enumerate(sorts)])[:, 1:]

In [None]:
# Columns 1..100 of every neighbour list
comb = np.array(sorts)[:, 1:101]
# Relocated indices that also appear among those neighbours
relocated_ids = set(torelocate.reshape(-1))
check = list(relocated_ids.intersection(comb.reshape(-1)))
len(check)

In [None]:
# Display the indices that are going to be relocated
torelocate

In [None]:
# Drop every relocated index from each neighbour list. A set gives constant-time
# membership tests instead of re-scanning each list once per relocated index.
relocated = set(torelocate.tolist())
sorts = [[j for j in s if j not in relocated] for s in np.array(sorts).tolist()]

In [None]:
with open('./mapping.json','r') as fi:
    mapping = json.load(fi)

# JSON object keys come back as strings, so index the loaded mapping with
# string keys. Using the raw np.uint32 value would add a second entry per
# index and make the dict impossible to dump back to JSON.
for i, loc in enumerate(torelocate):
    mapping[str(int(loc))] = sorts[i]

In [None]:
# Per row: how many distances fall below 0.075, minus one
[np.sum(row < 0.075) - 1 for row in dmatrix]

In [None]:
# SDR lengths are read as uint64 and then narrowed to uint16
lengths = np.fromfile('./sdr_lengths', dtype=np.uint64).astype(np.uint16)

In [None]:
# Rebuild the original matrix (uint8 this time) from the raw index files;
# column indices are shifted down by one
rows = np.fromfile('./rows', dtype=np.uint32)
cols = np.fromfile('./cols', dtype=np.uint32)
sdrs = coo_matrix(
    (np.ones(len(rows), dtype=np.uint8), (rows, cols - 1)),
    shape=(117791, 117791),
)
# Free the raw index arrays
del rows, cols

In [None]:
# Column sums as a flat array, then the columns that sum to exactly 1
counts = np.array(sdrs.sum(axis=0).tolist()[0])
singles = np.argwhere(counts == 1).flatten()

In [None]:
# Indices whose stored SDR length is exactly 1
solos = np.argwhere(lengths==1).flatten()

In [None]:
# Indices that are both a solo and a single are the ones to delete
solos = set(solos.tolist())
singles = set(singles.tolist())
todelete = np.array(list(solos & singles))
todelete

In [None]:
# Report how many singles and solos were found, then display the column sums
print(len(singles))
print(len(solos))
counts

In [None]:
# Map every old index to its new, compacted index; deleted indices map to None.
# Membership is tested against a set: `in` on a numpy array rescans the whole
# array on every iteration.
deleted = set(np.asarray(todelete).tolist())
mapping = {}
val = 0
for i in range(117791):
    if i in deleted:
        mapping[i] = None
    else:
        mapping[i] = val
        val += 1

In [None]:
# Persist the index mapping to './mapping.json'
import json
with open('./mapping.json', 'w') as fo:
    json.dump(mapping, fo)

In [None]:
# Count the indices that survive, i.e. whose mapping is not None
number = sum(1 for new_index in mapping.values() if new_index is not None)
number

In [None]:
rows = np.fromfile('./rows',dtype=np.uint32)
cols = np.fromfile('./cols',dtype=np.uint32)
# Remap each (row, col) pair together and keep it only when BOTH ends survive.
# Filtering rows and cols independently can leave the two lists misaligned
# or with different lengths.
r = []
c = []
for row_old, col_old in zip(rows.tolist(), cols.tolist()):
    row_new = mapping[row_old]
    col_new = mapping[col_old - 1]  # column indices are shifted down by one
    if row_new is not None and col_new is not None:
        r.append(row_new)
        c.append(col_new)
del rows
del cols
sdrs = coo_matrix((np.ones(len(r),dtype=np.int8), (r,c)), shape=(number,number))
del r
del c

In [None]:
# Row sums and the rows summing to exactly 1
lengths = np.array(sdrs.sum(axis=1).T.tolist()[0])
solos = np.argwhere(lengths == 1).flatten()
# Column sums and the columns summing to exactly 1
counts = np.array(sdrs.sum(axis=0).tolist()[0])
singles = np.argwhere(counts == 1).flatten()
singles

In [None]:
# Rows of length 1 are the ones that need relocating
torelocate = np.argwhere(lengths==1).flatten()

In [None]:
# Convert to a dense array so rows can be sliced directly in the next cell
sdrs = sdrs.toarray()

In [None]:


# Distance from every SDR to relocate to every SDR in the matrix.
# The row count comes from the matrix itself rather than a hard-coded 117622.
n_sdrs = sdrs.shape[0]
dmatrix = []
for x in torelocate:
    a = sdrs[x,:]
    distances = []
    for i in range(n_sdrs):
        # Progress indicator, throttled so stdout is not written on every comparison
        if i % 1000 == 0:
            sys.stdout.write("\r%d: %d" % (x,i))
        b = sdrs[i,:]
        distances.append(computeDistance(a,b))
    dmatrix.append(distances)
dmatrix = np.array(dmatrix)

In [None]:
# Extract columns 0 and 1 as flat vectors of length 117791
# NOTE(review): getcol is a scipy.sparse method, so this assumes sdrs is a
# sparse 117791-row matrix here rather than a dense array — confirm which
# cell produced it
one = sdrs.getcol(0).toarray().reshape((117791,))
two = sdrs.getcol(1).toarray().reshape((117791,))

In [None]:
# Stack the two column vectors into one 2-row array and display its shape
co = np.array([one,two])
co.shape

In [None]:
# Variance of column 0, before and after rounding every value up with np.ceil
# (presumably a sanity check that the values are already integral — confirm)
print(np.var(one))
print(np.var(np.ceil(one)))