<a href="https://colab.research.google.com/github/satyamnewale/numpy-Book/blob/main/array_shredding_similarity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
import numpy as np

#1. shredding
def shred(A):
  #vertical shred
  odd_cols = A[:,1::2]
  even_cols = A[:,0::2]

  #horizontal shred
  #on odd_col:
  oc_or = odd_cols[1::2,:]
  oc_er = odd_cols[0::2,:]

  #on even_col:
  ec_or = even_cols[1::2,:]
  ec_er = even_cols[0::2,:]

  return oc_or,oc_er,ec_or,ec_er

# similarity % between two arrays
def similarity(A, B):
    if A.shape != B.shape:
        return 0.0   # different shape → 0 similarity
    return np.mean(A == B) * 100    # percentage

def cosine_similarity(A, B):
    A = A.flatten()
    B = B.flatten()

    dot = np.dot(A, B)
    normA = np.linalg.norm(A)
    normB = np.linalg.norm(B)

    if normA == 0 or normB == 0:
        return 0.0

    return dot / (normA * normB)

def euclidean_distance(A, B):
    A = A.flatten()
    B = B.flatten()

    return np.linalg.norm(A - B)

# check similarity against the 4 shredded arrays
def check_similarity(new_array, shredded_arrays):
    similarities = []

    for idx, arr in enumerate(shredded_arrays):
        score = similarity(new_array, arr)
        cos = cosine_similarity(new_array, arr)
        dist = euclidean_distance(new_array, arr)
        similarities.append((idx+1, score, cos, dist))

    # Check if new_array matches ANY of the four (100% match)
    exact_matches = [i for i, s, c, d in similarities if s > 50.0]

    return similarities, exact_matches

#example usage
np.random.seed(2)
A = np.random.randint(0, 2, (10, 10))
arrays = shred(A)
print(arrays)
new_array = np.random.randint(0, 2, (5, 5))  # choose a known array to test
print(new_array)
similarities, exact = check_similarity(new_array, arrays)

print("Similarity scores:")
for idx, score, cos, dist in similarities:
    print(f"Array {idx}: {score:.2f}, Cosine={cos:.4f}, Euclidean={dist:.2f}")

print("\nExact matches (100%):", exact)


(array([[1, 1, 1, 1, 0],
       [0, 1, 0, 1, 0],
       [0, 0, 1, 1, 1],
       [0, 0, 1, 1, 0],
       [0, 0, 1, 1, 0]]), array([[1, 0, 1, 1, 1],
       [0, 1, 0, 0, 1],
       [0, 1, 1, 0, 0],
       [0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0]]), array([[0, 1, 1, 1, 0],
       [1, 0, 0, 1, 1],
       [0, 0, 0, 1, 1],
       [1, 0, 1, 1, 0],
       [0, 0, 0, 0, 0]]), array([[0, 1, 0, 0, 0],
       [0, 1, 1, 0, 1],
       [0, 0, 1, 1, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 1, 0, 0]]))
[[0 1 0 0 1]
 [0 0 1 1 0]
 [1 1 1 1 0]
 [0 0 1 0 1]
 [0 1 0 0 0]]
Similarity scores:
Array 1: 44.00, Cosine=0.4181, Euclidean=3.74
Array 2: 48.00, Cosine=0.4352, Euclidean=3.61
Array 3: 44.00, Cosine=0.3636, Euclidean=3.74
Array 4: 60.00, Cosine=0.5025, Euclidean=3.16

Exact matches (100%): [4]


---

In [69]:
import numpy as np

# -------- SHREDDING (same as your function) --------
def shred(A):
    odd_cols = A[:,1::2]
    even_cols = A[:,0::2]

    oc_or = odd_cols[1::2,:]
    oc_er = odd_cols[0::2,:]

    ec_or = even_cols[1::2,:]
    ec_er = even_cols[0::2,:]

    return oc_or, oc_er, ec_or, ec_er

def shuffle(A):
    odd_cols = A[:,1::2]
    even_cols = A[:,0::2]
    np.random.shuffle(odd_cols)
    np.random.shuffle(even_cols)
    joint = np.concatenate((odd_cols, even_cols), axis=1)
    return np.random.permutation(joint)

# -------- METRICS --------

def cosine_similarity(A, B):
    A = A.flatten()
    B = B.flatten()
    dot = np.dot(A, B)
    na = np.linalg.norm(A)
    nb = np.linalg.norm(B)
    if na == 0 or nb == 0:
        return 0
    return dot / (na * nb)

def euclidean_distance(A, B):
    return np.linalg.norm(A.flatten() - B.flatten())

def percent_similarity(A, B):
    if A.shape != B.shape:
        return 0
    return np.mean(A == B) * 100


# -------- SLIDING WINDOW: ALL 5×5 SUB-ARRAYS --------

def all_patches(A, size=5):
    patches = []
    for i in range(A.shape[0] - size + 1):
        for j in range(A.shape[1] - size + 1):
            patches.append(A[i:i+size, j:j+size])
    return patches


# -------- MAIN FUNCTION --------

def compute_final_means(A):
    shredded = shred(A)       # (oc_or, oc_er, ec_or, ec_er)
    patches = all_patches(shuffle(A))  # All 36 patches

    cosine_means = []
    euclid_means = []
    score_means = []

    for P in patches:
        # Compare patch P to all 4 shredded arrays
        cos_vals = [cosine_similarity(P, S) for S in shredded]
        euc_vals = [euclidean_distance(P, S) for S in shredded]
        scr_vals = [percent_similarity(P, S) for S in shredded]

        # Mean for each metric across the 4 arrays
        cosine_means.append(np.mean(cos_vals))
        euclid_means.append(np.mean(euc_vals))
        score_means.append(np.mean(scr_vals))

    # Final mean over all patches
    final_cosine = np.mean(cosine_means)
    final_euclid = np.mean(euclid_means)
    final_score = np.mean(score_means)

    return final_cosine, final_euclid, final_score

np.random.seed(69)
A = np.random.randint(1, 100, (10, 10))

print(A)
cosine_final, euclid_final, score_final = compute_final_means(A)

print("Final Mean Cosine Similarity:", cosine_final)
print("Final Mean Euclidean Distance:", euclid_final)
print("Final Mean Percent Match:", score_final)
# after certain increase in varience say value varies from 1-10000000 or more, mean percentage match will not go below 1%.

[[55 76 74 91 56 21 50 23 10 57]
 [98 39 97 89 13 38 76 54  4 81]
 [40 25  3 40 25 48 91 76 45 24]
 [36 44 87 15 22 50 71 70 77 15]
 [15 92 62 49 46 17 94 84 64 12]
 [79 19 45 25 79 30  1 46 63 42]
 [47 84 28 54 49 65 50 97 41 47]
 [ 8 54 69 10 42 10 83 56 50 94]
 [22 54 36  2 30 13  7 72 66  6]
 [26 10 65 38 61 30 81 52 25 29]]
Final Mean Cosine Similarity: 0.7663510900226121
Final Mean Euclidean Distance: 182.21415508409194
Final Mean Percent Match: 2.5
