In [555]:
import cv2
import numpy as np
import glob
import pandas as pd

In [2]:
imglist = []

imgs = [i for i in glob.glob("*.ppm")]
for j in imgs:
    imglist.append(cv2.imread(j))
    
crowd = pd.read_csv('Crowd.csv',header=None)
me = pd.read_csv('MyPreferences.csv',header=None)

I created my preference file as a 40x40 csv table. The below simply iterates through that and finds the three targets that I chose for each query and sorts them into a 40x3 matrix.
Note that the 40x40 table will be used for part 5.2.

In [4]:
mecols = pd.DataFrame(np.zeros([40,4]))

for i in range(0,40):
    counter = 1
    mecols.iloc[i][0] = i+1
    for j in range(0,40):
        if me.gt(0).iloc[i][j]:
            mecols.iloc[i][counter] = j+1
            counter = counter + 1

Step 1:

The below calculates the L1 normalized distances between all 40 images. This was done by hand.

In [414]:
color_scores = np.zeros((40,40))
#The number of leading bits of each channel to use.
leading_blue = 3
leading_green = 4
leading_red = 5

#Nested loop: First the query image and then each of its targets. Yes, this is very redundant and every pair of images ended up being compared twice.
for i in range(0,len(imglist)):
    for j in range(0,len(imglist)):
        #First flattens each channel from a 60x80 matrix into a 5340x1 vector. Then converts that into binary, resulting in a 42720x1 vector. 
        #Finally, we want the binary values to occupy their own rows across 8 columns, so the vectors are reshaed to 5340x8.
        #This is done for all 3 channel of both the query image and the target.
        binaryb1 = np.reshape(np.unpackbits(imglist[i][:,:,0].flatten(),axis=0), (-1, 8))
        binaryg1 = np.reshape(np.unpackbits(imglist[i][:,:,1].flatten(),axis=0), (-1, 8))
        binaryr1 = np.reshape(np.unpackbits(imglist[i][:,:,2].flatten(),axis=0), (-1, 8))

        binaryb2 = np.reshape(np.unpackbits(imglist[j][:,:,0].flatten(),axis=0), (-1, 8))
        binaryg2 = np.reshape(np.unpackbits(imglist[j][:,:,1].flatten(),axis=0), (-1, 8))
        binaryr2 = np.reshape(np.unpackbits(imglist[j][:,:,2].flatten(),axis=0), (-1, 8))
        
        #The first X columns are removed from each of the binary matricies and replaced with 0s, where X is 8-#number of leading bits for that channel.
        #These numbers are then converted back from binary into decimal, effectively completing the process of creating bins.
        #This results in a 5340x1 vector for each channel.
        zerosb = np.zeros((binaryb1.shape[0],(binaryb1.shape[1]-leading_blue)),dtype=int)
        zerosg = np.zeros((binaryg1.shape[0],(binaryg1.shape[1]-leading_green)),dtype=int)
        zerosr = np.zeros((binaryr1.shape[0],(binaryr1.shape[1]-leading_red)),dtype=int)

        reducedb1 = np.packbits(np.column_stack((zerosb,binaryb1[:,0:leading_blue])))
        reducedg1 = np.packbits(np.column_stack((zerosg,binaryg1[:,0:leading_green])))
        reducedr1 = np.packbits(np.column_stack((zerosr,binaryr1[:,0:leading_red])))

        reducedb2 = np.packbits(np.column_stack((zerosb,binaryb2[:,0:leading_blue])))
        reducedg2 = np.packbits(np.column_stack((zerosg,binaryg2[:,0:leading_green])))
        reducedr2 = np.packbits(np.column_stack((zerosr,binaryr2[:,0:leading_red])))

        #Stacks the vectors for each channel into a 5340x3 matrix. reduced1 is the query image and reduced2 is the target.
        #The numpy matricies are converted to pandas dataframes for easier grouping and counting operations.
        reduced1 = pd.DataFrame(np.stack((reducedb1,reducedg1,reducedr1),axis=1))
        reduced2 = pd.DataFrame(np.stack((reducedb2,reducedg2,reducedr2),axis=1))
        
        #Each column represents a different channel.
        reduced1.columns = ['b','g','r']
        reduced2.columns = ['b','g','r']
        
        #We will not count the number of (b,g,r) pairs in both of the data frames. In other words, we're counting how many pixels fall into each of the (b,g,r) bins for both images.
        #Note that this is not all-encompassing; some pairs will be absent from one dataframe that are present in the other and some will be absent from both.
        reduced1['count'] = 1 
        reduced2['count'] = 1 
        withcount1 = reduced1.groupby(['b','g','r'])['count'].count()
        withcount2 = reduced2.groupby(['b','g','r'])['count'].count()
        
        #In order to easily count the difference in (b,g,r) pairs between both images, we must join their tables. An outer join is used as that takes the union of both of their 
        #bin counts.
        reduced = withcount1.to_frame().join(withcount2.to_frame(),on=['b','g','r'],how='outer',lsuffix='1',rsuffix='2').fillna(0)
        
        #Finally, the L1 distance is taken between each of the bin counts. The absolute value is taken, then the sum, and then the value is normalized.
        #The result is a 40x40 matrix of distance values between 0 and 1 where 0 means the images have identical colors.
        color_scores[i][j] =np.sum(np.absolute(reduced['count1']-reduced['count2']))/(2*imglist[0].shape[0]*imglist[0].shape[1])

The below finds the best 3 targets and the worst target for each query.

In [415]:
best_data = []

for i in range(0,40):
    #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
    sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),color_scores))).sort_values(by=(i+1))
    #The #s of the best 3 targets and the worst.
    best3 = sorted_col.iloc[[1,2,3,39]][0]
    #The score between 0 and 1 that each of the images had
    bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

    best_data.append(pd.concat([best3,bestscores],axis=1))

Compares the images to the crowd's opinion

In [416]:
#Each s refers to the score that the crowd gave for the (query, target pair)
#1,2,3 for the best matches and 39 for the worst match (Not 40, because I'm not counting the image with itself as the "best match")

s1 = np.empty(40,dtype=int)
s2 = np.empty(40,dtype=int)
s3 = np.empty(40,dtype=int)
s39 = np.empty(40,dtype=int)
scores = np.empty(40,dtype=int)

for i in range(0,40):
    s1[i] = crowd.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
    s2[i] = crowd.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
    s3[i] = crowd.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
    s39[i] = crowd.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]
    
    scores[i] = s1[i] + s2[i] + s3[i]
    
print(np.sum(scores),np.sum(s39))

4772 195


Compares the images to my opinion.

In [417]:
#Simply counts how many images are in the top 3 for both the system and my own opinion by iterating over them and counting how many top 3s are shared.
intersect = np.empty(40)

for i in range(0,40):
    count = 0
    for j in range(0,3):
        for k in range(0,3):
            if best_data[i][0].iloc[j] == mecols.iloc[i][k]:
                count = count+1
    intersect[i] = count
    
print(intersect, np.sum(intersect))
        

In [65]:
colors = np.round(color_scores,3)

import csv 

f = open('scoresb{}g{}r{}.csv'.format(leading_blue,leading_green,leading_red), 'w',newline='')
#f = open('colorscores.csv', 'w',newline='')

with f:
    writer = csv.writer(f)
    for row in colors:
        writer.writerow(row)

Step 2: Texture

Converts all images to grayscale and pads them with surrounding 0s. cv2 is used for simplicty's sake. All it does it average over the values for all 3 channels for each pixel.
The padding is done manually by appending an extra column of 0s and rows to the right, left, top, and bottom of each image.

In [473]:
gray = []
zerorow = np.zeros((1,89))
zerocol = np.zeros((62,1))

for i in range(0,40):
    grayimg = cv2.cvtColor(imglist[i], cv2.COLOR_BGR2GRAY)
    zerotop = np.append(zerorow,grayimg,axis=0)
    zerobot = np.append(zerotop,zerorow,axis=0)
    zeroleft = np.append(zerocol,zerobot,axis=1)
    zeroright = np.append(zeroleft,zerocol,axis=1)
    gray.append(zeroright)

This performs the Laplacian operation on all pixels of all images.

The image it multiplied by 8, and the sum of all surrounding pixels is taken and multiplied by -1.

The commented diagrams are meant to illustrate the convultion is operating on at each step.
O represents the current pixel
\# represents a surrounding number that is not being operated on at the current step
X represents a surrounding number that is affected by the current operation.

In [630]:
newimglist = []

for i in range(0,40):
    newvalues = np.zeros((60,89))
    for j in range(1,61):
        for k in range(1,90):
            
            posx = j
            posy = k
            ## # #
            # X #
            # # #
            currentpix = gray[i][posx,posy]

            #X X X
            ## O #
            #X X X            
            abovepix = gray[i][[(posx-1),(posx+1)],(posy-1):(posy+2)]

            ## # #
            #X O X
            ## # #
            sidepix = gray[i][posx,[posy-1,posy+1]]
            
            newvalues[j-1][k-1] = (currentpix*8)-(np.sum(abovepix)+np.sum(sidepix))
    newimglist.append(np.absolute(newvalues)) 

Divides each pixel into a bin based on the Laplacian's log_2 value.

The number of objects in the bins are then counted and subtracted from each image to obtain their distance.

In [834]:
texture_vals = np.zeros((40,40))

for i in range(0,len(newimglist)):
    for j in range(0,len(newimglist)):        
        img1 = np.log2(newimglist[i])
        img2 = np.log2(newimglist[j])
        
        img1[img1 == inf] = 0
        img1[img1 == -inf] = 0
        img2[img2 == inf] = 0
        img2[img2 == -inf] = 0
        
        img1 = pd.DataFrame(np.round(img1,0).flatten())[0].value_counts()
        img2 = pd.DataFrame(np.round(img2,0).flatten())[0].value_counts()
               
        
        bincount = img1.to_frame().join(img2.to_frame(),how="outer",lsuffix='1',rsuffix='2').fillna(0)
        texture_vals[i][j] = np.sum(np.absolute(bincount['01']-bincount['02']))/(2*imglist[0].shape[0]*imglist[0].shape[1])     

  """
  


Same method of determining the best 3 vs the crowd as with color 

In [837]:
best_data = []
s1 = np.empty(40,dtype=int)
s2 = np.empty(40,dtype=int)
s3 = np.empty(40,dtype=int)
s39 = np.empty(40,dtype=int)
scores = np.empty(40,dtype=int)

for i in range(0,40):
    #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
    sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),texture_vals))).sort_values(by=(i+1))
    #The #s of the best 3 targets and the worst.
    best3 = sorted_col.iloc[[1,2,3,39]][0]
    #The score between 0 and 1 that each of the images had
    bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

    best_data.append(pd.concat([best3,bestscores],axis=1))
    
for i in range(0,40):
    s1[i] = crowd.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
    s2[i] = crowd.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
    s3[i] = crowd.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
    s39[i] = crowd.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]
    
    scores[i] = s1[i] + s2[i] + s3[i]
    
print(np.sum(scores),np.sum(s39))

3787 39


Same method as determining the best 3 vs user as with color

In [831]:
intersect = np.empty(40)

for i in range(0,40):
    count = 0
    for j in range(0,3):
        for k in range(0,3):
            if best_data[i][0].iloc[j] == mecols.iloc[i][k]:
                count = count+1
    intersect[i] = count
    
print(intersect, np.sum(intersect))

[1. 0. 1. 0. 0. 0. 1. 2. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 2. 0. 0.] 14.0


PART 3: Shape

Converts each image to binary using cv2's threshold function. The threshold value was hand-picked based on results.

In [1300]:
binary = []

for i in range(0,40):
    grayimg = cv2.cvtColor(imglist[i], cv2.COLOR_BGR2GRAY)
    ret,thresh = cv2.threshold(grayimg,70,225,cv2.THRESH_BINARY)
    #ret,thresh = cv2.threshold(blur,0,255,cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    binary.append(thresh)

To test the binarizations of images

In [868]:
cv2.imshow("Bsf",binary[2])
cv2.imshow("Binary",binary[34])
cv2.waitKey(0)
cv2.destroyAllWindows()

Calulates the shape distance between two images by simply counting up how many pixels did not overlap in the binar images

In [1301]:
shape_vals = np.zeros((40,40))

for i in range(0,40):
    for j in range(0,40):
        shape_vals[i][j] = np.sum(binary[i]!=binary[j])/(60*89)

Same method of determining the best 3 vs the crowd as with color and texture

In [1302]:
best_data = []
s1 = np.empty(40,dtype=int)
s2 = np.empty(40,dtype=int)
s3 = np.empty(40,dtype=int)
s39 = np.empty(40,dtype=int)
scores = np.empty(40,dtype=int)

for i in range(0,40):
    #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
    sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),shape_vals))).sort_values(by=(i+1))
    #The #s of the best 3 targets and the worst.
    best3 = sorted_col.iloc[[1,2,3,39]][0]
    #The score between 0 and 1 that each of the images had
    bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

    best_data.append(pd.concat([best3,bestscores],axis=1))
    
for i in range(0,40):
    s1[i] = crowd.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
    s2[i] = crowd.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
    s3[i] = crowd.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
    s39[i] = crowd.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]
    
    scores[i] = s1[i] + s2[i] + s3[i]
    
print(np.sum(scores),np.sum(s39))

4111 290


Same method of determining the best 3 vs the user as with color and texture

In [1296]:
intersect = np.empty(40)

for i in range(0,40):
    count = 0
    for j in range(0,3):
        for k in range(0,3):
            if best_data[i][0].iloc[j] == mecols.iloc[i][k]:
                count = count+1
    intersect[i] = count
    
print(intersect, np.sum(intersect))

[0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 2. 0. 1. 1. 1. 2. 0. 0. 1. 1. 2.
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1.] 24.0


Optimizing the sytem to incorporate all color, texture, and shape together. The below is used to simulate potential a, b, and c values.

In [1303]:
a_vec = []
b_vec = []
c_vec = []
thescores = []
count = 0

while count < 1000:
    a = random()
    b = random()*(1-a)
    c = 1-(a+b)
    
    scale_color = a*color_scores
    scale_text = b*texture_vals
    scale_shape = c*shape_vals

    scale_vals = scale_color+scale_text+scale_shape
    
    best_data = []
    s1 = np.empty(40,dtype=int)
    s2 = np.empty(40,dtype=int)
    s3 = np.empty(40,dtype=int)
    s39 = np.empty(40,dtype=int)
    scores = np.empty(40,dtype=int)

    for i in range(0,40):
        #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
        sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),scale_vals))).sort_values(by=(i+1))
        #The #s of the best 3 targets and the worst.
        best3 = sorted_col.iloc[[1,2,3,39]][0]
        #The score between 0 and 1 that each of the images had
        bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

        best_data.append(pd.concat([best3,bestscores],axis=1))

    for i in range(0,40):
        s1[i] = crowd.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
        s2[i] = crowd.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
        s3[i] = crowd.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
        s39[i] = crowd.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]

        scores[i] = s1[i] + s2[i] + s3[i]

    a_vec.append(a)
    b_vec.append(b)
    c_vec.append(c)
    thescores.append(np.sum(scores))

    count = count+1

Returns the a, b, and c values that produced the highest score.

In [1185]:
index = thescores.index(np.max(thescores))
thescores[index]

print("a: ",a_vec[index],"\t b:",b_vec[index],"\t c:",c_vec[index],"\t score",thescores[index])

a:  0.4822628435838102 	 b: 0.3519389087923588 	 c: 0.165798247623831 	 score 6465


These were the highest values as produced above. They're evaluted as normal.

In [1208]:
a = 0.26
b = 0.21
c = 1-(a+b)

scale_color = a*color_scores
scale_text = b*texture_vals
scale_shape = c*shape_vals

scale_vals = scale_color+scale_text+scale_shape

Best 3 targets vs crowd

In [1209]:
best_data = []
s1 = np.empty(40,dtype=int)
s2 = np.empty(40,dtype=int)
s3 = np.empty(40,dtype=int)
s39 = np.empty(40,dtype=int)
scores = np.empty(40,dtype=int)

for i in range(0,40):
    #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
    sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),scale_vals))).sort_values(by=(i+1))
    #The #s of the best 3 targets and the worst.
    best3 = sorted_col.iloc[[1,2,3,39]][0]
    #The score between 0 and 1 that each of the images had
    bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

    best_data.append(pd.concat([best3,bestscores],axis=1))
    
for i in range(0,40):
    s1[i] = crowd.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
    s2[i] = crowd.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
    s3[i] = crowd.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
    s39[i] = crowd.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]
    
    scores[i] = s1[i] + s2[i] + s3[i]
    
print(np.sum(scores),np.sum(s39))

6475 59


Best 3 targets vs self

In [1210]:
intersect = np.empty(40)

for i in range(0,40):
    count = 0
    for j in range(0,3):
        for k in range(0,3):
            if best_data[i][0].iloc[j] == mecols.iloc[i][k]:
                count = count+1
    intersect[i] = count
    
print(intersect, np.sum(intersect))

[1. 0. 1. 2. 1. 1. 2. 2. 1. 1. 1. 1. 1. 2. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0.
 1. 1. 1. 0. 2. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1.] 31.0


Calculates the maximum possible crowd score by taking the 3 highest values in each row and summing them.

In [1272]:
maxcrowd = []

for i in range(0,40):
    maxcrowd.append(np.sum(np.sort(crowd.iloc[i])[[-1,-2,-3]]))
np.sum(maxcrowd)

9853

Same maximazation proceduce for color, texture, and shape as before, but this time for my own preferences.

In [1286]:
a_vec = []
b_vec = []
c_vec = []
thescores = []
count = 0

while count < 1000:
    a = random()
    b = random()*(1-a)
    c = 1-(a+b)
    
    scale_color = a*color_scores
    scale_text = b*texture_vals
    scale_shape = c*shape_vals

    scale_vals = scale_color+scale_text+scale_shape
    
    best_data = []
    s1 = np.empty(40,dtype=int)
    s2 = np.empty(40,dtype=int)
    s3 = np.empty(40,dtype=int)
    s39 = np.empty(40,dtype=int)
    scores = np.empty(40,dtype=int)

    for i in range(0,40):
        #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
        sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),scale_vals))).sort_values(by=(i+1))
        #The #s of the best 3 targets and the worst.
        best3 = sorted_col.iloc[[1,2,3,39]][0]
        #The score between 0 and 1 that each of the images had
        bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

        best_data.append(pd.concat([best3,bestscores],axis=1))

    for i in range(0,40):
        s1[i] = me.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
        s2[i] = me.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
        s3[i] = me.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
        s39[i] = me.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]

        scores[i] = s1[i] + s2[i] + s3[i]

    a_vec.append(a)
    b_vec.append(b)
    c_vec.append(c)
    thescores.append(np.sum(scores))

    count = count+1

Returns the a, b, and c values the produced the maximum score.

In [1287]:
index = thescores.index(np.max(thescores))
thescores[index]

print("a: ",a_vec[index],"\t b:",b_vec[index],"\t c:",c_vec[index],"\t score",thescores[index])

a:  0.16530143618539272 	 b: 0.19851976110678066 	 c: 0.6361788027078266 	 score 102


Plug those maximum values in and calculate how the top 3 results perform against my own weighted and unweighted opinions.

In [1294]:
a = .15 
b = .2
c = 1-(a+b)

scale_color = a*color_scores
scale_text = b*texture_vals
scale_shape = c*shape_vals

scale_vals = scale_color+scale_text+scale_shape

best_data = []
s1 = np.empty(40,dtype=int)
s2 = np.empty(40,dtype=int)
s3 = np.empty(40,dtype=int)
s39 = np.empty(40,dtype=int)
scores = np.empty(40,dtype=int)

for i in range(0,40):
    #Sorts the column pertaining to the query image and finds the 2nd, 3rd, and 4th lowest values (not the first because the lowest value for each image 0 is with itself.)
    sorted_col = pd.DataFrame(np.column_stack(((np.arange(1,41,1)),scale_vals))).sort_values(by=(i+1))
    #The #s of the best 3 targets and the worst.
    best3 = sorted_col.iloc[[1,2,3,39]][0]
    #The score between 0 and 1 that each of the images had
    bestscores = sorted_col.iloc[[1,2,3,39]][i+1]

    best_data.append(pd.concat([best3,bestscores],axis=1))
    
for i in range(0,40):
    s1[i] = me.iloc[i][(best_data[i].iloc[0][0].astype('int16'))-1]
    s2[i] = me.iloc[i][(best_data[i].iloc[1][0].astype('int16'))-1]
    s3[i] = me.iloc[i][(best_data[i].iloc[2][0].astype('int16'))-1]
    s39[i] = me.iloc[i][(best_data[i].iloc[3][0].astype('int16'))-1]
    
    scores[i] = s1[i] + s2[i] + s3[i]
    
print(np.sum(scores),np.sum(s39))

intersect = np.empty(40)

for i in range(0,40):
    count = 0
    for j in range(0,3):
        for k in range(0,3):
            if best_data[i][0].iloc[j] == mecols.iloc[i][k]:
                count = count+1
    intersect[i] = count
    
print(intersect, np.sum(intersect))

104 0
[1. 0. 0. 1. 1. 0. 2. 2. 1. 1. 1. 1. 1. 2. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1.
 1. 1. 1. 0. 2. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1.] 32.0
